Example #1
0
    def get_value(self, plan, penalty_coeff=1e0):
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan)
        model.update()
        self._bexpr_to_pred = {}

        obj_bexprs = self._get_trajopt_obj(plan)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan,
                                           priority=1,
                                           add_nonlin=True,
                                           verbose=False)
        return self._prob.get_value(penalty_coeff)
Example #2
0
    def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None,
                        verbose=False, resample=True):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts==None:
            active_ts = (0, plan.horizon-1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose)
            tol = 1e-2
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon-1)

            obj_bexprs = []

            if resample:
                failed_preds = plan.get_failed_preds()
                ## this is an objective that places
                ## a high value on matching the resampled values
                obj_bexprs.extend(self._resample(plan, failed_preds))

            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(
                plan, priority=-1, add_nonlin=True, verbose=verbose)
            self._add_all_timesteps_of_actions(
                plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-2
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose)
            tol=1e-4

        solv = Solver()
        solv.initial_penalty_coeff = self.init_penalty_coeff
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        self._update_ll_params()
        self._failed_groups = self._prob.nonconverged_groups
        return success
Example #3
0
    def get_value(self, plan, penalty_coeff=1e0):
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan)
        model.update()
        self._bexpr_to_pred = {}

        obj_bexprs = self._get_trajopt_obj(plan)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=False)
        return self._prob.get_value(penalty_coeff)
Example #4
0
    def monitor_update(self, plan, update_values, callback=None, n_resamples=5, active_ts=None, verbose=False):
        print 'Resolving after environment update...\n'
        if callback is not None: viewer = callback()
        if active_ts==None:
            active_ts = (0, plan.horizon-1)

        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is paied attentioned in here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        self._bexpr_to_pred = {}
        tol = 1e-3
        obj_bexprs = []
        rs_obj = self._update(plan, update_values)
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

        self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY,
            add_nonlin=True, active_ts= active_ts, verbose=verbose)
        obj_bexprs.extend(rs_obj)
        self._add_obj_bexprs(obj_bexprs)
        initial_trust_region_size = 1e-2

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        plan.restore_free_attrs()

        self.reset_variable()
        print "monitor_update\n"
        return success
Example #5
0
class NAMOSolver(LLSolver):

    def __init__(self, early_converge=True, transfer_norm='min-vel'):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e6
        self.init_penalty_coeff = 1e2
        self.child_solver = None
        self._param_to_ll = {}
        self._failed_groups = []

        self.early_converge=early_converge
        self.transfer_norm = transfer_norm

    def backtrack_solve(self, plan, callback=None, verbose=False):
        plan.save_free_attrs()
        success = self._backtrack_solve(plan, callback, anum=0, verbose=verbose)
        plan.restore_free_attrs()
        return success


    def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False):
        if anum > len(plan.actions) - 1:
            return True
        a = plan.actions[anum]
        active_ts = a.active_timesteps
        inits = {}
        if a.name == 'moveto':
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == 'movetoholding':
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == 'grasp':
            ## sample the grasp/grasp_pose
            rs_param = a.params[4]
        elif a.name == 'putdown':
            ## sample the end pose
            rs_param = a.params[4]
        else:
            raise NotImplemented

        def recursive_solve():
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p not in a.params: continue
                    old_params_free[p] = p._free_attrs['value'].copy()
                    p._free_attrs['value'][:] = 0
                else:
                    old_params_free[p] = p._free_attrs['pose'][:, active_ts[1]].copy()
                    p._free_attrs['pose'][:, active_ts[1]] = 0
            self.child_solver = NAMOSolver()
            if self.child_solver._backtrack_solve(plan, callback=callback, anum=anum+1, verbose=verbose):
                return True
            ## reset free_attrs
            for p in a.params:
                if p.is_symbol():
                    if p not in a.params: continue
                    p._free_attrs['value'] = old_params_free[p]
                else:
                    p._free_attrs['pose'][:, active_ts[1]] = old_params_free[p]
            return False
        if not np.all(rs_param._free_attrs['value']):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = NAMOSolver()
            success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0,
                                              active_ts = active_ts, verbose=verbose, force_init=True)

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = rs_param._free_attrs['value'].copy()
        rs_param._free_attrs['value'][:] = 0

        targets = plan.get_param('InContact', 2, {1:rs_param}, negated=False)
        if len(targets) > 1:
            import pdb; pdb.set_trace()

        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        robot_poses = []

        if len(targets) == 0 or np.all(targets[0]._free_attrs['value']):
            ## sample 4 possible poses
            coords = list(itertools.product(range(WIDTH), range(HEIGHT)))
            random.shuffle(coords)
            robot_poses = [np.array(x)[:, None] for x in coords[:4]]
        elif np.any(targets[0]._free_attrs['value']):
            ## there shouldn't be only some free_attrs set
            raise NotImplementedError
        else:
            grasp_dirs = [np.array([0, -1]),
                          np.array([1, 0]),
                          np.array([0, 1]),
                          np.array([-1, 0])]
            grasp_len = plan.params['pr2'].geom.radius + targets[0].geom.radius - dsafe
            for g_dir in grasp_dirs:
                grasp = (g_dir*grasp_len).reshape((2, 1))
                robot_poses.append(targets[0].value + grasp)

        for rp in robot_poses:
            rs_param.value = rp
            success = False
            self.child_solver = NAMOSolver()
            success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0,
                                              active_ts = active_ts, verbose=verbose,
                                              force_init=True)
            if success:
                if recursive_solve():
                    break
                else:
                    success = False
        rs_param._free_attrs['value'] = rs_free
        return success

    def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False):
        success = False

        if force_init or not plan.initialized:
             ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose)
            plan.initialized=True
        success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose)
        success = plan.satisfied(active_ts)
        if success:
            return success


        for _ in range(n_resamples):
        ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints
            self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose)
            success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose)
            success = plan.satisfied(active_ts)
            if success:
                return success
        return success

    # @profile
    def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None,
                        verbose=False, resample=True):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts==None:
            active_ts = (0, plan.horizon-1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose)
            tol = 1e-2
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon-1)

            obj_bexprs = []

            if resample:
                failed_preds = plan.get_failed_preds()
                ## this is an objective that places
                ## a high value on matching the resampled values
                obj_bexprs.extend(self._resample(plan, failed_preds))

            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(
                plan, priority=-1, add_nonlin=True, verbose=verbose)
            self._add_all_timesteps_of_actions(
                plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-2
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose)
            tol=1e-4

        solv = Solver()
        solv.initial_penalty_coeff = self.init_penalty_coeff
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        self._update_ll_params()
        self._failed_groups = self._prob.nonconverged_groups
        return success

    def get_value(self, plan, penalty_coeff=1e0):
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan)
        model.update()
        self._bexpr_to_pred = {}

        obj_bexprs = self._get_trajopt_obj(plan)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=False)
        return self._prob.get_value(penalty_coeff)



    def _get_transfer_obj(self, plan, norm):
        transfer_objs = []
        if norm in ['min-vel', 'l2']:
            for param in plan.params.values():
                # if param._type in ['Robot', 'Can']:
                K = 2
                if param.is_symbol():
                    T = 1
                    pose = param.value
                else:
                    T = plan.horizon
                    pose = param.pose
                assert (K, T) == pose.shape
                KT = K*T
                if norm == 'min-vel' and not param.is_symbol():
                    v = -1 * np.ones((KT - K, 1))
                    d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                    # [:,0] allows numpy to see v and d as one-dimensional so
                    # that numpy will create a diagonal matrix with v and d as a diagonal
                    P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                    Q = np.dot(np.transpose(P), P)
                else: ## l2
                    Q = np.eye(KT)
                cur_pose = pose.reshape((KT, 1), order='F')
                A = -2*cur_pose.T.dot(Q)
                b = cur_pose.T.dot(Q.dot(cur_pose))
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2*self.transfer_coeff*Q,
                                     self.transfer_coeff*A, self.transfer_coeff*b)
                ll_param = self._param_to_ll[param]
                if param.is_symbol():
                    ll_grb_vars = ll_param.value.reshape((KT, 1), order='F')
                else:
                    ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F')
                bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose))
                transfer_objs.append(bexpr)
        elif norm == 'straightline':
            return self._get_trajopt_obj(plan)
        else:
            raise NotImplementedError
        return transfer_objs

    def _resample(self, plan, preds):
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            if (self.early_converge and self._failed_groups != ['all'] and self._failed_groups != []
                and not np.any([param.name in self._failed_groups for param in pred.params])):
                ## using early converge and one group didn't converge
                ## but no parameter from this pred in that group
                continue

            val, attr_inds = pred.resample(negated, t, plan)
            ## no resample defined for that pred
            if val is not None: break
        if val is None:
            # import pdb; pdb.set_trace()
            return []

        t_local = t
        bexprs = []
        i = 0
        for p in attr_inds:
                ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            if p.is_symbol(): t_local = 0
            n_vals = 0
            grb_vars = []
            for attr, ind_arr in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(
                    list(getattr(ll_p, attr)[ind_arr, t_local]))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to this value
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2*val[i+j]*np.ones((1, 1))
                b = np.ones((1, 1))*np.power(val[i+j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2*Q*self.rs_coeff, A*self.rs_coeff, b*self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                init_val = np.ones((1, 1))*val[i+j]
                bexpr = BoundExpr(quad_expr,
                                  Variable(v_arr, val[i+j].reshape((1, 1))))

                bexprs.append(bexpr)

            i += n_vals
        return bexprs

    def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False):
        # verbose=True
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end+1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend([param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False):
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start+1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose)


    def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False):
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1])+1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose)

    def _update_ll_params(self):
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        if active_ts == None:
            active_ts=(0, plan.horizon-1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)


    def _get_trajopt_obj(self, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon-1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ['Robot', 'Can']:
                T = end - start + 1
                K = 2
                pose = param.pose
                KT = K*T
                v = -1 * np.ones((KT - K, 1))
                d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                # [:,0] allows numpy to see v and d as one-dimensional so
                # that numpy will create a diagonal matrix with v and d as a diagonal
                P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                Q = np.dot(np.transpose(P), P)

                Q *= TRAJOPT_COEFF

                quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1)))
                robot_ll = self._param_to_ll[param]
                robot_ll_grb_vars = robot_ll.pose.reshape((KT, 1), order='F')
                init_val = param.pose[:, start:end+1].reshape((KT, 1), order='F')
                bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val))
                traj_objs.append(bexpr)
        return traj_objs
Example #6
0
class DrivingSolver(LLSolver):
    def __init__(self, early_converge=False, transfer_norm='min-vel'):
        # To avoid numerical difficulties during optimization, try keep
        # range of coefficeint within 1e9
        # (largest_coefficient/smallest_coefficient < 1e9)
        self.transfer_coeff = 1e0
        self.rs_coeff = 5e1
        self.trajopt_coeff = 1e0
        self.initial_trust_region_size = 1e-2
        self.init_penalty_coeff = 4e3
        self.smooth_penalty_coeff = 7e4
        self.max_merit_coeff_increases = 5
        self._param_to_ll = {}
        self.early_converge=early_converge
        self.child_solver = None
        self.solve_priorities = [0, 1, 2, 3]
        self.transfer_norm = transfer_norm
        self.grb_init_mapping = {}
        self.var_list = []
        self._grb_to_var_ind = {}
        self.tol = 1e-3

    def _solve_helper(self, plan, callback, active_ts, verbose):
        # certain constraints should be solved first
        success = False
        for priority in self.solve_priorities:
            success = self._solve_opt_prob(plan, priority=priority,
                            callback=callback, active_ts=active_ts,
                            verbose=verbose)

        return success

    def backtrack_solve(self, plan, callback=None, verbose=False, n_resamples=5):
        plan.save_free_attrs()
        success = self._backtrack_solve(plan, callback, anum=0, verbose=verbose, n_resamples=n_resamples)
        # plan.restore_free_attrs()
        return success

    #@profile
    def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False, amax = None, n_resamples=5):
        # if anum == 2:
        #     import ipdb; ipdb.set_trace()
        if amax is None:
            amax = len(plan.actions) - 1

        if anum > amax:
            return True

        a = plan.actions[anum]
        print "backtracking Solve on {}".format(a.name)
        active_ts = a.active_timesteps
        inits = {}
        if a.name == 'drive_down_road':
            rs_param = a.params[3]
        else:
            raise NotImplemented

        def recursive_solve():
            # import ipdb; ipdb.set_trace()
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p not in a.params: continue
                    old_params_free[p] = p._free_attrs
                    p._free_attrs = {}
                    for attr in old_params_free[p].keys():
                        p._free_attrs[attr] = np.zeros(old_params_free[p][attr].shape)
                else:
                    p_attrs = {}
                    old_params_free[p] = p_attrs
                    for attr in p._free_attrs:
                        p_attrs[attr] = p._free_attrs[attr][:, active_ts[1]].copy()
                        p._free_attrs[attr][:, active_ts[1]] = 0
            self.child_solver = DrivingSolver()
            success = self.child_solver._backtrack_solve(plan, callback=callback, anum=anum+1, verbose=verbose, amax = amax)

            # reset free_attrs
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p not in a.params: continue
                    p._free_attrs = old_params_free[p]
                else:
                    for attr in p._free_attrs:
                        p._free_attrs[attr][:, active_ts[1]] = old_params_free[p][attr]
            return success

        # if there is no parameter to resample or some part of rs_param is fixed, then go ahead optimize over this action
        if rs_param is None or sum([not np.all(rs_param._free_attrs[attr]) for attr in rs_param._free_attrs.keys() ]):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = DrivingSolver()
            success = self.child_solver.solve(plan, callback=callback_a, n_resamples=n_resamples,
                                              active_ts = active_ts, verbose=verbose, force_init=True)

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = rs_param._free_attrs
        rs_param._free_attrs = {}
        for attr in rs_free.keys():
            rs_param._free_attrs[attr] = np.zeros(rs_free[attr].shape)

        """
        sampler_begin
        """
        vehicle_poses = self.obj_pose_suggester(plan, anum, resample_size=3)
        if not vehicle_poses:
            success = False
            # print "Using Random Poses"
            # robot_poses = self.random_pose_suggester(plan, anum, resample_size = 5)

        """
        sampler end
        """

        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        for rp in vehicle_poses:
            for attr, val in rp.iteritems():
                setattr(rs_param, attr, val)

            success = False
            self.child_solver = DrivingSolver()
            success = self.child_solver.solve(plan, callback=callback_a, n_resamples=n_resamples,
                                              active_ts = active_ts, verbose=verbose,
                                              force_init=True)
            if success:
                if recursive_solve():
                    break
                else:
                    success = False

        rs_param._free_attrs = rs_free
        return success

    #@profile
    def random_pose_suggester(self, plan, anum, resample_size=5):
        pass

    #@profile
    def obj_pose_suggester(self, plan, anum, resample_size=20):
        vehicle_poses = []
        assert anum + 1 <= len(plan.actions)

        if anum + 1 < len(plan.actions):
            act, next_act = plan.actions[anum], plan.actions[anum+1]
        else:
            act, next_act = plan.actions[anum], None

        vehicle = act.params[0]

        start_ts, end_ts = act.active_timesteps

        for i in range(resample_size):
            if act.name == "drive_down_road":
                road = act.params[1]
                init_pos = act.params[2]

                direction = road.geom.direction
                dist = road.geom.length / 2
                x = road.geom.x
                y = road.geom.y
                final_xy = x + dist * np.cos(direction), y + dist * np.sin(direction)

                vehicle_poses.append({'xy': final_xy, 'theta': direction, 'vel': 0, 'phi': 0, 'u1': 0, 'u2': 0, 'value': 0})
            else:
                raise NotImplementedError
        if not vehicle_poses:
            print "Unable to find IK"
            # import ipdb; ipdb.set_trace()

        return vehicle_poses

    #@profile
    def solve(self, plan, callback=None, n_resamples=5, active_ts=None,
              verbose=False, force_init=False):
        success = False
        if callback is not None:
            viewer = callback()
        if force_init or not plan.initialized:
            self._solve_opt_prob(plan, priority=-2, callback=callback,
                active_ts=active_ts, verbose=verbose)
            # self._solve_opt_prob(plan, priority=-1, callback=callback,
            #     active_ts=active_ts, verbose=verbose)
            plan.initialized=True

        if success or len(plan.get_failed_preds(active_ts = active_ts)) == 0:
            return True


        for priority in self.solve_priorities:
            for attempt in range(n_resamples):
                ## refinement loop
                success = self._solve_opt_prob(plan, priority=priority,
                                callback=callback, active_ts=active_ts, verbose=verbose)

                try:
                    if DEBUG: plan.check_cnt_violation(active_ts = active_ts, priority = priority, tol = 1e-3)
                except:
                    print "error in predicate checking"
                if success:
                    break

                self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose, resample = True)

                # if len(plan.get_failed_preds(active_ts=active_ts, tol=1e-3)) > 9:
                #     break

                print "resample attempt: {}".format(attempt)

                try:
                    if DEBUG: plan.check_cnt_violation(active_ts = active_ts, priority = priority, tol = 1e-3)
                except:
                    print "error in predicate checking"

                assert not (success and not len(plan.get_failed_preds(active_ts = active_ts, priority = priority, tol = 1e-3)) == 0)

            if not success:
                return False

        return success

    #@profile
    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False, resample=False, smoothing = False):
        if callback is not None: viewer = callback()
        self.plan = plan
        vehicle = plan.params['vehicle']
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        initial_trust_region_size = self.initial_trust_region_size
        if resample:
            tol = 1e-3
            def  variable_helper():
                error_bin = []
                for sco_var in self._prob._vars:
                    for grb_var, val in zip(sco_var.get_grb_vars(), sco_var.get_value()):
                        grb_name = grb_var[0].VarName
                        one, two = grb_name.find('-'), grb_name.find('-(')
                        param_name = grb_name[1:one]
                        attr = grb_name[one+1:two]
                        index = eval(grb_name[two+1:-1])
                        param = plan.params[param_name]
                        if not np.allclose(val, getattr(param, attr)[index]):
                            error_bin.append((grb_name, val, getattr(param, attr)[index]))
                if len(error_bin) != 0:
                    print "something wrong"
                    if DEBUG: import ipdb; ipdb.set_trace()

            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            obj_bexprs = []

            ## this is an objective that places
            ## a high value on matching the resampled values
            failed_preds = plan.get_failed_preds(active_ts = active_ts, priority=priority, tol = tol)
            rs_obj = self._resample(plan, failed_preds, sample_all = True)
            # import ipdb; ipdb.set_trace()
            # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_all_timesteps_of_actions(plan, priority=priority,
                add_nonlin=False, active_ts= active_ts, verbose=verbose)
            obj_bexprs.extend(rs_obj)
            self._add_obj_bexprs(obj_bexprs)
            initial_trust_region_size = 1e3
            # import ipdb; ipdb.set_trace()
        else:
            self._bexpr_to_pred = {}
            if priority == -2:
                """
                Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
                tol = 1e-1
                initial_trust_region_size = 1e3
            elif priority == -1:
                """
                Solve the optimization problem while enforcing every constraints.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                    add_nonlin=True)
                tol = 1e-1
            elif priority >= 0:
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                                   active_ts=active_ts, verbose=verbose)
                tol=1e-3

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size

        if smoothing:
            solv.initial_penalty_coeff = self.smooth_penalty_coeff
        else:
            solv.initial_penalty_coeff = self.init_penalty_coeff

        solv.max_merit_coeff_increases = self.max_merit_coeff_increases

        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        if priority == MAX_PRIORITY:
            success = success or len(plan.get_failed_preds(tol=tol, active_ts=active_ts, priority=priority)) == 0
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        if resample:
            # During resampling phases, there must be changes added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds(active_ts = active_ts, priority=priority)) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred(active_ts=(0,active_ts[1]), priority=priority)[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ##Restore free_attrs values
        plan.restore_free_attrs()

        self.reset_variable()
        print "priority: {}\n".format(priority)

        return success

    #@profile
    def traj_smoother(self, plan, callback=None, n_resamples=5, verbose=False):
        # plan.save_free_attrs()
        a_num = 0
        success = True
        while a_num < len(plan.actions) - 1:
            act_1 = plan.actions[a_num]
            act_2 = plan.actions[a_num+1]
            active_ts = (act_1.active_timesteps[0], act_2.active_timesteps[1])
            # print active_ts
            old_params_free = {}
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p in act_1.params or p in act_2.params: continue
                    old_params_free[p] = p._free_attrs
                    p._free_attrs = {}
                    for attr in old_params_free[p].keys():
                        p._free_attrs[attr] = np.zeros(old_params_free[p][attr].shape)
                else:
                    p_attrs = {}
                    old_params_free[p] = p_attrs
                    for attr in p._free_attrs:
                        p_attrs[attr] = [p._free_attrs[attr][:, :active_ts[0]].copy(), p._free_attrs[attr][:, active_ts[1]:].copy()]
                        p._free_attrs[attr][:, active_ts[1]:] = 0
                        p._free_attrs[attr][:, :active_ts[0]] = 0
            success = self._traj_smoother(plan, callback, n_resamples, active_ts, verbose)
            # reset free_attrs
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p in act_1.params or p in act_2.params: continue
                    p._free_attrs = old_params_free[p]
                else:
                    for attr in p._free_attrs:
                        p._free_attrs[attr][:, :active_ts[0]] = old_params_free[p][attr][0]
                        p._free_attrs[attr][:, active_ts[1]:] = old_params_free[p][attr][1]

            if not success:
                return success
            print 'Actions: {} and {}'.format(plan.actions[a_num].name, plan.actions[a_num+1].name)
            a_num += 1
        # try:
        #     success = self._traj_smoother(plan, callback, n_resamples, active_ts, verbose)
        # except:
        #     print "Error occured during planning, but not catched"
        #     return False
        # plan.restore_free_attrs()
        return success

    # @profile
    def _traj_smoother(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False):
        print "Smoothing Trajectory..."
        priority = MAX_PRIORITY
        for attempt in range(n_resamples):
            # refinement loop
            print 'Smoother iteration #: {}\n'.format(attempt)
            success = self._solve_opt_prob(plan, priority=priority,
                            callback=callback, active_ts=active_ts, verbose=verbose, resample = False, smoothing = True)
            if success:
                break
            plan.check_cnt_violation(tol = 1e-3)
            self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose, resample = True, smoothing = True)
        return success

    #@profile
    def _get_transfer_obj(self, plan, norm):
        """
            This function returns the expression e(x) = P|x - cur|^2
            Which says the optimized trajectory should be close to the
            previous trajectory.
            Where P is the KT x KT matrix, where Px is the difference of parameter's attributes' current value and parameter's next timestep value
        """

        transfer_objs = []
        if norm == 'min-vel':
            for param in plan.params.values():
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        param_ll = self._param_to_ll[param]
                        if param.is_symbol():
                            T = 1
                            attr_val = getattr(param, attr_name)
                        else:
                            T = param_ll._horizon
                            attr_val = getattr(param, attr_name)[:, param_ll.active_ts[0]:param_ll.active_ts[1]+1]
                        K = attr_type.dim

                        # pose = param.pose
                        if DEBUG: assert (K, T) == attr_val.shape
                        KT = K*T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        # P = np.eye(KT)
                        Q = np.dot(np.transpose(P), P) if not param.is_symbol() else np.eye(KT)
                        cur_val = attr_val.reshape((KT, 1), order='F')
                        A = -2*cur_val.T.dot(Q)
                        b = cur_val.T.dot(Q.dot(cur_val))
                        transfer_coeff = self.transfer_coeff/float(plan.horizon)

                        # QuadExpr is 0.5*x^Tx + Ax + b
                        quad_expr = QuadExpr(2*transfer_coeff*Q,
                                             transfer_coeff*A, transfer_coeff*b)
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F')
                        sco_var = self.create_variable(param_ll_grb_vars, cur_val)
                        bexpr = BoundExpr(quad_expr, sco_var)
                        transfer_objs.append(bexpr)
        else:
            raise NotImplemented
        return transfer_objs

    #@profile
    def create_variable(self, grb_vars, init_vals, save=False):
        """
            if save is Ture
            Update the grb_init_mapping so that each grb_var is mapped to
            the right initial values.
            Then find the sco variables that includes the grb variables we are updating and change the corresponding initial values inside of it.
            if save is False
            Iterate the var_list and use the last initial value used for each gurobi, and construct the sco variables
        """
        sco_var, grb_val_map, ret_val = None, {}, []

        for grb, v in zip(grb_vars.flatten(), init_vals.flatten()):
            grb_name = grb.VarName
            if save: self.grb_init_mapping[grb_name] = v
            grb_val_map[grb_name] = self.grb_init_mapping.get(grb_name, v)
            ret_val.append(grb_val_map[grb_name])
            if grb_name in self._grb_to_var_ind.keys():
                for var, i in self._grb_to_var_ind[grb_name]:
                    var._value[i] = grb_val_map[grb_name]
                    if np.all(var._grb_vars is grb_vars):
                        sco_var = var

        if sco_var is None:
            sco_var = Variable(grb_vars, np.array(ret_val).reshape((len(ret_val), 1)))
            self.var_list.append(sco_var)
            for i, grb in enumerate(grb_vars.flatten()):
                index_val_list = self._grb_to_var_ind.get(grb.VarName, [])
                index_val_list.append((sco_var, i))
                self._grb_to_var_ind[grb.VarName] = index_val_list

        if DEBUG: self.check_sync()
        return sco_var

    #@profile
    def check_grb_sync(self, grb_name):
        for var, i in self._grb_to_var_ind[grb_name]:
            print var._grb_vars[i][0].VarName, var._value[i]

    #@profile
    def check_sync(self):
        """
            This function checks whether all sco variable are synchronized
        """
        grb_val_map = {}
        correctness = True
        for grb_name in self._grb_to_var_ind.keys():
            for var, i in self._grb_to_var_ind[grb_name]:
                try:
                    correctness = np.allclose(grb_val_map[grb_name], var._value[i], equal_nan=True)
                except KeyError:
                    grb_val_map[grb_name] = var._value[i]
                except:
                    print "something went wrong"
                    import ipdb; ipdb.set_trace()
                if not correctness:
                    import ipdb; ipdb.set_trace()


    def reset_variable(self):
        self.grb_init_mapping = {}
        self.var_list = []
        self._grb_to_var_ind = {}

    def monitor_update(self, plan, update_values, callback=None, n_resamples=5, active_ts=None, verbose=False):
        print 'Resolving after environment update...\n'
        if callback is not None: viewer = callback()
        if active_ts==None:
            active_ts = (0, plan.horizon-1)

        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is paied attentioned in here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        self._bexpr_to_pred = {}
        tol = 1e-3
        obj_bexprs = []
        rs_obj = self._update(plan, update_values)
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

        self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY,
            add_nonlin=True, active_ts= active_ts, verbose=verbose)
        obj_bexprs.extend(rs_obj)
        self._add_obj_bexprs(obj_bexprs)
        initial_trust_region_size = 1e-2

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        plan.restore_free_attrs()

        self.reset_variable()
        print "monitor_update\n"
        return success

    def _update(self, plan, update_values):
        bexprs = []
        for val, attr_inds in update_values:
            if val is not None:
                for p in attr_inds:
                    ## get the ll_param for p and gurobi variables
                    ll_p = self._param_to_ll[p]
                    n_vals, i = 0, 0
                    grb_vars = []
                    for attr, ind_arr, t in attr_inds[p]:
                        for j, grb_var in enumerate(getattr(ll_p, attr)[ind_arr, t-ll_p.active_ts[0]].flatten()):
                            Q = np.eye(1)
                            A = -2*val[p][i+j]*np.ones((1, 1))
                            b = np.ones((1, 1))*np.power(val[p][i+j], 2)
                            resample_coeff = self.rs_coeff/float(plan.horizon)
                            # QuadExpr is 0.5*x^Tx + Ax + b
                            quad_expr = QuadExpr(2*Q*resample_coeff, A*resample_coeff, b*resample_coeff)
                            v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                            init_val = np.ones((1, 1))*val[p][i+j]
                            sco_var = self.create_variable(v_arr, np.array([val[p][i+j]]).reshape((1, 1)), save = True)
                            bexpr = BoundExpr(quad_expr, sco_var)
                            bexprs.append(bexpr)
                        i += len(ind_arr)
        return bexprs


    #@profile
    def _resample(self, plan, preds, sample_all = False):
        """
            This function first calls fail predicate's resample function,
            then, uses the resampled value to create a square difference cost
            function e(x) = |x - rs_val|^2 that will be minimized later.
            rs_val is the resampled value
        """
        bexprs = []
        val, attr_inds = None, None
        pred_type = {}
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            if pred_type.get(pred.get_type, False):
                continue
            val, attr_inds = pred.resample(negated, t, plan)
            if val is not None: pred_type[pred.get_type] = True
            ## if no resample defined for that pred, continue
            if val is not None:
                for p in attr_inds:
                    ## get the ll_param for p and gurobi variables
                    ll_p = self._param_to_ll[p]
                    n_vals, i = 0, 0
                    grb_vars = []
                    for attr, ind_arr, t in attr_inds[p]:
                        for j, grb_var in enumerate(getattr(ll_p, attr)[ind_arr, t-ll_p.active_ts[0]].flatten()):
                            Q = np.eye(1)
                            A = -2*val[p][i+j]*np.ones((1, 1))
                            b = np.ones((1, 1))*np.power(val[p][i+j], 2)
                            resample_coeff = self.rs_coeff/float(plan.horizon)
                            # QuadExpr is 0.5*x^Tx + Ax + b
                            quad_expr = QuadExpr(2*Q*resample_coeff, A*resample_coeff, b*resample_coeff)
                            v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                            init_val = np.ones((1, 1))*val[p][i+j]
                            sco_var = self.create_variable(v_arr, np.array([val[p][i+j]]).reshape((1, 1)), save = True)
                            bexpr = BoundExpr(quad_expr, sco_var)
                            bexprs.append(bexpr)
                        i += len(ind_arr)
                if not sample_all:
                    break
        return bexprs

    #@profile
    def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True,
                       priority=MAX_PRIORITY, verbose=False):
        """
            This function creates constraints for the predicate and added to
            Prob class in sco.
        """
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end+1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            if DEBUG: assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            if expr is not None:
                if add_nonlin or isinstance(expr.expr, AffExpr):
                    for t in effective_timesteps:
                        if t in active_range:
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)

                            # TODO: REMOVE line below, for tracing back predicate for debugging.
                            if DEBUG:
                                bexpr.source = (negated, pred, t)
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend([param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    #@profile
    def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY,
                                                 add_nonlin=False, active_ts=None, verbose=False):
        """
            Adding only non-linear constraints on the first and last timesteps of each action.
        """
        if active_ts is None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start+1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, add_nonlin=False,
                                    priority=priority, verbose=verbose)

    #@profile
    def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY,
                                      add_nonlin=True, active_ts=None, verbose=False):
        """
            This function adds both linear and non-linear predicates from
            actions that are active within the range of active_ts.
        """
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1])+1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, priority=priority,
                                    add_nonlin=add_nonlin, verbose=verbose)

    #@profile
    def _update_ll_params(self):
        """
            update plan's parameters from low level grb_vars.
            expected to be called after each optimization.
        """
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    #@profile
    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        """
            This function creates low level parameters for each parameter in the plan,
            initialized he corresponding grb_vars for each attributes in each timestep,
            update the grb models
            adds in equality constraints,
            construct a dictionary as param-to-ll_param mapping.
        """
        if active_ts == None:
            active_ts=(0, plan.horizon-1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    #@profile
    def _add_obj_bexprs(self, obj_bexprs):
        """
            This function adds objective bounded expressions to the Prob class
            in sco.
        """
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    #@profile
    def _get_trajopt_obj(self, plan, active_ts=None):
        """
            This function selects parameter of type Robot and Can and returns
            the expression e(x) = |Px|^2
            Which optimize trajectory so that robot and can's attributes in
            current timestep is close to that of next timestep.
            forming a straight line between each end points.

            Where P is the KT x KT matrix, where Px is the difference of
            value in current timestep compare to next timestep
        """
        if active_ts == None:
            active_ts = (0, plan.horizon-1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if isinstance(param, Object):
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        T = end - start + 1
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)
                        KT = K*T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)
                        Q *= self.trajopt_coeff/float(plan.horizon)

                        quad_expr = None
                        quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1)))
                        param_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F')
                        attr_val = getattr(param, attr_name)
                        init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F')
                        sco_var = self.create_variable(param_ll_grb_vars, init_val)
                        bexpr = BoundExpr(quad_expr, sco_var)
                        traj_objs.append(bexpr)
        return traj_objs
Example #7
0
    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False, resample=False, smoothing = False):
        if callback is not None: viewer = callback()
        self.plan = plan
        vehicle = plan.params['vehicle']
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        initial_trust_region_size = self.initial_trust_region_size
        if resample:
            tol = 1e-3
            def  variable_helper():
                error_bin = []
                for sco_var in self._prob._vars:
                    for grb_var, val in zip(sco_var.get_grb_vars(), sco_var.get_value()):
                        grb_name = grb_var[0].VarName
                        one, two = grb_name.find('-'), grb_name.find('-(')
                        param_name = grb_name[1:one]
                        attr = grb_name[one+1:two]
                        index = eval(grb_name[two+1:-1])
                        param = plan.params[param_name]
                        if not np.allclose(val, getattr(param, attr)[index]):
                            error_bin.append((grb_name, val, getattr(param, attr)[index]))
                if len(error_bin) != 0:
                    print "something wrong"
                    if DEBUG: import ipdb; ipdb.set_trace()

            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            obj_bexprs = []

            ## this is an objective that places
            ## a high value on matching the resampled values
            failed_preds = plan.get_failed_preds(active_ts = active_ts, priority=priority, tol = tol)
            rs_obj = self._resample(plan, failed_preds, sample_all = True)
            # import ipdb; ipdb.set_trace()
            # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_all_timesteps_of_actions(plan, priority=priority,
                add_nonlin=False, active_ts= active_ts, verbose=verbose)
            obj_bexprs.extend(rs_obj)
            self._add_obj_bexprs(obj_bexprs)
            initial_trust_region_size = 1e3
            # import ipdb; ipdb.set_trace()
        else:
            self._bexpr_to_pred = {}
            if priority == -2:
                """
                Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
                tol = 1e-1
                initial_trust_region_size = 1e3
            elif priority == -1:
                """
                Solve the optimization problem while enforcing every constraints.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                    add_nonlin=True)
                tol = 1e-1
            elif priority >= 0:
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                                   active_ts=active_ts, verbose=verbose)
                tol=1e-3

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size

        if smoothing:
            solv.initial_penalty_coeff = self.smooth_penalty_coeff
        else:
            solv.initial_penalty_coeff = self.init_penalty_coeff

        solv.max_merit_coeff_increases = self.max_merit_coeff_increases

        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        if priority == MAX_PRIORITY:
            success = success or len(plan.get_failed_preds(tol=tol, active_ts=active_ts, priority=priority)) == 0
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        if resample:
            # During resampling phases, there must be changes added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds(active_ts = active_ts, priority=priority)) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred(active_ts=(0,active_ts[1]), priority=priority)[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ##Restore free_attrs values
        plan.restore_free_attrs()

        self.reset_variable()
        print "priority: {}\n".format(priority)

        return success
Example #8
0
class CanSolver(LLSolver):
    def __init__(self, early_converge=False):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e6
        self.initial_trust_region_size = 1e-1
        self.init_penalty_coeff = 1e1
        # self.init_penalty_coeff = 1e5
        self.max_merit_coeff_increases = 5
        self._param_to_ll = {}
        self.early_converge = early_converge
        self.child_solver = None

    def backtrack_solve(self, plan, callback=None, anum=0, verbose=False):
        if anum > len(plan.actions) - 1:
            return True
        a = plan.actions[anum]
        active_ts = a.active_timesteps
        inits = {}
        # Moveto -> Grasp -> Movetoholding -> Putdown
        if a.name == "moveto":
            ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose)
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == "movetoholding":
            ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can)
            ## find possible values for the final pose
            rs_param = a.params[3]
        elif a.name == "grasp":
            ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            ## sample the ee_pose
            rs_param = a.params[5]
        elif a.name == "putdown":
            ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            rs_param = a.params[4]
        else:
            raise NotImplemented

        def recursive_solve():
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                old_param_map = {}
                if p.is_symbol():
                    if p not in a.params:
                        continue
                    for attr in attr_map[getattr(p, "_type")]:
                        old_param_map[attr] = p._free_attrs[attr].copy()
                        p._free_attrs[attr][:] = 0
                else:
                    for attr in attr_map[getattr(p, "_type")]:
                        old_param_map[attr] = p._free_attrs[attr][:, active_ts[1]].copy()
                        p._free_attrs[attr][:, active_ts[1]] = 0
                old_params_free[p] = old_param_map
            self.child_solver = CanSolver()
            if self.child_solver.backtrack_solve(plan, callback=callback, anum=anum + 1, verbose=verbose):
                return True
            ## reset free_attrs
            for p in a.params:
                if p.is_symbol():
                    if p not in a.params:
                        continue
                    for attr in attr_map[getattr(p, "_type")]:
                        p._free_attrs[attr] = old_params_free[p][attr]
                else:
                    for attr in attr_map[getattr(p, "_type")]:
                        p._free_attrs[attr][:, active_ts[1]] = old_params_free[p][attr]
            return False

        if rs_param.is_fixed(attr_map[getattr(rs_param, "_type")]):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = CanSolver()
            success = self.child_solver.solve(
                plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True
            )

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = {}
        for attr in attr_map[getattr(rs_param, "_type")]:
            rs_free[attr] = rs_param._free_attrs[attr].copy()
            rs_param._free_attrs[attr][:] = 0

        # Target is needed to sample the ee_pose, ee_pose is needed to sample the robot_pose
        # InContact, Robot, EEPose, Target
        targets = plan.get_param("InContact", 2, {1: rs_param}, negated=False)
        # EEReachable, Robot, RobotPose, EEPose
        robot_pose = plan.get_param("EEReachable", 1, {2: rs_param}, negated=False)

        if len(targets) > 1:
            import pdb

            pdb.set_trace()
        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        robot_poses = []

        if len(targets) == 0 or np.all([targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]):
            ## sample 4 possible poses
            coords = list(itertools.product(range(WIDTH), range(HEIGHT)))
            random.shuffle(coords)
            robot_poses = [np.array(x)[:, None] for x in coords[:4]]
        elif np.any([targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]):
            ## there shouldn't be only some free_attrs set
            raise NotImplementedError
        else:
            grasp_dirs = [np.array([0, -1]), np.array([1, 0]), np.array([0, 1]), np.array([-1, 0])]
            grasp_len = plan.params["pr2"].geom.radius + targets[0].geom.radius
            for g_dir in grasp_dirs:
                grasp = (g_dir * grasp_len).reshape((2, 1))
                robot_poses.append(targets[0].value + grasp)

        for rp in robot_poses:
            rs_param.value = rp
            success = False
            self.child_solver = CanSolver()
            success = self.child_solver.solve(
                plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True
            )
            if success:
                if recursive_solve():
                    break
                else:
                    success = False

        for attr in attr_map[getattr(rs_param, "_type")]:
            rs_param._free_attrs[attr] = rs_free[attr]

        return success

    def sample_ee_from_target(self, target):
        """
            Sample all possible EE Pose that pr2 can grasp with

            target: parameter of type Target
            return: list of tuple in the format of (ee_pos, ee_rot)
        """
        possible_ee_poses = []
        targ_pos, targ_rot = target.value.flatten(), target.rotation.flatten()
        ee_pos = targ_pos.copy()
        target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot)
        # rotate can's local z-axis by the amount of linear spacing between 0 to 2pi
        angle_range = np.linspace(0, np.pi * 2, num=SAMPLE_SIZE)
        for rot in angle_range:
            target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot)
            # rotate new ee_pose around can's rotation axis
            rot_mat = matrixFromAxisAngle([0, 0, rot])
            ee_trans = target_trans.dot(rot_mat)
            ee_rot = OpenRAVEBody.obj_pose_from_transform(ee_trans)[3:]
            possible_ee_poses.append((ee_pos, ee_rot))
        return possible_ee_poses

    def sample_rp_from_ee(self, env, rs_param, ee_pose):
        """
            Using Inverse Kinematics to solve all possible robot armpose and basepose that reaches this ee_pose

            robot_pose: list of full robot pose with format of(basepose, backHeight, lArmPose, lGripper, rArmPose, rGripper)
        """
        ee_pos, ee_rot = ee_pose[0], ee_pose[1]
        base_raidus = np.linspace(-1, 1, num=BASE_SAMPLE_SIZE)
        poses = list(itertools.product(base_raidus, base_raidus))
        # sample
        dummy_body = OpenRAVEBody(env, rs_param.name, PR2())
        possible_robot_pose = []
        for pos in poses:
            bp = pos + ee_pos[:2]
            vec = ee_pos[:2] - bp
            vec = vec / np.linalg.norm(vec)
            theta = math.atan2(vec[1], vec[0])
            bp = np.array([bp[0], bp[1], theta])
            dummy_body.set_pose(bp)
            arm_poses = robot_body.ik_arm_pose(ee_pos, ee_rot)
            for arm_pose in arm_poses:
                possible_robot_pose.append(
                    (bp, ee_arm_poses[0], rs_param.lArmPose, rs_param.lGripper, ee_arm_poses[1:], rs_param.rGripper)
                )

        import ipdb

        ipdb.set_trace()
        dummy_body.delete()
        return possible_robot_pose

    def closest_arm_pose(self, arm_poses, cur_arm_poses):
        pass

    def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False):
        success = False

        if force_init or not plan.initialized:
            ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose)
            plan.initialized = True
        success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose)
        if success:
            return success

        for _ in range(n_resamples):
            ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints
            self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose)
            success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose)
            if success:
                return success
        return success

    def _solve_opt_prob(
        self, plan, priority, callback=None, init=True, init_from_prev=False, active_ts=None, verbose=False
    ):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = self._resample(plan, failed_preds)
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, "min-vel"))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(plan, priority=-1, add_nonlin=True, verbose=verbose)
            self._add_all_timesteps_of_actions(plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose)
            tol = 1e-4

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method="penalty_sqp", tol=tol, verbose=True)
        import ipdb

        ipdb.set_trace()
        self._update_ll_params()
        return success

    def _get_transfer_obj(self, plan, norm):
        transfer_objs = []
        if norm == "min-vel":
            for param in plan.params.values():
                if param._type in ["Robot", "Can"]:
                    T = plan.horizon
                    K = 2
                    pose = param.pose
                    assert (K, T) == pose.shape
                    KT = K * T
                    v = -1 * np.ones((KT - K, 1))
                    d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                    # [:,0] allows numpy to see v and d as one-dimensional so
                    # that numpy will create a diagonal matrix with v and d as a diagonal
                    P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                    Q = np.dot(np.transpose(P), P)
                    cur_pose = pose.reshape((KT, 1), order="F")
                    A = -2 * cur_pose.T.dot(Q)
                    b = cur_pose.T.dot(Q.dot(cur_pose))
                    # QuadExpr is 0.5*x^Tx + Ax + b
                    quad_expr = QuadExpr(2 * self.transfer_coeff * Q, self.transfer_coeff * A, self.transfer_coeff * b)
                    ll_param = self._param_to_ll[param]
                    ll_grb_vars = ll_param.pose.reshape((KT, 1), order="F")
                    bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose))
                    transfer_objs.append(bexpr)
        else:
            raise NotImplemented
        return transfer_objs

    def _resample(self, plan, preds):
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            val, attr_inds = pred.resample(negated, t, plan)
            ## no resample defined for that pred
            if val is not None:
                break
        if val is None:
            return None
        t_local = t
        bexprs = []
        i = 0
        for p in attr_inds:
            ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            if p.is_symbol():
                t_local = 0
            n_vals = 0
            grb_vars = []
            for attr, ind_arr in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local]))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to this value
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2 * val[i + j] * np.ones((1, 1))
                b = np.ones((1, 1)) * np.power(val[i + j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff, b * self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order="F")
                init_val = np.ones((1, 1)) * val[i + j]
                bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i + j].reshape((1, 1))))

                bexprs.append(bexpr)

            i += n_vals
        return bexprs

    def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False):
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict["hl_info"] == "hl_state":
            start, end = pred_dict["active_timesteps"]
            active_range = range(start, end + 1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict["negated"]
            pred = pred_dict["pred"]

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority:
                return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            # TODO: REMOVE line below, for tracing back predicate for debugging.
                            bexpr.pred = pred
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ["all"]
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend([param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(
        self, plan, priority=MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False
    ):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1]:
                continue
            if action_end < active_ts[0]:
                continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(
                        pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose
                    )
                if action_end <= active_ts[1]:
                    self._add_pred_dict(
                        pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose
                    )
            ## add all of the linear ineqs
            timesteps = range(max(action_start + 1, active_ts[0]), min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose)

    def _add_all_timesteps_of_actions(
        self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False
    ):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start > active_ts[1]:
                continue
            if action_end < active_ts[0]:
                continue

            timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1]) + 1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose)

    def _update_ll_params(self):
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    def _get_trajopt_obj(self, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ["Robot", "Can"]:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        T = end - start + 1
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)
                        KT = K * T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)

                        quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1)))
                        robot_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(robot_ll, attr_name)
                        robot_ll_grb_vars = ll_attr_val.reshape((KT, 1), order="F")
                        # init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F')
                        # bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val))
                        bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars))
                        traj_objs.append(bexpr)
        return traj_objs

    def _spawn_sco_var_for_pred(self, pred, t):

        i = 0
        x = np.empty(pred.x_dim, dtype=object)
        v = np.empty(pred.x_dim)
        for p in pred.attr_inds:
            for attr, ind_arr in pred.attr_inds[p]:
                n_vals = len(ind_arr)
                ll_p = self._param_to_ll[p]
                if p.is_symbol():
                    x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                    v[i : i + n_vals] = getattr(p, attr)[ind_arr, 0]
                else:
                    x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, t - self.ll_start]
                    v[i : i + n_vals] = getattr(p, attr)[ind_arr, t]
                i += n_vals
        if pred.dynamic:
            ## include the variables from the next time step
            for p in pred.attr_inds:
                for attr, ind_arr in pred.attr_inds[p]:
                    n_vals = len(ind_arr)
                    ll_p = self._param_to_ll[p]
                    if p.is_symbol():
                        x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                        v[i : i + n_vals] = getattr(p, attr)[ind_arr, 0]
                    else:
                        x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, t + 1 - self.ll_start]
                        v[i : i + n_vals] = getattr(p, attr)[ind_arr, t]
                    i += n_vals
        assert i >= pred.x_dim
        x = x.reshape((pred.x_dim, 1))
        v = v.reshape((pred.x_dim, 1))
        return Variable(x, v)
Example #9
0
    def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None,
                        verbose=False):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts==None:
            active_ts = (0, plan.horizon-1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -2:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
            # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon-1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            obj_bexprs.extend(self._resample(plan, failed_preds))
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel'))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=-1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)

            self._add_all_timesteps_of_actions(
                plan, priority=1, add_nonlin=True, verbose=verbose)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose)
            tol=1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)
        # if callback is not None: callback(True)
        return success
Example #10
0
class NAMOSolver(LLSolver):
    def __init__(self, early_converge=True, transfer_norm='min-vel'):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e6
        self.init_penalty_coeff = 1e2
        self.child_solver = None
        self._param_to_ll = {}
        self._failed_groups = []

        self.early_converge = early_converge
        self.transfer_norm = transfer_norm

    def backtrack_solve(self, plan, callback=None, verbose=False):
        plan.save_free_attrs()
        success = self._backtrack_solve(plan,
                                        callback,
                                        anum=0,
                                        verbose=verbose)
        plan.restore_free_attrs()
        return success

    def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False):
        if anum > len(plan.actions) - 1:
            return True
        a = plan.actions[anum]
        active_ts = a.active_timesteps
        inits = {}
        if a.name == 'moveto':
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == 'movetoholding':
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == 'grasp':
            ## sample the grasp/grasp_pose
            rs_param = a.params[4]
        elif a.name == 'putdown':
            ## sample the end pose
            rs_param = a.params[4]
        else:
            raise NotImplemented

        def recursive_solve():
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                if p.is_symbol():
                    if p not in a.params: continue
                    old_params_free[p] = p._free_attrs['value'].copy()
                    p._free_attrs['value'][:] = 0
                else:
                    old_params_free[p] = p._free_attrs[
                        'pose'][:, active_ts[1]].copy()
                    p._free_attrs['pose'][:, active_ts[1]] = 0
            self.child_solver = NAMOSolver()
            if self.child_solver._backtrack_solve(plan,
                                                  callback=callback,
                                                  anum=anum + 1,
                                                  verbose=verbose):
                return True
            ## reset free_attrs
            for p in a.params:
                if p.is_symbol():
                    if p not in a.params: continue
                    p._free_attrs['value'] = old_params_free[p]
                else:
                    p._free_attrs['pose'][:, active_ts[1]] = old_params_free[p]
            return False

        if not np.all(rs_param._free_attrs['value']):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = NAMOSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = rs_param._free_attrs['value'].copy()
        rs_param._free_attrs['value'][:] = 0

        targets = plan.get_param('InContact', 2, {1: rs_param}, negated=False)
        if len(targets) > 1:
            import pdb
            pdb.set_trace()

        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        robot_poses = []

        if len(targets) == 0 or np.all(targets[0]._free_attrs['value']):
            ## sample 4 possible poses
            coords = list(itertools.product(range(WIDTH), range(HEIGHT)))
            random.shuffle(coords)
            robot_poses = [np.array(x)[:, None] for x in coords[:4]]
        elif np.any(targets[0]._free_attrs['value']):
            ## there shouldn't be only some free_attrs set
            raise NotImplementedError
        else:
            grasp_dirs = [
                np.array([0, -1]),
                np.array([1, 0]),
                np.array([0, 1]),
                np.array([-1, 0])
            ]
            grasp_len = plan.params['pr2'].geom.radius + targets[
                0].geom.radius - dsafe
            for g_dir in grasp_dirs:
                grasp = (g_dir * grasp_len).reshape((2, 1))
                robot_poses.append(targets[0].value + grasp)

        for rp in robot_poses:
            rs_param.value = rp
            success = False
            self.child_solver = NAMOSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)
            if success:
                if recursive_solve():
                    break
                else:
                    success = False
        rs_param._free_attrs['value'] = rs_free
        return success

    def solve(self,
              plan,
              callback=None,
              n_resamples=5,
              active_ts=None,
              verbose=False,
              force_init=False):
        success = False

        if force_init or not plan.initialized:
            ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan,
                                 priority=-1,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            plan.initialized = True
        success = self._solve_opt_prob(plan,
                                       priority=1,
                                       callback=callback,
                                       active_ts=active_ts,
                                       verbose=verbose)
        success = plan.satisfied(active_ts)
        if success:
            return success

        for _ in range(n_resamples):
            ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints
            self._solve_opt_prob(plan,
                                 priority=0,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            success = self._solve_opt_prob(plan,
                                           priority=1,
                                           callback=callback,
                                           active_ts=active_ts,
                                           verbose=verbose)
            success = plan.satisfied(active_ts)
            if success:
                return success
        return success

    # @profile
    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        active_ts=None,
                        verbose=False,
                        resample=True):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          active_ts=active_ts,
                                                          verbose=verbose)
            tol = 1e-2
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            obj_bexprs = []

            if resample:
                failed_preds = plan.get_failed_preds()
                ## this is an objective that places
                ## a high value on matching the resampled values
                obj_bexprs.extend(self._resample(plan, failed_preds))

            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          add_nonlin=True,
                                                          verbose=verbose)
            self._add_all_timesteps_of_actions(plan,
                                               priority=0,
                                               add_nonlin=False,
                                               verbose=verbose)
            tol = 1e-2
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-4

        solv = Solver()
        solv.initial_penalty_coeff = self.init_penalty_coeff
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=verbose)
        self._update_ll_params()
        self._failed_groups = self._prob.nonconverged_groups
        return success

    def get_value(self, plan, penalty_coeff=1e0):
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan)
        model.update()
        self._bexpr_to_pred = {}

        obj_bexprs = self._get_trajopt_obj(plan)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan,
                                           priority=1,
                                           add_nonlin=True,
                                           verbose=False)
        return self._prob.get_value(penalty_coeff)

    def _get_transfer_obj(self, plan, norm):
        transfer_objs = []
        if norm in ['min-vel', 'l2']:
            for param in plan.params.values():
                # if param._type in ['Robot', 'Can']:
                K = 2
                if param.is_symbol():
                    T = 1
                    pose = param.value
                else:
                    T = plan.horizon
                    pose = param.pose
                assert (K, T) == pose.shape
                KT = K * T
                if norm == 'min-vel' and not param.is_symbol():
                    v = -1 * np.ones((KT - K, 1))
                    d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                    # [:,0] allows numpy to see v and d as one-dimensional so
                    # that numpy will create a diagonal matrix with v and d as a diagonal
                    P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                    Q = np.dot(np.transpose(P), P)
                else:  ## l2
                    Q = np.eye(KT)
                cur_pose = pose.reshape((KT, 1), order='F')
                A = -2 * cur_pose.T.dot(Q)
                b = cur_pose.T.dot(Q.dot(cur_pose))
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2 * self.transfer_coeff * Q,
                                     self.transfer_coeff * A,
                                     self.transfer_coeff * b)
                ll_param = self._param_to_ll[param]
                if param.is_symbol():
                    ll_grb_vars = ll_param.value.reshape((KT, 1), order='F')
                else:
                    ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F')
                bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose))
                transfer_objs.append(bexpr)
        elif norm == 'straightline':
            return self._get_trajopt_obj(plan)
        else:
            raise NotImplementedError
        return transfer_objs

    def _resample(self, plan, preds):
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            if (self.early_converge and self._failed_groups != ['all']
                    and self._failed_groups != [] and not np.any([
                        param.name in self._failed_groups
                        for param in pred.params
                    ])):
                ## using early converge and one group didn't converge
                ## but no parameter from this pred in that group
                continue

            val, attr_inds = pred.resample(negated, t, plan)
            ## no resample defined for that pred
            if val is not None: break
        if val is None:
            # import pdb; pdb.set_trace()
            return []

        t_local = t
        bexprs = []
        i = 0
        for p in attr_inds:
            ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            if p.is_symbol(): t_local = 0
            n_vals = 0
            grb_vars = []
            for attr, ind_arr in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local]))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to this value
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2 * val[i + j] * np.ones((1, 1))
                b = np.ones((1, 1)) * np.power(val[i + j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff,
                                     b * self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                init_val = np.ones((1, 1)) * val[i + j]
                bexpr = BoundExpr(quad_expr,
                                  Variable(v_arr, val[i + j].reshape((1, 1))))

                bexprs.append(bexpr)

            i += n_vals
        return bexprs

    def _add_pred_dict(self,
                       pred_dict,
                       effective_timesteps,
                       add_nonlin=True,
                       priority=MAX_PRIORITY,
                       verbose=False):
        # verbose=True
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end + 1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend(
                                    [param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(self,
                                                 plan,
                                                 priority=MAX_PRIORITY,
                                                 add_nonlin=False,
                                                 active_ts=None,
                                                 verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1] and action_start > active_ts[0]:
                continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start + 1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    add_nonlin=False,
                                    priority=priority,
                                    verbose=verbose)

    def _add_all_timesteps_of_actions(self,
                                      plan,
                                      priority=MAX_PRIORITY,
                                      add_nonlin=True,
                                      active_ts=None,
                                      verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start >= active_ts[1] and action_start > active_ts[0]:
                continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1]) + 1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    priority=priority,
                                    add_nonlin=add_nonlin,
                                    verbose=verbose)

    def _update_ll_params(self):
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    def _get_trajopt_obj(self, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ['Robot', 'Can']:
                T = end - start + 1
                K = 2
                pose = param.pose
                KT = K * T
                v = -1 * np.ones((KT - K, 1))
                d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                # [:,0] allows numpy to see v and d as one-dimensional so
                # that numpy will create a diagonal matrix with v and d as a diagonal
                P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                Q = np.dot(np.transpose(P), P)

                Q *= TRAJOPT_COEFF

                quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1)))
                robot_ll = self._param_to_ll[param]
                robot_ll_grb_vars = robot_ll.pose.reshape((KT, 1), order='F')
                init_val = param.pose[:, start:end + 1].reshape((KT, 1),
                                                                order='F')
                bexpr = BoundExpr(quad_expr,
                                  Variable(robot_ll_grb_vars, init_val))
                traj_objs.append(bexpr)
        return traj_objs

    def _spawn_sco_var_for_pred(self, pred, t):

        i = 0
        x = np.empty(pred.x_dim, dtype=object)
        v = np.empty(pred.x_dim)
        for p in pred.attr_inds:
            for attr, ind_arr in pred.attr_inds[p]:
                n_vals = len(ind_arr)
                ll_p = self._param_to_ll[p]
                if p.is_symbol():
                    x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                    v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0]
                else:
                    x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr,
                                                          t - self.ll_start]
                    v[i:i + n_vals] = getattr(p, attr)[ind_arr, t]
                i += n_vals
        if pred.dynamic:
            ## include the variables from the next time step
            for p in pred.attr_inds:
                for attr, ind_arr in pred.attr_inds[p]:
                    n_vals = len(ind_arr)
                    ll_p = self._param_to_ll[p]
                    if p.is_symbol():
                        x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                        v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0]
                    else:
                        x[i:i + n_vals] = getattr(ll_p,
                                                  attr)[ind_arr,
                                                        t + 1 - self.ll_start]
                        v[i:i + n_vals] = getattr(p, attr)[ind_arr, t + 1]
                    i += n_vals
        assert i >= pred.x_dim
        x = x.reshape((pred.x_dim, 1))
        v = v.reshape((pred.x_dim, 1))
        return Variable(x, v)
Example #11
0
    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        active_ts=None,
                        verbose=False,
                        resample=True):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          active_ts=active_ts,
                                                          verbose=verbose)
            tol = 1e-2
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            obj_bexprs = []

            if resample:
                failed_preds = plan.get_failed_preds()
                ## this is an objective that places
                ## a high value on matching the resampled values
                obj_bexprs.extend(self._resample(plan, failed_preds))

            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          add_nonlin=True,
                                                          verbose=verbose)
            self._add_all_timesteps_of_actions(plan,
                                               priority=0,
                                               add_nonlin=False,
                                               verbose=verbose)
            tol = 1e-2
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-4

        solv = Solver()
        solv.initial_penalty_coeff = self.init_penalty_coeff
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=verbose)
        self._update_ll_params()
        self._failed_groups = self._prob.nonconverged_groups
        return success
Example #12
0
class CanSolver(LLSolver):
    def __init__(self, early_converge=False):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e10
        self.initial_trust_region_size = 1e-2
        self.init_penalty_coeff = 1e1
        # self.init_penalty_coeff = 1e5
        self.max_merit_coeff_increases = 5
        self._param_to_ll = {}
        self.early_converge = early_converge
        self.child_solver = None
        self.solve_priorities = [2]

    def _solve_helper(self, plan, callback, active_ts, verbose):
        # certain constraints should be solved first
        success = False
        for priority in self.solve_priorities:
            success = self._solve_opt_prob(plan,
                                           priority=priority,
                                           callback=callback,
                                           active_ts=active_ts,
                                           verbose=verbose)
            # if not success:
            #     return success
        # return True
        return success

    def backtrack_solve(self, plan, callback=None, anum=0, verbose=False):
        if anum > len(plan.actions) - 1:
            return True
        a = plan.actions[anum]
        active_ts = a.active_timesteps
        inits = {}
        # Moveto -> Grasp -> Movetoholding -> Putdown
        robot = a.params[0]
        lookup_preds = 'EEReachable'
        if a.name == 'moveto':
            ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose)
            ## sample ?end - RobotPose
            rs_param = a.params[2]
        elif a.name == 'movetoholding':
            ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can)
            ## sample ?end - RobotPose
            rs_param = a.params[2]
        elif a.name == 'grasp':
            ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            ## sample ?ee - EEPose
            rs_param = a.params[4]
            lookup_preds = 'InContact'
        elif a.name == 'putdown':
            ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            ## sample ?ep - RobotPose
            rs_param = a.params[5]
        else:
            raise NotImplemented

        def recursive_solve():
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                old_param_map = {}
                if p.is_symbol():
                    if p not in a.params: continue
                    for attr in attr_map[getattr(p, '_type')]:
                        old_param_map[attr] = p._free_attrs[attr].copy()
                        p._free_attrs[attr][:] = 0
                else:
                    for attr in attr_map[getattr(p, '_type')]:
                        old_param_map[attr] = p._free_attrs[
                            attr][:, active_ts[1]].copy()
                        p._free_attrs[attr][:, active_ts[1]] = 0
                old_params_free[p] = old_param_map
            self.child_solver = CanSolver()
            if self.child_solver.backtrack_solve(plan,
                                                 callback=callback,
                                                 anum=anum + 1,
                                                 verbose=verbose):
                return True
            ## reset free_attrs
            for p in a.params:
                if p.is_symbol():
                    if p not in a.params: continue
                    for attr in attr_map[getattr(p, '_type')]:
                        p._free_attrs[attr] = old_params_free[p][attr]
                else:
                    for attr in attr_map[getattr(p, '_type')]:
                        p._free_attrs[attr][:, active_ts[1]] = old_params_free[
                            p][attr]
            return False

        if rs_param.is_fixed(attr_map[getattr(rs_param, '_type')]):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = CanSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = {}

        for attr in attr_map[getattr(rs_param, '_type')]:
            rs_free[attr] = rs_param._free_attrs[attr].copy()
            rs_param._free_attrs[attr][:] = 0

        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        ######################################################################
        # Want robot_poses to be a list of dict, mapping from attr to value
        robot_poses = []

        if a.name == 'moveto':  #rs_param is ?end - RobotPose
            ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param})
            # get a collision free base_pose, with all other pose stay the same
        elif a.name == 'movetoholding':  #rs_param is ?end - RobotPose
            ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param})
            targets = plan.get_param('InContact', 2, {1: ee_poses})
            # sample ee_pose associated to this target and sample robot_pose for it
        elif a.name == 'grasp':  #rs_param is ?ee - EEPose, robot_pose
            ee_poses = [rs_param]
            targets = plan.get_param(lookup_preds, 2, {1: rs_param})
            # sample ee_pose associated to this target and sample robot_pose for it
        elif a.name == 'putdown':
            ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param})
            targets = []
            # get a collision free base_pose, with all other pose stay the same
        else:
            raise NotImplemented
        # import ipdb; ipdb.set_trace()

        if len(ee_poses) > 1:
            import pdb
            pdb.set_trace()

        if len(ee_poses) == 0 or np.all([
                ee_poses[0]._free_attrs[attr]
                for attr in attr_map[ee_poses[0]._type]
        ]):
            # Currently just sample reasonable robot end poses for the moveto action
            t = active_ts[0]
            ee_pose = ee_poses[0]
            targets = plan.get_param('InContact', 2, {1: ee_pose})
            if len(targets) == 0 or len(targets) > 1:
                import ipdb
                ipdb.set_trace()
            sampled_ee_poses = sampling.get_ee_from_target(
                targets[0].value, targets[0].rotation)
            cur_robot_pose = {
                'backHeight': robot.backHeight[:, t],
                'lArmPose': robot.lArmPose[:, t],
                'lGripper': robot.lGripper[:, t],
                'rArmPose': robot.rArmPose[:, t],
                'rGripper': robot.rGripper[:, t]
            }
            for ee_pose in sampled_ee_poses:
                possible_base = sampling.get_base_poses_around_pos(
                    t, robot, ee_pose[0], BASE_SAMPLE_SIZE, 0.6)
                robot_base = sampling.closest_base_poses(
                    possible_base, robot.pose[:, t])
                robot_pose = cur_robot_pose.copy()
                robot_pose['value'] = robot_base
                robot_poses.append(robot_pose)

        elif np.any([
                ee_poses[0]._free_attrs[attr]
                for attr in attr_map[ee_poses[0]._type]
        ]):
            ## there shouldn't be only some free_attrs set
            raise NotImplementedError
        else:
            ee_pose = ee_poses[0]
            targets = plan.get_param('InContact', 2, {1: ee_pose})
            possible_ee_poses = sampling.get_ee_from_target(
                targets[0].value, targets[0].rotation)
            for ee_pos, ee_rot in possible_ee_poses:
                robot_poses.append({'value': ee_pos, 'rotation': ee_rot})
            # import ipdb; ipdb.set_trace()

        ##########################################################################

        for rp in robot_poses:
            for attr in attr_map[rs_param._type]:
                dim = len(rp[attr])
                setattr(rs_param, attr, rp[attr].reshape((dim, 1)))
            success = False
            self.child_solver = CanSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)
            if success:
                if recursive_solve():
                    break
                else:
                    success = False

        for attr in attr_map[getattr(rs_param, '_type')]:
            rs_param._free_attrs[attr] = rs_free[attr]

        return success

    def sample_rp_from_ee(t, plan, ee_poses, robot):
        target_pose = target.value[:, 0]
        dummy_body = OpenRAVEBody(plan.env, robot.name + '_dummy', robot.geom)
        possible_rps = []
        for ee_pose in ee_poses:
            base_pose = sampling.get_col_free_base_pose_around_target(
                t, plan, ee_pose.value, robot).reshape((1, 3))
            ik_arm_poses = dummy_body.ik_arm_pose(ee_pose[0], ee_pose[1])
            cur_arm_poses = np.c_[robot.backHeight, robot.rArmPose].reshape(
                (8, ))
            chosen_pose = sampling.closest_arm_pose(ik_arm_poses,
                                                    cur_arm_poses)
            torso_pose, arm_pose = chosen_pose[0], chosen_pose[1:]
            possible_rp = np.hstack(
                (base_pose, torso_pose, robot.lArmPose[:, t],
                 robot.lGripper[:, t], rArmPose, robot.rGripper[:, t]))
            possible_rps.append(possible_rp)

        dummy_body.delete()
        if len(possible_rps) <= 0:
            print "something went wrong"
            # import ipdb; ipdb.set_trace()

        return possible_rps

    def solve(self,
              plan,
              callback=None,
              n_resamples=5,
              active_ts=None,
              verbose=False,
              force_init=False):
        success = False

        if force_init or not plan.initialized:
            ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan,
                                 priority=-2,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            self._solve_opt_prob(plan,
                                 priority=-1,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            plan.initialized = True

        success = self._solve_helper(plan,
                                     callback=callback,
                                     active_ts=active_ts,
                                     verbose=verbose)
        # fp = plan.get_failed_preds()
        # if len(fp) == 0:
        #     return True
        if success:
            return success

        for _ in range(n_resamples):
            ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints
            self._solve_opt_prob(plan,
                                 priority=0,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)

            # self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose)
            success = self._solve_opt_prob(plan,
                                           priority=2,
                                           callback=callback,
                                           active_ts=active_ts,
                                           verbose=verbose)
            # fp = plan.get_failed_preds()
            # if len(fp) == 0:
            #     return True
            if success:
                return success
        return success

    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        init=True,
                        active_ts=None,
                        verbose=False):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -2:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=False)
            # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            obj_bexprs.extend(self._resample(plan, failed_preds))
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel'))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=-1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)

            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               verbose=verbose)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=priority,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)
        # if callback is not None: callback(True)
        return success

    def _get_transfer_obj(self, plan, norm):
        transfer_objs = []
        if norm == 'min-vel':
            for param in plan.params.values():
                # if param._type in ['Robot', 'Can', 'EEPose']:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        if param.is_symbol():
                            T = 1
                        else:
                            T = plan.horizon
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)

                        # pose = param.pose
                        assert (K, T) == attr_val.shape
                        KT = K * T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)
                        cur_val = attr_val.reshape((KT, 1), order='F')
                        A = -2 * cur_val.T.dot(Q)
                        b = cur_val.T.dot(Q.dot(cur_val))
                        # QuadExpr is 0.5*x^Tx + Ax + b
                        quad_expr = QuadExpr(2 * self.transfer_coeff * Q,
                                             self.transfer_coeff * A,
                                             self.transfer_coeff * b)
                        param_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1),
                                                                order='F')
                        bexpr = BoundExpr(quad_expr,
                                          Variable(param_ll_grb_vars, cur_val))
                        transfer_objs.append(bexpr)
        else:
            raise NotImplemented
        return transfer_objs

    def _resample(self, plan, preds):
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            val, attr_inds = pred.resample(negated, t, plan)
            ## no resample defined for that pred
            if val is not None: break
        if val is None:
            return []

        bexprs = []
        i = 0
        for p in attr_inds:
            ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            n_vals = 0
            grb_vars = []
            for attr, ind_arr, t in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(list(
                    getattr(ll_p, attr)[ind_arr, t].flatten()))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to this value
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2 * val[i + j] * np.ones((1, 1))
                b = np.ones((1, 1)) * np.power(val[i + j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff,
                                     b * self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                init_val = np.ones((1, 1)) * val[i + j]
                bexpr = BoundExpr(quad_expr,
                                  Variable(v_arr, val[i + j].reshape((1, 1))))

                bexprs.append(bexpr)

            i += n_vals
        return bexprs

    def _add_pred_dict(self,
                       pred_dict,
                       effective_timesteps,
                       add_nonlin=True,
                       priority=MAX_PRIORITY,
                       verbose=False):
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end + 1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            # TODO: REMOVE line below, for tracing back predicate for debugging.
                            bexpr.pred = pred
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend(
                                    [param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(self,
                                                 plan,
                                                 priority=MAX_PRIORITY,
                                                 add_nonlin=False,
                                                 active_ts=None,
                                                 verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1] and action_start > active_ts[0]:
                continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start + 1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    add_nonlin=False,
                                    priority=priority,
                                    verbose=verbose)

    def _add_all_timesteps_of_actions(self,
                                      plan,
                                      priority=MAX_PRIORITY,
                                      add_nonlin=True,
                                      active_ts=None,
                                      verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start >= active_ts[1] and action_start > active_ts[0]:
                continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1]) + 1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    priority=priority,
                                    add_nonlin=add_nonlin,
                                    verbose=verbose)

    def _update_ll_params(self):
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    def _get_trajopt_obj(self, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ['Robot', 'Can']:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        T = end - start + 1
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)
                        KT = K * T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)
                        Q *= TRAJOPT_COEFF

                        quad_expr = None
                        if attr_name == 'pose' and param._type == 'Robot':
                            quad_expr = QuadExpr(BASE_MOVE_COEFF * Q,
                                                 np.zeros((1, KT)),
                                                 np.zeros((1, 1)))
                        else:
                            quad_expr = QuadExpr(Q, np.zeros((1, KT)),
                                                 np.zeros((1, 1)))
                        param_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1),
                                                                order='F')
                        attr_val = getattr(param, attr_name)
                        init_val = attr_val[:, start:end + 1].reshape(
                            (KT, 1), order='F')
                        bexpr = BoundExpr(
                            quad_expr, Variable(param_ll_grb_vars, init_val))
                        # bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars))
                        traj_objs.append(bexpr)
        return traj_objs
Example #13
0
    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        init=True,
                        active_ts=None,
                        verbose=False):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -2:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=False)
            # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            obj_bexprs.extend(self._resample(plan, failed_preds))
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel'))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=-1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)

            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               verbose=verbose)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=priority,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)
        # if callback is not None: callback(True)
        return success
Example #14
0
class CanSolver(LLSolver):
    def __init__(self, early_converge=False):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e6
        self.initial_trust_region_size = 1e-1
        self.init_penalty_coeff = 1e1
        # self.init_penalty_coeff = 1e5
        self.max_merit_coeff_increases = 5
        self._param_to_ll = {}
        self.early_converge = early_converge
        self.child_solver = None

    def backtrack_solve(self, plan, callback=None, anum=0, verbose=False):
        if anum > len(plan.actions) - 1:
            return True
        a = plan.actions[anum]
        active_ts = a.active_timesteps
        inits = {}
        # Moveto -> Grasp -> Movetoholding -> Putdown
        if a.name == 'moveto':
            ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose)
            ## find possible values for the final pose
            rs_param = a.params[2]
        elif a.name == 'movetoholding':
            ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can)
            ## find possible values for the final pose
            rs_param = a.params[3]
        elif a.name == 'grasp':
            ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            ## sample the ee_pose
            rs_param = a.params[5]
        elif a.name == 'putdown':
            ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose)
            rs_param = a.params[4]
        else:
            raise NotImplemented

        def recursive_solve():
            ## don't optimize over any params that are already set
            old_params_free = {}
            for p in plan.params.itervalues():
                old_param_map = {}
                if p.is_symbol():
                    if p not in a.params: continue
                    for attr in attr_map[getattr(p, '_type')]:
                        old_param_map[attr] = p._free_attrs[attr].copy()
                        p._free_attrs[attr][:] = 0
                else:
                    for attr in attr_map[getattr(p, '_type')]:
                        old_param_map[attr] = p._free_attrs[
                            attr][:, active_ts[1]].copy()
                        p._free_attrs[attr][:, active_ts[1]] = 0
                old_params_free[p] = old_param_map
            self.child_solver = CanSolver()
            if self.child_solver.backtrack_solve(plan,
                                                 callback=callback,
                                                 anum=anum + 1,
                                                 verbose=verbose):
                return True
            ## reset free_attrs
            for p in a.params:
                if p.is_symbol():
                    if p not in a.params: continue
                    for attr in attr_map[getattr(p, '_type')]:
                        p._free_attrs[attr] = old_params_free[p][attr]
                else:
                    for attr in attr_map[getattr(p, '_type')]:
                        p._free_attrs[attr][:, active_ts[1]] = old_params_free[
                            p][attr]
            return False

        if rs_param.is_fixed(attr_map[getattr(rs_param, '_type')]):
            ## this parameter is fixed
            if callback is not None:
                callback_a = lambda: callback(a)
            else:
                callback_a = None
            self.child_solver = CanSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)

            if not success:
                ## if planning fails we're done
                return False
            ## no other options, so just return here
            return recursive_solve()

        ## so that this won't be optimized over
        rs_free = {}
        for attr in attr_map[getattr(rs_param, '_type')]:
            rs_free[attr] = rs_param._free_attrs[attr].copy()
            rs_param._free_attrs[attr][:] = 0

        # Target is needed to sample the ee_pose, ee_pose is needed to sample the robot_pose
        # InContact, Robot, EEPose, Target
        targets = plan.get_param('InContact', 2, {1: rs_param}, negated=False)
        # EEReachable, Robot, RobotPose, EEPose
        robot_pose = plan.get_param('EEReachable',
                                    1, {2: rs_param},
                                    negated=False)

        if len(targets) > 1:
            import pdb
            pdb.set_trace()
        if callback is not None:
            callback_a = lambda: callback(a)
        else:
            callback_a = None

        robot_poses = []

        if len(targets) == 0 or np.all(
            [targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]):
            ## sample 4 possible poses
            coords = list(itertools.product(range(WIDTH), range(HEIGHT)))
            random.shuffle(coords)
            robot_poses = [np.array(x)[:, None] for x in coords[:4]]
        elif np.any(
            [targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]):
            ## there shouldn't be only some free_attrs set
            raise NotImplementedError
        else:
            grasp_dirs = [
                np.array([0, -1]),
                np.array([1, 0]),
                np.array([0, 1]),
                np.array([-1, 0])
            ]
            grasp_len = plan.params['pr2'].geom.radius + targets[0].geom.radius
            for g_dir in grasp_dirs:
                grasp = (g_dir * grasp_len).reshape((2, 1))
                robot_poses.append(targets[0].value + grasp)

        for rp in robot_poses:
            rs_param.value = rp
            success = False
            self.child_solver = CanSolver()
            success = self.child_solver.solve(plan,
                                              callback=callback_a,
                                              n_resamples=0,
                                              active_ts=active_ts,
                                              verbose=verbose,
                                              force_init=True)
            if success:
                if recursive_solve():
                    break
                else:
                    success = False

        for attr in attr_map[getattr(rs_param, '_type')]:
            rs_param._free_attrs[attr] = rs_free[attr]

        return success

    def sample_ee_from_target(self, target):
        """
            Sample all possible EE Pose that pr2 can grasp with

            target: parameter of type Target
            return: list of tuple in the format of (ee_pos, ee_rot)
        """
        possible_ee_poses = []
        targ_pos, targ_rot = target.value.flatten(), target.rotation.flatten()
        ee_pos = targ_pos.copy()
        target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot)
        # rotate can's local z-axis by the amount of linear spacing between 0 to 2pi
        angle_range = np.linspace(0, np.pi * 2, num=SAMPLE_SIZE)
        for rot in angle_range:
            target_trans = OpenRAVEBody.transform_from_obj_pose(
                targ_pos, targ_rot)
            # rotate new ee_pose around can's rotation axis
            rot_mat = matrixFromAxisAngle([0, 0, rot])
            ee_trans = target_trans.dot(rot_mat)
            ee_rot = OpenRAVEBody.obj_pose_from_transform(ee_trans)[3:]
            possible_ee_poses.append((ee_pos, ee_rot))
        return possible_ee_poses

    def sample_rp_from_ee(self, env, rs_param, ee_pose):
        """
            Using Inverse Kinematics to solve all possible robot armpose and basepose that reaches this ee_pose

            robot_pose: list of full robot pose with format of(basepose, backHeight, lArmPose, lGripper, rArmPose, rGripper)
        """
        ee_pos, ee_rot = ee_pose[0], ee_pose[1]
        base_raidus = np.linspace(-1, 1, num=BASE_SAMPLE_SIZE)
        poses = list(itertools.product(base_raidus, base_raidus))
        # sample
        dummy_body = OpenRAVEBody(env, rs_param.name, PR2())
        possible_robot_pose = []
        for pos in poses:
            bp = pos + ee_pos[:2]
            vec = ee_pos[:2] - bp
            vec = vec / np.linalg.norm(vec)
            theta = math.atan2(vec[1], vec[0])
            bp = np.array([bp[0], bp[1], theta])
            dummy_body.set_pose(bp)
            arm_poses = robot_body.ik_arm_pose(ee_pos, ee_rot)
            for arm_pose in arm_poses:
                possible_robot_pose.append(
                    (bp, ee_arm_poses[0], rs_param.lArmPose, rs_param.lGripper,
                     ee_arm_poses[1:], rs_param.rGripper))

        import ipdb
        ipdb.set_trace()
        dummy_body.delete()
        return possible_robot_pose

    def closest_arm_pose(self, arm_poses, cur_arm_poses):
        pass

    def solve(self,
              plan,
              callback=None,
              n_resamples=5,
              active_ts=None,
              verbose=False,
              force_init=False):
        success = False

        if force_init or not plan.initialized:
            ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan,
                                 priority=-1,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            plan.initialized = True
        success = self._solve_opt_prob(plan,
                                       priority=1,
                                       callback=callback,
                                       active_ts=active_ts,
                                       verbose=verbose)
        if success:
            return success

        for _ in range(n_resamples):
            ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints
            self._solve_opt_prob(plan,
                                 priority=0,
                                 callback=callback,
                                 active_ts=active_ts,
                                 verbose=verbose)
            success = self._solve_opt_prob(plan,
                                           priority=1,
                                           callback=callback,
                                           active_ts=active_ts,
                                           verbose=verbose)
            if success:
                return success
        return success

    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        init=True,
                        init_from_prev=False,
                        active_ts=None,
                        verbose=False):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          active_ts=active_ts,
                                                          verbose=verbose)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = self._resample(plan, failed_preds)
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel'))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          add_nonlin=True,
                                                          verbose=verbose)
            self._add_all_timesteps_of_actions(plan,
                                               priority=0,
                                               add_nonlin=False,
                                               verbose=verbose)
            tol = 1e-1
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-4

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=True)
        import ipdb
        ipdb.set_trace()
        self._update_ll_params()
        return success

    def _get_transfer_obj(self, plan, norm):
        transfer_objs = []
        if norm == 'min-vel':
            for param in plan.params.values():
                if param._type in ['Robot', 'Can']:
                    T = plan.horizon
                    K = 2
                    pose = param.pose
                    assert (K, T) == pose.shape
                    KT = K * T
                    v = -1 * np.ones((KT - K, 1))
                    d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                    # [:,0] allows numpy to see v and d as one-dimensional so
                    # that numpy will create a diagonal matrix with v and d as a diagonal
                    P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                    Q = np.dot(np.transpose(P), P)
                    cur_pose = pose.reshape((KT, 1), order='F')
                    A = -2 * cur_pose.T.dot(Q)
                    b = cur_pose.T.dot(Q.dot(cur_pose))
                    # QuadExpr is 0.5*x^Tx + Ax + b
                    quad_expr = QuadExpr(2 * self.transfer_coeff * Q,
                                         self.transfer_coeff * A,
                                         self.transfer_coeff * b)
                    ll_param = self._param_to_ll[param]
                    ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F')
                    bexpr = BoundExpr(quad_expr,
                                      Variable(ll_grb_vars, cur_pose))
                    transfer_objs.append(bexpr)
        else:
            raise NotImplemented
        return transfer_objs

    def _resample(self, plan, preds):
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            val, attr_inds = pred.resample(negated, t, plan)
            ## no resample defined for that pred
            if val is not None: break
        if val is None:
            return None
        t_local = t
        bexprs = []
        i = 0
        for p in attr_inds:
            ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            if p.is_symbol(): t_local = 0
            n_vals = 0
            grb_vars = []
            for attr, ind_arr in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local]))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to this value
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2 * val[i + j] * np.ones((1, 1))
                b = np.ones((1, 1)) * np.power(val[i + j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff,
                                     b * self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                init_val = np.ones((1, 1)) * val[i + j]
                bexpr = BoundExpr(quad_expr,
                                  Variable(v_arr, val[i + j].reshape((1, 1))))

                bexprs.append(bexpr)

            i += n_vals
        return bexprs

    def _add_pred_dict(self,
                       pred_dict,
                       effective_timesteps,
                       add_nonlin=True,
                       priority=MAX_PRIORITY,
                       verbose=False):
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end + 1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            # TODO: REMOVE line below, for tracing back predicate for debugging.
                            bexpr.pred = pred
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend(
                                    [param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(self,
                                                 plan,
                                                 priority=MAX_PRIORITY,
                                                 add_nonlin=False,
                                                 active_ts=None,
                                                 verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1]: continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority,
                                        add_nonlin=add_nonlin,
                                        verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start + 1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    add_nonlin=False,
                                    priority=priority,
                                    verbose=verbose)

    def _add_all_timesteps_of_actions(self,
                                      plan,
                                      priority=MAX_PRIORITY,
                                      add_nonlin=True,
                                      active_ts=None,
                                      verbose=False):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start > active_ts[1]: continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1]) + 1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict,
                                    timesteps,
                                    priority=priority,
                                    add_nonlin=add_nonlin,
                                    verbose=verbose)

    def _update_ll_params(self):
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    def _get_trajopt_obj(self, plan, active_ts=None):
        if active_ts == None:
            active_ts = (0, plan.horizon - 1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ['Robot', 'Can']:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        T = end - start + 1
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)
                        KT = K * T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)

                        quad_expr = QuadExpr(Q, np.zeros((1, KT)),
                                             np.zeros((1, 1)))
                        robot_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(robot_ll, attr_name)
                        robot_ll_grb_vars = ll_attr_val.reshape((KT, 1),
                                                                order='F')
                        # init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F')
                        # bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val))
                        bexpr = BoundExpr(quad_expr,
                                          Variable(robot_ll_grb_vars))
                        traj_objs.append(bexpr)
        return traj_objs

    def _spawn_sco_var_for_pred(self, pred, t):

        i = 0
        x = np.empty(pred.x_dim, dtype=object)
        v = np.empty(pred.x_dim)
        for p in pred.attr_inds:
            for attr, ind_arr in pred.attr_inds[p]:
                n_vals = len(ind_arr)
                ll_p = self._param_to_ll[p]
                if p.is_symbol():
                    x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                    v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0]
                else:
                    x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr,
                                                          t - self.ll_start]
                    v[i:i + n_vals] = getattr(p, attr)[ind_arr, t]
                i += n_vals
        if pred.dynamic:
            ## include the variables from the next time step
            for p in pred.attr_inds:
                for attr, ind_arr in pred.attr_inds[p]:
                    n_vals = len(ind_arr)
                    ll_p = self._param_to_ll[p]
                    if p.is_symbol():
                        x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0]
                        v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0]
                    else:
                        x[i:i + n_vals] = getattr(ll_p,
                                                  attr)[ind_arr,
                                                        t + 1 - self.ll_start]
                        v[i:i + n_vals] = getattr(p, attr)[ind_arr, t]
                    i += n_vals
        assert i >= pred.x_dim
        x = x.reshape((pred.x_dim, 1))
        v = v.reshape((pred.x_dim, 1))
        return Variable(x, v)
Example #15
0
    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False):
        robot = plan.params['baxter']
        body = plan.env.GetRobot("baxter")
        if callback is not None:
            viewer = callback()
        def draw(t):
            viewer.draw_plan_ts(plan, t)


        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is paied attentioned in here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()


        self._bexpr_to_pred = {}
        if priority == -2:
            """
            Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
            tol = 1e-1
        elif priority == -1:
            """
            Solve the optimization problem while enforcing every constraints.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            ## this should only get called with a full plan for now
            # assert active_ts == (0, plan.horizon-1)
            failed_preds = plan.get_failed_preds()

            # print "{} predicates fails, resampling process begin...\n \
            #        Checking {}".format(len(failed_preds), failed_preds[0])

            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            rs_obj = self._resample(plan, failed_preds)
            obj_bexprs.extend(rs_obj)
            # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1,
                add_nonlin=True, active_ts= active_ts, verbose=verbose)
            tol = 1e-3

        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                               active_ts=active_ts, verbose=verbose)
            tol=1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)


        if priority == 0:
            # During resampling phases, there must be changes added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds()) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred()[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ##Restore free_attrs values
        plan.restore_free_attrs()
        return success
Example #16
0
class RobotLLSolver(LLSolver):
    def __init__(self, early_converge=False, transfer_norm='min-vel'):
        self.transfer_coeff = 1e1
        self.rs_coeff = 1e10
        self.initial_trust_region_size = 1e-2
        self.init_penalty_coeff = 1e1
        # self.init_penalty_coeff = 1e5
        self.max_merit_coeff_increases = 5
        self._param_to_ll = {}
        self.early_converge=early_converge
        self.child_solver = None
        self.solve_priorities = [2]
        self.transfer_norm = transfer_norm


    def _solve_helper(self, plan, callback, active_ts, verbose):
        # certain constraints should be solved first
        success = False
        for priority in self.solve_priorities:
            success = self._solve_opt_prob(plan, priority=priority,
                            callback=callback, active_ts=active_ts,
                            verbose=verbose)
            # if not success:
            #     return success
        # return True
        return success

    def solve(self, plan, callback=None, n_resamples=20, active_ts=None,
              verbose=False, force_init=False):
        success = False
        if force_init or not plan.initialized:
             ## solve at priority -1 to get an initial value for the parameters
            self._solve_opt_prob(plan, priority=-2, callback=callback,
                active_ts=active_ts, verbose=verbose)
            self._solve_opt_prob(plan, priority=-1, callback=callback,
                active_ts=active_ts, verbose=verbose)
            plan.initialized=True
        success = self._solve_helper(plan, callback=callback,
            active_ts=active_ts, verbose=verbose)

        # self.saver = PlanSerializer()
        # self.saver.write_plan_to_hdf5("temp_plan.hdf5", plan)

        if success or len(plan.get_failed_preds()) == 0:
            return True
            # return success

        for _ in range(n_resamples):
            ## refinement loop
            ## priority 0 resamples the first failed predicate in the plan
            ## and then solves a transfer optimization that only includes linear constraints

            self._solve_opt_prob(plan, priority=0, callback=callback,
                                 active_ts=active_ts, verbose=verbose)
            success = self._solve_opt_prob(plan, priority=1,
                            callback=callback, active_ts=active_ts, verbose=verbose)
            if success or len(plan.get_failed_preds()) == 0:
                print "Optimization success after {} resampling.".format(_)
                return _
        return success

    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False):
        robot = plan.params['baxter']
        body = plan.env.GetRobot("baxter")
        if callback is not None:
            viewer = callback()
        def draw(t):
            viewer.draw_plan_ts(plan, t)


        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is paied attentioned in here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()


        self._bexpr_to_pred = {}
        if priority == -2:
            """
            Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
            tol = 1e-1
        elif priority == -1:
            """
            Solve the optimization problem while enforcing every constraints.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            ## this should only get called with a full plan for now
            # assert active_ts == (0, plan.horizon-1)
            failed_preds = plan.get_failed_preds()

            # print "{} predicates fails, resampling process begin...\n \
            #        Checking {}".format(len(failed_preds), failed_preds[0])

            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            rs_obj = self._resample(plan, failed_preds)
            obj_bexprs.extend(rs_obj)
            # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1,
                add_nonlin=True, active_ts= active_ts, verbose=verbose)
            tol = 1e-3

        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                               active_ts=active_ts, verbose=verbose)
            tol=1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)


        if priority == 0:
            # During resampling phases, there must be changes added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds()) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred()[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ##Restore free_attrs values
        plan.restore_free_attrs()
        return success


    def _get_transfer_obj(self, plan, norm):
        """
            This function returns the expression e(x) = P|x - cur|^2
            Which says the optimized trajectory should be close to the
            previous trajectory.
            Where P is the KT x KT matrix, where Px is the difference of parameter's attributes' current value and parameter's next timestep value
        """

        transfer_objs = []
        if norm == 'min-vel':
            for param in plan.params.values():
                # if param._type in ['Robot', 'Can', 'EEPose']:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        if param.is_symbol():
                            T = 1
                        else:
                            T = plan.horizon
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)

                        # pose = param.pose
                        assert (K, T) == attr_val.shape
                        KT = K*T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        # P = np.eye(KT)
                        Q = np.dot(np.transpose(P), P)
                        cur_val = attr_val.reshape((KT, 1), order='F')
                        A = -2*cur_val.T.dot(Q)
                        b = cur_val.T.dot(Q.dot(cur_val))
                        # QuadExpr is 0.5*x^Tx + Ax + b
                        quad_expr = QuadExpr(2*self.transfer_coeff*Q,
                                             self.transfer_coeff*A, self.transfer_coeff*b)
                        param_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F')
                        bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars, cur_val))
                        transfer_objs.append(bexpr)
        else:
            raise NotImplemented
        return transfer_objs

    def _resample(self, plan, preds):
        """
            This function first calls fail predicate's resample function,
            then, uses the resampled value to create a square difference cost
            function e(x) = |x - rs_val|^2 that will be minimized later.
            rs_val is the resampled value
        """
        val, attr_inds = None, None
        for negated, pred, t in preds:
            ## returns a vector of new values and an
            ## attr_inds (OrderedDict) that gives the mapping
            ## to parameter attributes
            val, attr_inds = pred.resample(negated, t, plan)
            ## if no resample defined for that pred, continue
            if val is not None: break
        if val is None:
            return []

        bexprs, i = [], 0
        for p in attr_inds:
            ## get the ll_param for p and gurobi variables
            ll_p = self._param_to_ll[p]
            n_vals = 0
            grb_vars = []
            for attr, ind_arr, t in attr_inds[p]:
                n_vals += len(ind_arr)
                grb_vars.extend(
                    list(getattr(ll_p, attr)[ind_arr, t].flatten()))

            for j, grb_var in enumerate(grb_vars):
                ## create an objective saying stay close to the resampled value
                ## e(x) = (x - val[i+j])**2
                ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2
                Q = np.eye(1)
                A = -2*val[i+j]*np.ones((1, 1))
                b = np.ones((1, 1))*np.power(val[i+j], 2)
                # QuadExpr is 0.5*x^Tx + Ax + b
                quad_expr = QuadExpr(2*Q*self.rs_coeff, A*self.rs_coeff, b*self.rs_coeff)
                v_arr = np.array([grb_var]).reshape((1, 1), order='F')
                init_val = np.ones((1, 1))*val[i+j]
                bexpr = BoundExpr(quad_expr,
                                  Variable(v_arr, val[i+j].reshape((1, 1))))
                bexprs.append(bexpr)
            i += n_vals
        return bexprs

    def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True,
                       priority=MAX_PRIORITY, verbose=False):
        """
            This function creates constraints for the predicate and added to
            Prob class in sco.
        """
        ## for debugging
        ignore_preds = []
        priority = np.maximum(priority, 0)
        if not pred_dict['hl_info'] == "hl_state":
            start, end = pred_dict['active_timesteps']
            active_range = range(start, end+1)
            if verbose:
                print "pred being added: ", pred_dict
                print active_range, effective_timesteps
            negated = pred_dict['negated']
            pred = pred_dict['pred']

            if pred.get_type() in ignore_preds:
                return

            if pred.priority > priority: return
            assert isinstance(pred, common_predicates.ExprPredicate)
            expr = pred.get_expr(negated)

            for t in effective_timesteps:
                if t in active_range:
                    if expr is not None:
                        if add_nonlin or isinstance(expr.expr, AffExpr):
                            if verbose:
                                print "expr being added at time ", t
                            var = self._spawn_sco_var_for_pred(pred, t)
                            bexpr = BoundExpr(expr, var)
                            # TODO: REMOVE line below, for tracing back predicate for debugging.
                            bexpr.pred = pred
                            self._bexpr_to_pred[bexpr] = (negated, pred, t)
                            groups = ['all']
                            if self.early_converge:
                                ## this will check for convergence per parameter
                                ## this is good if e.g., a single trajectory quickly
                                ## gets stuck
                                groups.extend([param.name for param in pred.params])
                            self._prob.add_cnt_expr(bexpr, groups)

    def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY,
                                                 add_nonlin=False, active_ts=None, verbose=False):
        """
            This function adds all linear predicates and first and last timestep
            non-linear predicates from actions that are active within the range of active_ts.
        """
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            ## only add an action
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue
            for pred_dict in action.preds:
                if action_start >= active_ts[0]:
                    self._add_pred_dict(pred_dict, [action_start],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
                if action_end <= active_ts[1]:
                    self._add_pred_dict(pred_dict, [action_end],
                                        priority=priority, add_nonlin=add_nonlin, verbose=verbose)
            ## add all of the linear ineqs
            timesteps = range(max(action_start+1, active_ts[0]),
                              min(action_end, active_ts[1]))
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, add_nonlin=False,
                                    priority=priority, verbose=verbose)

    def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY,
                                      add_nonlin=True, active_ts=None, verbose=False):
        """
            This function adds both linear and non-linear predicates from
            actions that are active within the range of active_ts.
        """
        if active_ts==None:
            active_ts = (0, plan.horizon-1)
        for action in plan.actions:
            action_start, action_end = action.active_timesteps
            if action_start >= active_ts[1] and action_start > active_ts[0]: continue
            if action_end < active_ts[0]: continue

            timesteps = range(max(action_start, active_ts[0]),
                              min(action_end, active_ts[1])+1)
            for pred_dict in action.preds:
                self._add_pred_dict(pred_dict, timesteps, priority=priority,
                                    add_nonlin=add_nonlin, verbose=verbose)

    def _update_ll_params(self):
        """
            update plan's parameters from low level grb_vars.
            expected to be called after each optimization.
        """
        for ll_param in self._param_to_ll.values():
            ll_param.update_param()
        if self.child_solver:
            self.child_solver._update_ll_params()

    def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None):
        """
            This function creates low level parameters for each parameter in the plan,
            initialized he corresponding grb_vars for each attributes in each timestep,
            update the grb models
            adds in equality constraints,
            construct a dictionary as param-to-ll_param mapping.
        """
        if active_ts == None:
            active_ts=(0, plan.horizon-1)
        horizon = active_ts[1] - active_ts[0] + 1
        self._param_to_ll = {}
        self.ll_start = active_ts[0]
        for param in plan.params.values():
            ll_param = LLParam(model, param, horizon, active_ts)
            ll_param.create_grb_vars()
            self._param_to_ll[param] = ll_param
        model.update()
        for ll_param in self._param_to_ll.values():
            ll_param.batch_add_cnts()

    def _add_obj_bexprs(self, obj_bexprs):
        """
            This function adds objective bounded expressions to the Prob class
            in sco.
        """
        for bexpr in obj_bexprs:
            self._prob.add_obj_expr(bexpr)

    def _get_trajopt_obj(self, plan, active_ts=None):
        """
            This function selects parameter of type Robot and Can and returns
            the expression e(x) = |Px|^2
            Which optimize trajectory so that robot and can's attributes in
            current timestep is close to that of next timestep.
            forming a straight line between each end points.

            Where P is the KT x KT matrix, where Px is the difference of
            value in current timestep compare to next timestep
        """
        if active_ts == None:
            active_ts = (0, plan.horizon-1)
        start, end = active_ts
        traj_objs = []
        for param in plan.params.values():
            if param not in self._param_to_ll:
                continue
            if param._type in ['Robot', 'Can']:
                for attr_name in param.__dict__.iterkeys():
                    attr_type = param.get_attr_type(attr_name)
                    if issubclass(attr_type, Vector):
                        T = end - start + 1
                        K = attr_type.dim
                        attr_val = getattr(param, attr_name)
                        KT = K*T
                        v = -1 * np.ones((KT - K, 1))
                        d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1))))
                        # [:,0] allows numpy to see v and d as one-dimensional so
                        # that numpy will create a diagonal matrix with v and d as a diagonal
                        P = np.diag(v[:, 0], K) + np.diag(d[:, 0])
                        Q = np.dot(np.transpose(P), P)
                        Q *= TRAJOPT_COEFF

                        quad_expr = None
                        if attr_name == 'pose' and param._type == 'Robot':
                            quad_expr = QuadExpr(BASE_MOVE_COEFF*Q,
                                                 np.zeros((1,KT)),
                                                 np.zeros((1,1)))
                        else:
                            quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1)))
                        param_ll = self._param_to_ll[param]
                        ll_attr_val = getattr(param_ll, attr_name)
                        param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F')
                        attr_val = getattr(param, attr_name)
                        init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F')
                        bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars, init_val))
                        # bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars))
                        traj_objs.append(bexpr)
        return traj_objs