Example #1
    def get_value(self, plan, penalty_coeff=1e0):
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan)
        model.update()
        self._bexpr_to_pred = {}

        obj_bexprs = self._get_trajopt_obj(plan)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan,
                                           priority=1,
                                           add_nonlin=True,
                                           verbose=False)
        return self._prob.get_value(penalty_coeff)
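
This variant only scores a plan: it rebuilds the Gurobi model, adds the trajectory objective and the nonlinear constraints at priority 1, and returns the penalty merit value. Conceptually that value is the objective plus penalty_coeff times the total constraint violation; the sketch below is a minimal self-contained illustration of that merit function, not the sco API:

    import numpy as np

    def merit_value(x, cost, constraints, penalty_coeff=1e0):
        # cost: callable returning the objective at x
        # constraints: callables g with g(x) <= 0 desired; the positive part is the violation
        violation = sum(max(g(x), 0.0) for g in constraints)
        return cost(x) + penalty_coeff * violation

    x = np.array([0.5, 2.0])
    cost = lambda x: float(np.sum(x ** 2))    # stand-in for the trajectory objective
    constraints = [lambda x: x[1] - 1.0]
    print(merit_value(x, cost, constraints))  # 4.25 + 1.0 * 1.0 = 5.25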
Example #2
    def monitor_update(self, plan, update_values, callback=None, n_resamples=5, active_ts=None, verbose=False):
        print('Resolving after environment update...\n')
        if callback is not None: viewer = callback()
        if active_ts is None:
            active_ts = (0, plan.horizon-1)

        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is taken into account here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        self._bexpr_to_pred = {}
        tol = 1e-3
        obj_bexprs = []
        rs_obj = self._update(plan, update_values)
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

        self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY,
            add_nonlin=True, active_ts=active_ts, verbose=verbose)
        obj_bexprs.extend(rs_obj)
        self._add_obj_bexprs(obj_bexprs)
        initial_trust_region_size = 1e-2

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        plan.restore_free_attrs()

        self.reset_variable()
        print "monitor_update\n"
        return success
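
monitor_update brackets the re-solve with plan.save_free_attrs() and plan.restore_free_attrs(), so attributes temporarily fixed for this solve are released afterwards. A hypothetical context manager expressing the same bracketing pattern (the free_attrs dict layout here is an assumption, not the plan API):

    from contextlib import contextmanager
    import copy

    @contextmanager
    def preserved_free_attrs(plan):
        saved = copy.deepcopy(plan.free_attrs)  # snapshot before solving
        try:
            yield plan
        finally:
            plan.free_attrs = saved             # restore even if the solve raises

    class ToyPlan(object):
        def __init__(self):
            self.free_attrs = {'pose': True}

    plan = ToyPlan()
    with preserved_free_attrs(plan):
        plan.free_attrs['pose'] = False         # temporarily fix the attribute
    print(plan.free_attrs)                      # {'pose': True}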
Example #3
    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False, resample=False, smoothing=False):
        if callback is not None: viewer = callback()
        self.plan = plan
        vehicle = plan.params['vehicle']
        if active_ts is None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()
        initial_trust_region_size = self.initial_trust_region_size
        if resample:
            tol = 1e-3
            def variable_helper():
                error_bin = []
                for sco_var in self._prob._vars:
                    for grb_var, val in zip(sco_var.get_grb_vars(), sco_var.get_value()):
                        grb_name = grb_var[0].VarName
                        one, two = grb_name.find('-'), grb_name.find('-(')
                        param_name = grb_name[1:one]
                        attr = grb_name[one+1:two]
                        index = eval(grb_name[two+1:-1])
                        param = plan.params[param_name]
                        if not np.allclose(val, getattr(param, attr)[index]):
                            error_bin.append((grb_name, val, getattr(param, attr)[index]))
                if len(error_bin) != 0:
                    print "something wrong"
                    if DEBUG: import ipdb; ipdb.set_trace()

            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            obj_bexprs = []

            ## this is an objective that places
            ## a high value on matching the resampled values
            failed_preds = plan.get_failed_preds(active_ts=active_ts, priority=priority, tol=tol)
            rs_obj = self._resample(plan, failed_preds, sample_all=True)
            # import ipdb; ipdb.set_trace()
            # _get_transfer_obj returns an expression saying the current trajectory should stay close to its previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_all_timesteps_of_actions(plan, priority=priority,
                add_nonlin=False, active_ts=active_ts, verbose=verbose)
            obj_bexprs.extend(rs_obj)
            self._add_obj_bexprs(obj_bexprs)
            initial_trust_region_size = 1e3
            # import ipdb; ipdb.set_trace()
        else:
            self._bexpr_to_pred = {}
            if priority == -2:
                """
                Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
                tol = 1e-1
                initial_trust_region_size = 1e3
            elif priority == -1:
                """
                Solve the optimization problem while enforcing every constraints.
                """
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_first_and_last_timesteps_of_actions(plan,
                    priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                    add_nonlin=True)
                tol = 1e-1
            elif priority >= 0:
                obj_bexprs = self._get_trajopt_obj(plan, active_ts)
                self._add_obj_bexprs(obj_bexprs)
                self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                                   active_ts=active_ts, verbose=verbose)
                tol = 1e-3

        solv = Solver()
        solv.initial_trust_region_size = initial_trust_region_size

        if smoothing:
            solv.initial_penalty_coeff = self.smooth_penalty_coeff
        else:
            solv.initial_penalty_coeff = self.init_penalty_coeff

        solv.max_merit_coeff_increases = self.max_merit_coeff_increases

        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose)
        if priority == MAX_PRIORITY:
            success = success or len(plan.get_failed_preds(tol=tol, active_ts=active_ts, priority=priority)) == 0
        self._update_ll_params()

        if DEBUG: assert not plan.has_nan(active_ts)

        if resample:
            # During resampling phases, entries must have been added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds(active_ts=active_ts, priority=priority)) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred(active_ts=(0,active_ts[1]), priority=priority)[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ## Restore free_attrs values
        plan.restore_free_attrs()

        self.reset_variable()
        print "priority: {}\n".format(priority)

        return success
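
variable_helper above recovers (parameter, attribute, index) triples from Gurobi variable names. A worked, self-contained example of that slicing, assuming names of the form "(param-attr-(row,col))" as the code implies; ast.literal_eval would be a safer substitute for eval on such strings:

    grb_name = '(vehicle-pose-(0,3))'           # assumed name format
    one, two = grb_name.find('-'), grb_name.find('-(')
    param_name = grb_name[1:one]                # 'vehicle'
    attr = grb_name[one + 1:two]                # 'pose'
    index = eval(grb_name[two + 1:-1])          # (0, 3); ast.literal_eval is safer
    print(param_name, attr, index)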
Example #4
    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        active_ts=None,
                        verbose=False,
                        resample=True):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts is None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          active_ts=active_ts,
                                                          verbose=verbose)
            tol = 1e-2
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            obj_bexprs = []

            if resample:
                failed_preds = plan.get_failed_preds()
                ## this is an objective that places
                ## a high value on matching the resampled values
                obj_bexprs.extend(self._resample(plan, failed_preds))

            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)
            self._add_first_and_last_timesteps_of_actions(plan,
                                                          priority=-1,
                                                          add_nonlin=True,
                                                          verbose=verbose)
            self._add_all_timesteps_of_actions(plan,
                                               priority=0,
                                               add_nonlin=False,
                                               verbose=verbose)
            tol = 1e-2
        elif priority == 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-4

        solv = Solver()
        solv.initial_penalty_coeff = self.init_penalty_coeff
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=verbose)
        self._update_ll_params()
        self._failed_groups = self._prob.nonconverged_groups
        return success
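
At priority 0 this version mixes the resampling objective with a transfer objective that keeps the new trajectory close to the previous one. A sketch of that transfer idea under an L2 norm (an assumption; the actual _get_transfer_obj builds solver expressions rather than evaluating a cost directly):

    import numpy as np

    def transfer_cost(traj, prev_traj, coeff=1.0):
        # penalize squared deviation from the previous trajectory
        diff = traj - prev_traj
        return coeff * float(np.sum(diff ** 2))

    prev = np.zeros((2, 5))           # 2 dofs over 5 timesteps
    curr = prev + 0.1
    print(transfer_cost(curr, prev))  # 10 entries * 0.01 = 0.1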
Example #5
    def _solve_opt_prob(self,
                        plan,
                        priority,
                        callback=None,
                        init=True,
                        active_ts=None,
                        verbose=False):
        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts is None:
            active_ts = (0, plan.horizon - 1)

        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # param_to_ll_old = self._param_to_ll.copy()
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()

        self._bexpr_to_pred = {}

        if priority == -2:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=False)
            # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority == -1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan,
                priority=MAX_PRIORITY,
                active_ts=active_ts,
                verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            ## this should only get called with a full plan for now
            assert active_ts == (0, plan.horizon - 1)

            failed_preds = plan.get_failed_preds()
            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            obj_bexprs.extend(self._resample(plan, failed_preds))
            ## solve an optimization movement primitive to
            ## transfer current trajectories
            obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel'))
            self._add_obj_bexprs(obj_bexprs)
            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=-1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)

            self._add_all_timesteps_of_actions(plan,
                                               priority=1,
                                               add_nonlin=True,
                                               verbose=verbose)

            # self._add_first_and_last_timesteps_of_actions(
            #     plan, priority=1, add_nonlin=True, verbose=verbose)
            # self._add_all_timesteps_of_actions(
            #     plan, priority=0, add_nonlin=False, verbose=verbose)
            tol = 1e-1
        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan,
                                               priority=priority,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob,
                             method='penalty_sqp',
                             tol=tol,
                             verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)
        # if callback is not None: callback(True)
        return success
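
The priority branches (-2 linear initialization, -1 endpoints with nonlinear constraints, >= 0 all timesteps) form a coarse-to-fine schedule. A hypothetical driver loop, not taken from the source, showing how such a schedule is typically iterated:

    def solve_with_priorities(solve_at_priority, priorities=(-2, -1, 0, 1)):
        for priority in priorities:
            if not solve_at_priority(priority):
                return False    # stop escalating once a level fails
        return True

    # stub standing in for a bound self._solve_opt_prob(plan, priority)
    print(solve_with_priorities(lambda priority: True))   # True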
Example #6

    def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                        active_ts=None, verbose=False):
        robot = plan.params['baxter']
        body = plan.env.GetRobot("baxter")
        if callback is not None:
            viewer = callback()
        def draw(t):
            viewer.draw_plan_ts(plan, t)


        ## active_ts is the inclusive timesteps to include
        ## in the optimization
        if active_ts is None:
            active_ts = (0, plan.horizon-1)
        plan.save_free_attrs()
        model = grb.Model()
        model.params.OutputFlag = 0
        self._prob = Prob(model, callback=callback)
        # _free_attrs is taken into account here
        self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
        model.update()


        self._bexpr_to_pred = {}
        if priority == -2:
            """
            Initialize an linear trajectory while enforceing the linear constraints in the intermediate step.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False)
            tol = 1e-1
        elif priority == -1:
            """
            Solve the optimization problem while enforcing every constraints.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(plan,
                priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose,
                add_nonlin=True)
            tol = 1e-1
        elif priority == 0:
            """
            When Optimization fails, resample new values for certain timesteps
            of the trajectory and solver as initialization
            """
            ## this should only get called with a full plan for now
            # assert active_ts == (0, plan.horizon-1)
            failed_preds = plan.get_failed_preds()

            # print "{} predicates fails, resampling process begin...\n \
            #        Checking {}".format(len(failed_preds), failed_preds[0])

            ## this is an objective that places
            ## a high value on matching the resampled values
            obj_bexprs = []
            rs_obj = self._resample(plan, failed_preds)
            obj_bexprs.extend(rs_obj)
            # _get_transfer_obj returns an expression saying the current trajectory should stay close to its previous trajectory.
            # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
            obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))

            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=1,
                add_nonlin=True, active_ts=active_ts, verbose=verbose)
            tol = 1e-3

        elif priority >= 1:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True,
                                               active_ts=active_ts, verbose=verbose)
            tol = 1e-3

        solv = Solver()
        solv.initial_trust_region_size = self.initial_trust_region_size
        solv.initial_penalty_coeff = self.init_penalty_coeff
        solv.max_merit_coeff_increases = self.max_merit_coeff_increases
        success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True)
        self._update_ll_params()
        print "priority: {}".format(priority)


        if priority == 0:
            # During resampling phases, entries must have been added to sampling_trace
            if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
                reward = 0
                if len(plan.get_failed_preds()) == 0:
                    reward = len(plan.actions)
                else:
                    failed_t = plan.get_failed_pred()[2]
                    for i in range(len(plan.actions)):
                        if failed_t > plan.actions[i].active_timesteps[1]:
                            reward += 1
                plan.sampling_trace[-1]['reward'] = reward
        ## Restore free_attrs values
        plan.restore_free_attrs()
        return success
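
The reward bookkeeping at the end counts how many actions complete before the first failed timestep (or all actions when nothing fails). A self-contained restatement of that rule:

    def resample_reward(action_windows, failed_t):
        # action_windows: list of (start_t, end_t) per action, in plan order
        return sum(1 for _, end_t in action_windows if failed_t > end_t)

    windows = [(0, 9), (9, 19), (19, 29)]
    print(resample_reward(windows, failed_t=25))   # 2: first two actions finished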