# ---- Example #1 (score: 0) ----
    def sample_policy(self, x0, policy, T=None, **policy_args):
        """Roll out `policy` from initial state `x0` for `T` timesteps on the
        real agent, throttled to the control rate.

        :param x0: initial state vector
        :param policy: policy with act(x, o, t, noise=...); policy._T is used
            when T is not given
        :param T: horizon (number of timesteps); defaults to policy._T
        :param policy_args: optional 'noise' source (defaults to ZeroNoise)
        :rtype: Sample
        """
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        noise = policy_args.get('noise', ZeroNoise(params))

        # Throttle each control cycle to params['dt'] seconds.
        rate = rospy.Rate(1. / params['dt'])
        policy_sample.set_X(x0, t=0)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t = policy.act(x_t, o_t, t, noise=noise)
            # Under teleop the human drives, so skip sending the control.
            if params['probcoll']['planner_type'] != 'teleop': # TODO hack
                self.execute_control(u_t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)

            # propagate dynamics
            if t < T-1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t+1)
            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

        return policy_sample
    def _run_itr_analyze(self, itr):
        """Run one analysis pass: for every condition/repeat, do a
        single-step MPC rollout and save the resulting samples, world
        info, and MPC info.

        Fixes vs. the previous version (behavior unchanged):
        - removed the dead `rep += 1` at the end of the repeat loop — inside
          `for rep in xrange(...)` the increment had no effect and was a
          misleading leftover from the `while`-based `_run_rollout`;
        - removed the `for ... else:` whose loop had no `break`, so the
          `else` branch always ran — replaced with a plain statement after
          the loop.

        :param itr: iteration index used when saving data
        """
        T = 1  # analysis uses single-step rollouts
        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            for rep in xrange(self._conditions.repeats):
                self._logger.info('\t\tStarting cond {0} rep {1}'.format(
                    cond, rep))
                # Reset on the very first rollout, or every time if the
                # world is randomized.
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                mpc_policy = self._create_mpc(itr, x0)
                control_noise = self._create_control_noise(
                )  # create each time b/c may not be memoryless

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    # Single-step rollout of the MPC policy from the
                    # current state.
                    rollout = self._agent.sample_policy(x0,
                                                        mpc_policy,
                                                        noise=control_noise,
                                                        T=1)

                    u = rollout.get_U(t=0)
                    o = rollout.get_O(t=0)
                    u_label = u

                    sample_T.set_U(u_label, t=t)
                    sample_T.set_O(o, t=t)

                    if hasattr(mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, mpc_policy._curr_traj, t)

                self._logger.info('\t\t\tLasted for t={0}'.format(t))

                samples.append(sample_T)
                world_infos.append(self._get_world_info())
                mpc_infos.append(mpc_policy.get_info())

                assert (samples[-1].isfinite())

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)
# ---- Example #3 (score: 0) ----
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      use_noise=True,
                      only_noise=False,
                      **policy_args):
        """Roll out `policy` from `x0` for `T` steps, recording both the
        noisy and noise-free controls.

        :param x0: initial state
        :param policy: policy whose act(x, o, t, only_noise=...) returns
            (u_noisy, u_no_noise); policy._T is used when T is None
        :param T: horizon; defaults to policy._T
        :param use_noise: if True execute the noisy control, else the
            noise-free one
        :param only_noise: if True, skip recording the noise-free controls
        :return: (policy_sample, policy_sample_no_noise)
        """
        if T is None:
            T = policy._T
        # Throttle each real-world cycle to params['dt'] seconds.
        rate = rospy.Rate(1. / params['dt'])
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            if self.sim:
                # In simulation, merge the simulator's pose into the state.
                x_t = self._get_sim_state(x_t)
            start = time.time()
            u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
            self._logger.debug(time.time() - start)  # policy query latency
            # only execute control if no collision
            # TODO is this necessary
            if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
                if use_noise:
                    self.execute_control(u_t)
                else:
                    self.execute_control(u_t_no_noise)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # In sim we do not have cycles
            if self.sim:
                policy_sample.set_O([int(self.sim_coll)],
                                    t=t,
                                    sub_obs='collision')
            else:
                # propagate dynamics
                if t < T - 1:
                    x_tp1 = self._dynamics.evolve(x_t, u_t)
                    policy_sample.set_X(x_tp1, t=t + 1)

                rate.sleep()
                # see if collision in the past cycle
                policy_sample.set_O(
                    [int(self.coll_callback.get() is not None)],
                    t=t,
                    sub_obs='collision')

        return policy_sample, policy_sample_no_noise
# ---- Example #4 (score: 0) ----
 def _get_sim_state(self, xt):
     """Combine `xt` with the simulator's current pose.

     NaN entries in either source are zeroed before summing — presumably
     `xt` carries NaN in the pose entries so the simulated pose fills
     them in; TODO confirm against callers.
     """
     pose_msg = self.sim_state
     pos = pose_msg.position
     quat = pose_msg.orientation
     pose_sample = Sample(meta_data=params, T=1)
     pose_sample.set_X(np.array([pos.x, pos.y, pos.z]),
                       t=0, sub_state='position')
     pose_sample.set_X(np.array([quat.x, quat.y, quat.z, quat.w]),
                       t=0, sub_state='orientation')
     return np.nan_to_num(xt) + np.nan_to_num(pose_sample.get_X(t=0))
# ---- Example #5 (score: 0) ----
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      only_noise=False,
                      **policy_args):
        """Execute `policy` from `x0` for `T` steps and collect the
        trajectory data.

        :param x0: initial state
        :param policy: policy to execute; act(x, o, t) returns
            (u_noisy, u_no_noise); policy._T is used when T is None
        :param T: horizon; defaults to policy._T
        :param only_noise: if True, skip recording the noise-free controls
        :param policy_args: e.g. ref_traj, noise, etc
        :return: (policy_sample, policy_sample_no_noise)
        """
        if T is None:
            T = policy._T

        policy_sample = Sample(meta_data=params, T=T)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)

        for step in xrange(T):
            # Observe the current state and query the policy.
            state = policy_sample.get_X(t=step)
            obs = self.get_observation(state)
            control, control_no_noise = policy.act(state, obs, step)

            # Record state, observation, and controls at this step.
            policy_sample.set_X(state, t=step)
            policy_sample.set_O(obs, t=step)
            policy_sample.set_U(control, t=step)
            if not only_noise:
                policy_sample_no_noise.set_U(control_no_noise, t=step)

            # Step the dynamics forward for all but the final timestep.
            if step + 1 < T:
                policy_sample.set_X(self._dynamics.evolve(state, control),
                                    t=step + 1)

        return policy_sample, policy_sample_no_noise
    def act(self, x, obs, t, noise, ref_traj=None):
        """Return the first control of a straight-line desired trajectory.

        Builds a trajectory radiating outward from `x` at the fixed heading
        self.theta and speed self.speed, stores it in self._curr_traj, and
        returns the first control plus sampled noise.
        """
        T = self._T
        dt = self._meta_data['dt']  # read for parity; not used below
        theta, speed = self.theta, self.speed

        # Constant velocity command along the fixed heading.
        linearvel = [speed * np.cos(theta), speed * np.sin(theta)]

        traj = Sample(meta_data=self._meta_data, T=T)
        traj.set_X(x, t=0)
        traj.set_O(obs, t=0)

        # Forward-simulate the constant command through the dynamics;
        # `step` (not `t`) avoids shadowing the `t` parameter.
        for step in xrange(T - 1):
            traj.set_U(linearvel, t=step)
            traj.set_X(self._dynamics.evolve(traj.get_X(t=step), linearvel),
                       t=step + 1)
        traj.set_U(linearvel, t=-1)

        self._curr_traj = traj

        u = traj.get_U(t=0)
        return u + noise.sample(u)
# ---- Example #7 (score: 0) ----
    def _cem_step(self,
                  x0,
                  obs,
                  mean,
                  cov,
                  M,
                  K,
                  lower_bound,
                  upper_bound,
                  dU,
                  T,
                  dynamics,
                  cost_funcs,
                  plot_env=None):
        """One iteration of the cross-entropy method over control sequences.

        :param x0: initial state set at t=0 of each sampled rollout
        :param obs: observation set at t=0 of each sampled rollout
        :param mean: mean of trajectory controls Gaussian (length T*dU)
        :param cov: covariance of trajectory controls Gaussian
        :param M: sample M controls
        :param K: keep K lowest cost trajectories
        :param lower_bound: elementwise lower bound on sampled controls
        :param upper_bound: elementwise upper bound on sampled controls
        :param dU: control dimension per timestep
        :param T: horizon
        :param dynamics: model passed to Sample.rollout
        :param cost_funcs: cost functions (each has eval or eval_batch)
        :param plot_env: optional environment for visualizing rollouts
        :return: mean, cov of the Gaussian refit to the K elites
        """
        ### sample trajectories
        # Rejection-sample until M control vectors satisfy the (strict)
        # elementwise bounds.
        M_controls = []
        while len(M_controls) < M:
            control = np.random.multivariate_normal(mean, cov)
            if np.all(control < upper_bound) and np.all(control > lower_bound):
                M_controls.append(control)
        samples = []
        for U_flat in M_controls:
            sample = Sample(meta_data=self._meta_data, T=T)
            U = U_flat.reshape(T, dU)
            U[:, -1] = 0.  # TODO hack
            sample.set_U(U, t=slice(0, T))
            sample.set_O(obs, t=0)
            sample.set_X(x0, t=0)
            # Overwrite controls the config pins to fixed values.
            for sub_control, u_sub in self._config['fixed'].items():
                sample.set_U(np.array([list(u_sub) * T]).T,
                             t=slice(0, T),
                             sub_control=sub_control)
            sample.rollout(dynamics)
            samples.append(sample)

        ### keep lowest K cost trajectories
        costs = [0.] * len(samples)
        for cost_func in cost_funcs:
            if hasattr(cost_func, 'eval_batch'):
                costs = [
                    c + cost_func_approx.J for c, cost_func_approx in zip(
                        costs, cost_func.eval_batch(samples))
                ]
            else:
                costs = [
                    c + cost_func.eval(sample).J
                    for c, sample in zip(costs, samples)
                ]
        samples_costs = zip(samples, costs)
        samples_costs = sorted(samples_costs, key=lambda x: x[1])
        K_samples = [x[0] for x in samples_costs[:K]]

        ### fit Gaussian
        # Refit mean/covariance to the flattened controls of the K elites.
        data = np.vstack([s.get_U().ravel() for s in K_samples])
        new_mean = np.mean(data, axis=0)
        new_cov = np.cov(data, rowvar=0)

        if plot_env:
            plot_env.rave_env.clear_plots()
            # Elites in green, the remaining M-K samples in red.
            colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
            for sample, color in zip([x[0] for x in samples_costs], colors):
                for t in xrange(T - 1):
                    plot_env.rave_env.plot_segment(
                        sample.get_X(t=t, sub_state='position'),
                        sample.get_X(t=t + 1, sub_state='position'),
                        color=color)

        return new_mean, new_cov
# ---- Example #8 (score: 0) ----
    def _run_rollout(self, itr):
        """Collect one iteration of on-policy rollouts across all
        conditions/repeats and save samples, world info, and MPC info.

        A rollout that fails self._is_good_rollout is repeated (rep is not
        advanced). Rollouts stop early on collision.

        :param itr: iteration index used when saving data
        """
        T = params['probcoll']['T']
        label_with_noise = params['probcoll']['label_with_noise']

        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            rep = 0
            # while (not for) so a bad rollout can be retried via `continue`.
            while rep < self._conditions.repeats:
                self._logger.info(
                    '\t\tStarting cond {0} rep {1} itr {2}'.format(
                        cond, rep, itr))
                start = time.time()
                # Reset on the very first rollout, or every time if the
                # world is randomized.
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    # Single-step rollout of the MPC policy from the
                    # current state.
                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                    o = rollout.get_O(t=0)
                    if label_with_noise:
                        u = rollout.get_U(t=0)
                    else:
                        u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        # Use the agent's observed state instead of the model.
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)

                    if hasattr(self._mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, self._mpc_policy._curr_traj, t)

                else:
                    # Loop finished without a collision break.
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                # Truncate the sample to the timesteps actually executed.
                sample = sample_T.match(slice(0, t + 1))
                world_info = self._get_world_info()
                mpc_info = self._mpc_policy.get_info()

                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)
                world_infos.append(world_info)
                mpc_infos.append(mpc_info)

                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                    .format(cond, rep, elapsed, t * params['dt'] / elapsed))
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)

        self._reset_world(itr, 0, 0)  # leave the world as it was
# ---- Example #9 (score: 0) ----
    def _run_testing(self, itr):
        """Evaluate the MPC policy noise-free on fixed test positions every
        `itr_freq` iterations and on the final iteration.

        Only does work in simulation (the non-sim branch is a no-op); the
        simulator pose is saved beforehand and restored afterwards.

        :param itr: current training iteration
        """
        if (itr != 0 and (itr == self._max_iter - 1 \
                or itr % params['world']['testing']['itr_freq'] == 0)):
            self._logger.info('Itr {0} testing'.format(itr))
            if self._agent.sim:
                #                if self._async_on:
                #                    self._logger.debug('Recovering probcoll model')
                #                    self._probcoll_model.recover()
                T = params['probcoll']['T']
                conditions = params['world']['testing']['positions']
                samples = []
                # Save the sim pose so it can be restored after testing.
                reset_pos, reset_quat = self._agent.get_sim_state_data()
                for cond in xrange(len(conditions)):
                    self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                        cond, itr))
                    start = time.time()
                    # Teleport the agent to the test position.
                    self._agent.execute_control(None,
                                                reset=True,
                                                pos=conditions[cond])
                    x0 = self._conditions.get_cond(0)
                    sample_T = Sample(meta_data=params, T=T)
                    sample_T.set_X(x0, t=0)
                    for t in xrange(T):
                        x0 = sample_T.get_X(t=t)

                        # Single-step, noise-free rollout of the MPC policy.
                        rollout, rollout_no_noise = self._agent.sample_policy(
                            x0, self._mpc_policy, T=1, use_noise=False)

                        o = rollout.get_O(t=0)
                        u = rollout_no_noise.get_U(t=0)

                        sample_T.set_U(u, t=t)
                        sample_T.set_O(o, t=t)

                        if not self._use_dynamics:
                            # Use the agent's observed state, not the model's.
                            sample_T.set_X(rollout.get_X(t=0), t=t)

                        if self._world.is_collision(sample_T, t=t):
                            self._logger.warning(
                                '\t\t\tCrashed at t={0}'.format(t))
                            break

                        if self._use_dynamics:
                            if t < T - 1:
                                x_tp1 = self._dynamics.evolve(x0, u)
                                sample_T.set_X(x_tp1, t=t + 1)

                    else:
                        # No collision break: ran the full horizon.
                        self._logger.info('\t\t\tLasted for t={0}'.format(t))

                    # Keep only the timesteps actually executed.
                    sample = sample_T.match(slice(0, t + 1))

                    if not self._is_good_rollout(sample, t):
                        self._logger.warning(
                            '\t\t\tNot good rollout. Repeating rollout.'.
                            format(t))
                        continue

                    samples.append(sample)

                    assert (samples[-1].isfinite())
                    elapsed = time.time() - start
                    self._logger.info(
                        '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                        .format(cond, elapsed, t * params['dt'] / elapsed))

                self._itr_save_samples(itr, samples, prefix='testing_')
                # Restore the simulator to its pre-testing pose.
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=reset_pos,
                                            quat=reset_quat)
            else:
                pass