Example #1
0
    def sample_policy(self, x0, policy, T=None, **policy_args):
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        noise = policy_args.get('noise', ZeroNoise(params))

        rate = rospy.Rate(1. / params['dt'])
        policy_sample.set_X(x0, t=0)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t = policy.act(x_t, o_t, t, noise=noise)
            if params['probcoll']['planner_type'] != 'teleop': # TODO hack
                self.execute_control(u_t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)

            # propagate dynamics
            if t < T-1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t+1)
            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

        return policy_sample
Example #2
0
    def _evaluate_probcoll_model_on_samples(self, bootstrap, itr, samples,
                                            mpcs):
        ### load NN
        H = params['model']['T']
        model_file = self._itr_model_file(itr)
        if not ProbcollModelPointquad.checkpoint_exists(model_file):
            return [None] * len(samples), [None] * len(samples)

        bootstrap.load(model_file=model_file)

        means, stds = [], []
        for sample, mpc in zip(samples, mpcs):
            eval_samples = []
            for o_t, U in zip(sample.get_O(), mpc['controls']):
                eval_sample = Sample(T=H, meta_data=params)
                eval_sample.set_U(U, t=slice(0, H))
                eval_sample.set_O(o_t, t=0)
                eval_samples.append(eval_sample)

            mean, std = bootstrap.eval_sample_batch(eval_samples,
                                                    num_avg=10,
                                                    pre_activation=True)
            means.append(mean)
            stds.append(std)

        return means, stds
    def _run_itr_analyze(self, itr):
        T = 1
        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            for rep in xrange(self._conditions.repeats):
                self._logger.info('\t\tStarting cond {0} rep {1}'.format(
                    cond, rep))
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                mpc_policy = self._create_mpc(itr, x0)
                control_noise = self._create_control_noise(
                )  # create each time b/c may not be memoryless

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout = self._agent.sample_policy(x0,
                                                        mpc_policy,
                                                        noise=control_noise,
                                                        T=1)

                    u = rollout.get_U(t=0)
                    o = rollout.get_O(t=0)
                    u_label = u

                    sample_T.set_U(u_label, t=t)
                    sample_T.set_O(o, t=t)

                    if hasattr(mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                samples.append(sample_T)
                world_infos.append(self._get_world_info())
                mpc_infos.append(mpc_policy.get_info())

                assert (samples[-1].isfinite())
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)
Example #4
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      use_noise=True,
                      only_noise=False,
                      **policy_args):
        if T is None:
            T = policy._T
        rate = rospy.Rate(1. / params['dt'])
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            if self.sim:
                x_t = self._get_sim_state(x_t)
            start = time.time()
            u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
            self._logger.debug(time.time() - start)
            # only execute control if no collision
            # TODO is this necessary
            if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
                if use_noise:
                    self.execute_control(u_t)
                else:
                    self.execute_control(u_t_no_noise)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # In sim we do not have cycles
            if self.sim:
                policy_sample.set_O([int(self.sim_coll)],
                                    t=t,
                                    sub_obs='collision')
            else:
                # propagate dynamics
                if t < T - 1:
                    x_tp1 = self._dynamics.evolve(x_t, u_t)
                    policy_sample.set_X(x_tp1, t=t + 1)

                rate.sleep()
                # see if collision in the past cycle
                policy_sample.set_O(
                    [int(self.coll_callback.get() is not None)],
                    t=t,
                    sub_obs='collision')

        return policy_sample, policy_sample_no_noise
Example #5
0
    def reset(self, back_up, itr=None, cond=None, rep=None, record=True):
#        self._agent.execute_control(None) # stop the car
        assert(itr is not None and cond is not None and rep is not None)

        # TODO add sim backup
        ### back car up straight and slow
        if back_up:
            sample = Sample(meta_data=params, T=2)
            sample.set_U([np.random.uniform(*self.wp['back_up']['cmd_steer'])], t=0, sub_control='cmd_steer')
            sample.set_U([self.wp['back_up']['cmd_vel']], t=0, sub_control='cmd_vel')
            u = sample.get_U(t=0)
            if self._agent.sim:
                if self._agent.sim_coll and self._agent.sim_last_coll:
                    self._logger.info('Resetting the car')
                    self._agent.execute_control(None, reset=True)
                elif self._agent.sim_coll:
                    # TODO add backup logic for not sim
                    self._logger.info('Backing the car up')
                    for _ in xrange(int(self.wp['back_up']['duration'] / params['dt'])): 
                        self._agent.execute_control(u)
                        if self._agent.sim_coll:
                            break
                    for _ in xrange(int(1.0 / params['dt'])):
                        self._agent.execute_control(None)
            else:
                self._logger.info('Backing the car up')
                start = time.time()
                while time.time() - start < self.wp['back_up']['duration']:
                    self._agent.execute_control(u)

                self._agent.execute_control(None)

                time.sleep(1.0)
        else:
            self._agent.execute_control(None, reset=True)
            ### TODO add a flag
            self._ros_reset_pub.publish(std_msgs.msg.Empty())

        ### record
        if record:
            self._start_record_bag(itr, cond, rep)
        
        if not self._agent.sim:
            ### reset indicators
            self._ros_collision.get()
            self.num_collisions = 0
Example #6
0
    def _create_primitives(self):
        des_vel = params['planning']['cost_velocity']['velocity']
        weights = params['planning']['cost_velocity']['weights']

        assert (weights[1] == 0 and weights[2] == 0)

        thetas = np.linspace(-np.pi / 2., np.pi / 2., 19)
        speeds = np.linspace(0.1, 1.0, 10) * des_vel[0]

        samples = []
        for theta in thetas:
            for speed in speeds:
                sample = Sample(T=self._H)
                linearvel = [speed * np.cos(theta), speed * np.sin(theta), 0.]
                sample.set_U(linearvel, t=slice(0, self._H))
                samples.append(sample)

        return samples
Example #7
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      only_noise=False,
                      **policy_args):
        """
        Run the policy and collect the trajectory data

        :param x0: initial state
        :param policy: to execute
        :param policy_args: e.g. ref_traj, noise, etc
        :rtype Sample
        """
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t, u_t_no_noise = policy.act(x_t, o_t, t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)

        return policy_sample, policy_sample_no_noise
    def act(self, x, obs, t, noise, ref_traj=None):
        ### create desired path (radiates outward)
        T, dt, theta, speed = self._T, self._meta_data[
            'dt'], self.theta, self.speed
        traj = Sample(meta_data=self._meta_data, T=T)

        ### initialize
        traj.set_X(x, t=0)
        traj.set_O(obs, t=0)

        ### create desired path (radiates outward)
        linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
        for t in xrange(T - 1):
            x_t = traj.get_X(t=t)
            x_tp1 = self._dynamics.evolve(x_t, linearvel)

            traj.set_X(x_tp1, t=t + 1)
            traj.set_U(linearvel, t=t)
        traj.set_U(linearvel, t=-1)

        self._curr_traj = traj

        u = traj.get_U(t=0)
        return u + noise.sample(u)
Example #9
0
    def plan(self,
             x0,
             obs,
             dynamics,
             cost_funcs,
             dU,
             T,
             init_traj=None,
             plot_env=None,
             init_cov=None,
             config=None):
        """
        :type init_traj: Sample
        :type dynamics: Dynamics
        :type cost_func: Cost
        """
        if config is None:
            config = self._config
        self.means, self.covs = [], []

        if init_traj is not None:
            mean = init_traj.get_U().ravel()
        elif 'init_u' in config:
            mean = config['init_u'] * T
        else:
            mean = np.zeros(dU * T)

        lower_bound = config['bounds'][
            'min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T)
        upper_bound = config['bounds'][
            'max'] * T if 'bounds' in config else np.inf * np.ones(dU * T)

        ### run one iteration of CEM with extra samples
        # if init_cov is None:
        cov = config['init_var'] * np.eye(dU * T)
        # else:
        #     cov = config['init_var'] * init_cov
        mean, cov = self._cem_step(x0,
                                   obs,
                                   mean,
                                   cov,
                                   config['init_M'],
                                   config['K'],
                                   lower_bound,
                                   upper_bound,
                                   dU,
                                   T,
                                   dynamics,
                                   cost_funcs,
                                   plot_env=plot_env)
        self.means.append(mean)
        self.covs.append(cov)

        if plot_env:
            pass  # raw_input('CEM iter 0')

        ### remaining iterations
        for iter in xrange(1, config['iters']):
            mean, cov = self._cem_step(x0,
                                       obs,
                                       mean,
                                       cov,
                                       config['M'],
                                       config['K'],
                                       lower_bound,
                                       upper_bound,
                                       dU,
                                       T,
                                       dynamics,
                                       cost_funcs,
                                       plot_env=plot_env)
            self.means.append(mean)
            self.covs.append(cov)

            if plot_env:
                pass  # raw_input('CEM iter {0}'.format(iter))

        sample = Sample(meta_data=self._meta_data, T=T)
        U = mean.reshape(T, dU)
        U[:, -1] = 0.  # TODO hack
        sample.set_U(U, t=slice(0, T))
        sample.set_O(obs, t=0)
        sample.set_X(x0, t=0)
        sample.rollout(dynamics)

        return sample
Example #10
0
    def _cem_step(self,
                  x0,
                  obs,
                  mean,
                  cov,
                  M,
                  K,
                  lower_bound,
                  upper_bound,
                  dU,
                  T,
                  dynamics,
                  cost_funcs,
                  plot_env=None):
        """
        :param mean: mean of trajectory controls Gaussian
        :param obs: observation
        :param cov: covariance of trajectory controls Gaussian
        :param M: sample M controls
        :param K: keep K lowest cost trajectories
        :return: mean, cov
        """
        ### sample trajectories
        M_controls = []
        while len(M_controls) < M:
            control = np.random.multivariate_normal(mean, cov)
            if np.all(control < upper_bound) and np.all(control > lower_bound):
                M_controls.append(control)
        samples = []
        for U_flat in M_controls:
            sample = Sample(meta_data=self._meta_data, T=T)
            U = U_flat.reshape(T, dU)
            U[:, -1] = 0.  # TODO hack
            sample.set_U(U, t=slice(0, T))
            sample.set_O(obs, t=0)
            sample.set_X(x0, t=0)
            for sub_control, u_sub in self._config['fixed'].items():
                sample.set_U(np.array([list(u_sub) * T]).T,
                             t=slice(0, T),
                             sub_control=sub_control)
            sample.rollout(dynamics)
            samples.append(sample)

        ### keep lowest K cost trajectories
        costs = [0.] * len(samples)
        for cost_func in cost_funcs:
            if hasattr(cost_func, 'eval_batch'):
                costs = [
                    c + cost_func_approx.J for c, cost_func_approx in zip(
                        costs, cost_func.eval_batch(samples))
                ]
            else:
                costs = [
                    c + cost_func.eval(sample).J
                    for c, sample in zip(costs, samples)
                ]
        samples_costs = zip(samples, costs)
        samples_costs = sorted(samples_costs, key=lambda x: x[1])
        K_samples = [x[0] for x in samples_costs[:K]]

        ### fit Gaussian
        data = np.vstack([s.get_U().ravel() for s in K_samples])
        new_mean = np.mean(data, axis=0)
        new_cov = np.cov(data, rowvar=0)

        if plot_env:
            plot_env.rave_env.clear_plots()
            colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
            for sample, color in zip([x[0] for x in samples_costs], colors):
                for t in xrange(T - 1):
                    plot_env.rave_env.plot_segment(
                        sample.get_X(t=t, sub_state='position'),
                        sample.get_X(t=t + 1, sub_state='position'),
                        color=color)

        return new_mean, new_cov
Example #11
0
def rosbag_to_sample(rosbag_fname):
    print('Parsing {0}'.format(rosbag_fname))

    bebop_topics = params['bebop']['topics']

    ### read all messages
    bag = rosbag.Bag(rosbag_fname, 'r')
    msg_dict = collections.defaultdict(list)
    start_time = None
    for topic, msg, t in bag.read_messages():
        if start_time is None:
            start_time = t.to_sec()
        else:
            assert(start_time <= t.to_sec())
        msg_dict[topic].append((t.to_sec() - start_time, msg))
    bag.close()
    # get_measured_vel(rosbag_fname, msg_dict) # necessary hack
    if len(msg_dict[bebop_topics['start_rollout']]) == 0:
        print('\tNo start_rollout, returning None')
        return None
    ### sort by time
    for k, v in msg_dict.items():
        msg_dict[k] = sorted(v, key=lambda x: x[0])
    ### prune start and end
    is_coll = (bebop_topics['collision'] in msg_dict.keys())
    start_time = msg_dict[bebop_topics['start_rollout']][0][0]
    end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf
    for topic, time_msg in msg_dict.items():
        msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time]

    ### necessary info for Samples
    dt = params['dt']
    T = params['prediction']['dagger']['T']

    ### space by dt
    image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:]
    cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:]
    # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:]

    ### create sample
    sample = Sample(T=len(image_time_msgs), meta_data=params)
    cvb = cv_bridge.CvBridge()
    for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)):
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]

        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')

        ### collision
        sample.set_O([0], t=t, sub_obs='collision')

        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp

    assert(sample.isfinite())

    return sample
Example #12
0
    def _run_rollout(self, itr):
        T = params['probcoll']['T']
        label_with_noise = params['probcoll']['label_with_noise']

        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            rep = 0
            while rep < self._conditions.repeats:
                self._logger.info(
                    '\t\tStarting cond {0} rep {1} itr {2}'.format(
                        cond, rep, itr))
                start = time.time()
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                    o = rollout.get_O(t=0)
                    if label_with_noise:
                        u = rollout.get_U(t=0)
                    else:
                        u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)

                    if hasattr(self._mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, self._mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))
                world_info = self._get_world_info()
                mpc_info = self._mpc_policy.get_info()

                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)
                world_infos.append(world_info)
                mpc_infos.append(mpc_info)

                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                    .format(cond, rep, elapsed, t * params['dt'] / elapsed))
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)

        self._reset_world(itr, 0, 0)  # leave the world as it was
Example #13
0
    def _run_testing(self, itr):
        if (itr != 0 and (itr == self._max_iter - 1 \
                or itr % params['world']['testing']['itr_freq'] == 0)):
            self._logger.info('Itr {0} testing'.format(itr))
            if self._agent.sim:
                #                if self._async_on:
                #                    self._logger.debug('Recovering probcoll model')
                #                    self._probcoll_model.recover()
                T = params['probcoll']['T']
                conditions = params['world']['testing']['positions']
                samples = []
                reset_pos, reset_quat = self._agent.get_sim_state_data()
                for cond in xrange(len(conditions)):
                    self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                        cond, itr))
                    start = time.time()
                    self._agent.execute_control(None,
                                                reset=True,
                                                pos=conditions[cond])
                    x0 = self._conditions.get_cond(0)
                    sample_T = Sample(meta_data=params, T=T)
                    sample_T.set_X(x0, t=0)
                    for t in xrange(T):
                        x0 = sample_T.get_X(t=t)

                        rollout, rollout_no_noise = self._agent.sample_policy(
                            x0, self._mpc_policy, T=1, use_noise=False)

                        o = rollout.get_O(t=0)
                        u = rollout_no_noise.get_U(t=0)

                        sample_T.set_U(u, t=t)
                        sample_T.set_O(o, t=t)

                        if not self._use_dynamics:
                            sample_T.set_X(rollout.get_X(t=0), t=t)

                        if self._world.is_collision(sample_T, t=t):
                            self._logger.warning(
                                '\t\t\tCrashed at t={0}'.format(t))
                            break

                        if self._use_dynamics:
                            if t < T - 1:
                                x_tp1 = self._dynamics.evolve(x0, u)
                                sample_T.set_X(x_tp1, t=t + 1)

                    else:
                        self._logger.info('\t\t\tLasted for t={0}'.format(t))

                    sample = sample_T.match(slice(0, t + 1))

                    if not self._is_good_rollout(sample, t):
                        self._logger.warning(
                            '\t\t\tNot good rollout. Repeating rollout.'.
                            format(t))
                        continue

                    samples.append(sample)

                    assert (samples[-1].isfinite())
                    elapsed = time.time() - start
                    self._logger.info(
                        '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                        .format(cond, elapsed, t * params['dt'] / elapsed))

                self._itr_save_samples(itr, samples, prefix='testing_')
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=reset_pos,
                                            quat=reset_quat)
            else:
                pass