def _create_mpc(self, itr, x0):
        """ Must initialize MPC """
        sample0 = Sample(meta_data=params, T=1)
        sample0.set_X(x0, t=0)
        self._update_world(sample0, 0)

        self._logger.info('\t\t\tCreating MPC')

        if self._planner_type == 'primitives':
            additional_costs = []
            mpc_policy = PrimitivesMPCPolicyBebop2d(
                self._trajopt,
                self._cost_probcoll,
                additional_costs=additional_costs,
                meta_data=params,
                use_threads=False,
                plot=True,
                epsilon_greedy=params['prediction']['dagger']
                ['epsilon_greedy'])
        elif self._planner_type == 'teleop':
            mpc_policy = TeleopMPCPolicyBebop2d(params)
        else:
            raise NotImplementedError(
                'planner_type {0} not implemented for bebop2d'.format(
                    self._planner_type))

        return mpc_policy
Esempio n. 2
0
    def closest_collision_distance(self, x):
        sample = Sample(meta_data=params, T=params['T'])
        pos = x[sample.get_X_idxs(sub_state='position')]
        quat = x[sample.get_X_idxs(sub_state='orientation')]
        pose = posquat_to_pose(pos, quat)

        robotpos_dist_contactpos = self.rave_env.closest_collision(pose=pose)
        if robotpos_dist_contactpos is None:
            return 1e3
        _, dist, _ = robotpos_dist_contactpos
        return dist
    def _run_itr_analyze(self, itr):
        T = 1
        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            for rep in xrange(self._conditions.repeats):
                self._logger.info('\t\tStarting cond {0} rep {1}'.format(
                    cond, rep))
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                mpc_policy = self._create_mpc(itr, x0)
                control_noise = self._create_control_noise(
                )  # create each time b/c may not be memoryless

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout = self._agent.sample_policy(x0,
                                                        mpc_policy,
                                                        noise=control_noise,
                                                        T=1)

                    u = rollout.get_U(t=0)
                    o = rollout.get_O(t=0)
                    u_label = u

                    sample_T.set_U(u_label, t=t)
                    sample_T.set_O(o, t=t)

                    if hasattr(mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                samples.append(sample_T)
                world_infos.append(self._get_world_info())
                mpc_infos.append(mpc_policy.get_info())

                assert (samples[-1].isfinite())
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)
Esempio n. 4
0
    def get_observation(self, x):
        obs_sample = Sample(meta_data=params, T=2)

        ### collision
        coll_time = self.coll_callback.get()
        is_coll = coll_time is not None
        obs_sample.set_O([int(is_coll)], t=0, sub_obs='collision')

        ### camera
        image_msgs = self.image_callback.get()
        assert(len(image_msgs) > 0)
        ### keep only those before the collision
        if is_coll:
            image_msgs_filt = [im for im in image_msgs if im.header.stamp.to_sec() < coll_time.to_sec()]
            if len(image_msgs_filt) == 0:
                image_msgs_filt = [image_msgs[-1]]
            image_msgs = image_msgs_filt
        image_msg = image_msgs[-1]
        im = AgentBebop2d.process_image(image_msg, self.cv_bridge)
        # im = np.zeros((params['O']['camera']['height'], params['O']['camera']['width']), dtype=np.float32)
        obs_sample.set_O(im.ravel(), t=0, sub_obs='camera')
        ### publish image for debugging
        self.im_debug_pub.publish(self.cv_bridge.cv2_to_imgmsg((255. * im).astype(np.uint8), 'mono8'))

        return obs_sample.get_O(t=0)
Esempio n. 5
0
 def _itr_load_testing_samples(self, itr):
     fname = os.path.join(self._itr_dir(itr),
                          'testing_samples_itr_{0}.npz'.format(itr))
     if os.path.exists(fname):
         return Sample.load(fname)
     elif not os.path.exists(self._itr_dir(itr)):
         raise Exception()
     else:
         return None
Esempio n. 6
0
    def _create_primitives(self):
        des_vel = params['planning']['cost_velocity']['velocity']
        weights = params['planning']['cost_velocity']['weights']

        assert (weights[1] == 0 and weights[2] == 0)

        thetas = np.linspace(-np.pi / 2., np.pi / 2., 19)
        speeds = np.linspace(0.1, 1.0, 10) * des_vel[0]

        samples = []
        for theta in thetas:
            for speed in speeds:
                sample = Sample(T=self._H)
                linearvel = [speed * np.cos(theta), speed * np.sin(theta), 0.]
                sample.set_U(linearvel, t=slice(0, self._H))
                samples.append(sample)

        return samples
Esempio n. 7
0
    def _evaluate_probcoll_model_on_samples(self, bootstrap, itr, samples,
                                            mpcs):
        ### load NN
        H = params['model']['T']
        model_file = self._itr_model_file(itr)
        if not ProbcollModelPointquad.checkpoint_exists(model_file):
            return [None] * len(samples), [None] * len(samples)

        bootstrap.load(model_file=model_file)

        means, stds = [], []
        for sample, mpc in zip(samples, mpcs):
            eval_samples = []
            for o_t, U in zip(sample.get_O(), mpc['controls']):
                eval_sample = Sample(T=H, meta_data=params)
                eval_sample.set_U(U, t=slice(0, H))
                eval_sample.set_O(o_t, t=0)
                eval_samples.append(eval_sample)

            mean, std = bootstrap.eval_sample_batch(eval_samples,
                                                    num_avg=10,
                                                    pre_activation=True)
            means.append(mean)
            stds.append(std)

        return means, stds
Esempio n. 8
0
 def _get_sim_state(self, xt):
     state_sample = Sample(meta_data=params, T=1)
     state_msg = self.sim_state
     state = np.array([
         state_msg.position.x, state_msg.position.y, state_msg.position.z,
         state_msg.orientation.x, state_msg.orientation.y,
         state_msg.orientation.z, state_msg.orientation.w
     ])
     state_sample.set_X(state[:3], t=0, sub_state='position')
     state_sample.set_X(state[3:], t=0, sub_state='orientation')
     x = np.nan_to_num(xt) + np.nan_to_num(state_sample.get_X(t=0))
     return x
Esempio n. 9
0
 def execute_control(self, u, reset=False, pos=None, quat=None):
     if u is not None:
         s = Sample(meta_data=params, T=2)
         steer = u[s.get_U_idxs('cmd_steer')][0]
         vel = u[s.get_U_idxs('cmd_vel')][0]
         self.cmd_steer_pub.publish(std_msgs.msg.Float32(steer))
         self.cmd_vel_pub.publish(std_msgs.msg.Float32(vel))
         if self.sim:
             data = self.srv(steer=steer, vel=vel, reset=reset)
     else:
         self.cmd_steer_pub.publish(std_msgs.msg.Float32(49.5))
         self.cmd_vel_pub.publish(std_msgs.msg.Float32(0.))
         if self.sim:
             if reset:
                 if quat is None:
                     quat = [0.0, 0.0, 0.0, 0.0]
                 if pos is None:
                     if len(params['world']['testing']['positions']) > 0:
                         pos = params['world']['testing']['positions'][
                             np.random.randint(
                                 len(params['world']['testing']
                                     ['positions']))]
                     else:
                         pos = [0.0, 0.0, 0.0]
                 pose = geometry_msgs.msg.Pose()
                 pose.position.x, pose.position.y, pose.position.z = pos
                 pose.orientation.x, pose.orientation.y, pose.orientation.z, pose.orientation.w = quat
                 data = self.srv(reset=True, pose=pose)
             else:
                 data = self.srv()
     if self.sim:
         self.sim_last_coll = self.sim_coll
         self.sim_coll = data.coll
         self.sim_image = data.image
         self.sim_depth = data.depth
         self.sim_back_image = data.back_image
         self.sim_back_depth = data.back_depth
         self.sim_state = data.pose
Esempio n. 10
0
    def get_observation(self, x):
        obs_sample = Sample(meta_data=params, T=2)

        if self.sim:
            is_coll = self.sim_coll
            image_msg = self.sim_image
            depth_msg = self.sim_depth
            back_image_msg = self.sim_back_image
            back_depth_msg = self.sim_back_depth
        else:
            ### collision
            coll_time = self.coll_callback.get()
            is_coll = coll_time is not None

            ### camera
            image_msgs = self.image_callback.get()
            assert (len(image_msgs) > 0)
            ### keep only those before the collision
            if is_coll:
                image_msgs_filt = [
                    im for im in image_msgs
                    if im.header.stamp.to_sec() < coll_time.to_sec()
                ]
                if len(image_msgs_filt) == 0:
                    image_msgs_filt = [image_msgs[-1]]
                image_msgs = image_msgs_filt
            image_msg = image_msgs[-1]

        if params['O'].get('use_depth', False):
            im = AgentRCcar.process_depth(depth_msg, self.cv_bridge)
            back_im = AgentRCcar.process_depth(back_depth_msg, self.cv_bridge)
        else:
            im = AgentRCcar.process_image(image_msg, self.cv_bridge)
            back_im = AgentRCcar.process_image(back_image_msg, self.cv_bridge)

        ros_image = self.cv_bridge.cv2_to_imgmsg(im, "mono8")
        self.pred_image_pub.publish(ros_image)
        obs_sample.set_O(im.ravel(), t=0, sub_obs='camera')
        obs_sample.set_O(back_im.ravel(), t=0, sub_obs='back_camera')
        obs_sample.set_O([int(is_coll)], t=0, sub_obs='collision')
        return obs_sample.get_O(t=0)
Esempio n. 11
0
    def reset(self, back_up, itr=None, cond=None, rep=None, record=True):
#        self._agent.execute_control(None) # stop the car
        assert(itr is not None and cond is not None and rep is not None)

        # TODO add sim backup
        ### back car up straight and slow
        if back_up:
            sample = Sample(meta_data=params, T=2)
            sample.set_U([np.random.uniform(*self.wp['back_up']['cmd_steer'])], t=0, sub_control='cmd_steer')
            sample.set_U([self.wp['back_up']['cmd_vel']], t=0, sub_control='cmd_vel')
            u = sample.get_U(t=0)
            if self._agent.sim:
                if self._agent.sim_coll and self._agent.sim_last_coll:
                    self._logger.info('Resetting the car')
                    self._agent.execute_control(None, reset=True)
                elif self._agent.sim_coll:
                    # TODO add backup logic for not sim
                    self._logger.info('Backing the car up')
                    for _ in xrange(int(self.wp['back_up']['duration'] / params['dt'])): 
                        self._agent.execute_control(u)
                        if self._agent.sim_coll:
                            break
                    for _ in xrange(int(1.0 / params['dt'])):
                        self._agent.execute_control(None)
            else:
                self._logger.info('Backing the car up')
                start = time.time()
                while time.time() - start < self.wp['back_up']['duration']:
                    self._agent.execute_control(u)

                self._agent.execute_control(None)

                time.sleep(1.0)
        else:
            self._agent.execute_control(None, reset=True)
            ### TODO add a flag
            self._ros_reset_pub.publish(std_msgs.msg.Empty())

        ### record
        if record:
            self._start_record_bag(itr, cond, rep)
        
        if not self._agent.sim:
            ### reset indicators
            self._ros_collision.get()
            self.num_collisions = 0
Esempio n. 12
0
    def _load_samples(self):
        samples_itrs = []

        itr = 0
        while True:
            try:
                samples_itrs.append(self._itr_load_samples(itr))
                itr += 1
            except:
                break

        self._logger.info('Loaded {0} iterations of samples'.format(
            len(samples_itrs)))

        ### load initial dataset
        init_data_folder = params['probcoll'].get('init_data', None)
        if init_data_folder is not None:
            if itr == 0:
                samples_itrs.append([])
            num_init_samples = 0

            fnames = [
                os.path.join(init_data_folder, fname)
                for fname in os.listdir(init_data_folder)
            ]
            for fname in fnames:
                try:
                    samples = Sample.load(fname)
                except:
                    continue

                self._logger.debug('Loaded samples from {0}'.format(fname))
                samples_itrs[0] += samples
                num_init_samples += len(samples)

            self._logger.info('Loaded initial dataset of {0} samples'.format(
                num_init_samples))

        return samples_itrs
Esempio n. 13
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      use_noise=True,
                      only_noise=False,
                      **policy_args):
        if T is None:
            T = policy._T
        rate = rospy.Rate(1. / params['dt'])
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            if self.sim:
                x_t = self._get_sim_state(x_t)
            start = time.time()
            u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
            self._logger.debug(time.time() - start)
            # only execute control if no collision
            # TODO is this necessary
            if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
                if use_noise:
                    self.execute_control(u_t)
                else:
                    self.execute_control(u_t_no_noise)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # In sim we do not have cycles
            if self.sim:
                policy_sample.set_O([int(self.sim_coll)],
                                    t=t,
                                    sub_obs='collision')
            else:
                # propagate dynamics
                if t < T - 1:
                    x_tp1 = self._dynamics.evolve(x_t, u_t)
                    policy_sample.set_X(x_tp1, t=t + 1)

                rate.sleep()
                # see if collision in the past cycle
                policy_sample.set_O(
                    [int(self.coll_callback.get() is not None)],
                    t=t,
                    sub_obs='collision')

        return policy_sample, policy_sample_no_noise
Esempio n. 14
0
    def _run_testing(self, itr):
        if (itr != 0 and (itr == self._max_iter - 1 \
                or itr % params['world']['testing']['itr_freq'] == 0)):
            self._logger.info('Itr {0} testing'.format(itr))
            if self._agent.sim:
                #                if self._async_on:
                #                    self._logger.debug('Recovering probcoll model')
                #                    self._probcoll_model.recover()
                T = params['probcoll']['T']
                conditions = params['world']['testing']['positions']
                samples = []
                reset_pos, reset_quat = self._agent.get_sim_state_data()
                for cond in xrange(len(conditions)):
                    self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                        cond, itr))
                    start = time.time()
                    self._agent.execute_control(None,
                                                reset=True,
                                                pos=conditions[cond])
                    x0 = self._conditions.get_cond(0)
                    sample_T = Sample(meta_data=params, T=T)
                    sample_T.set_X(x0, t=0)
                    for t in xrange(T):
                        x0 = sample_T.get_X(t=t)

                        rollout, rollout_no_noise = self._agent.sample_policy(
                            x0, self._mpc_policy, T=1, use_noise=False)

                        o = rollout.get_O(t=0)
                        u = rollout_no_noise.get_U(t=0)

                        sample_T.set_U(u, t=t)
                        sample_T.set_O(o, t=t)

                        if not self._use_dynamics:
                            sample_T.set_X(rollout.get_X(t=0), t=t)

                        if self._world.is_collision(sample_T, t=t):
                            self._logger.warning(
                                '\t\t\tCrashed at t={0}'.format(t))
                            break

                        if self._use_dynamics:
                            if t < T - 1:
                                x_tp1 = self._dynamics.evolve(x0, u)
                                sample_T.set_X(x_tp1, t=t + 1)

                    else:
                        self._logger.info('\t\t\tLasted for t={0}'.format(t))

                    sample = sample_T.match(slice(0, t + 1))

                    if not self._is_good_rollout(sample, t):
                        self._logger.warning(
                            '\t\t\tNot good rollout. Repeating rollout.'.
                            format(t))
                        continue

                    samples.append(sample)

                    assert (samples[-1].isfinite())
                    elapsed = time.time() - start
                    self._logger.info(
                        '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                        .format(cond, elapsed, t * params['dt'] / elapsed))

                self._itr_save_samples(itr, samples, prefix='testing_')
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=reset_pos,
                                            quat=reset_quat)
            else:
                pass
Esempio n. 15
0
    def sample_policy(self, x0, policy, T=None, **policy_args):
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        noise = policy_args.get('noise', ZeroNoise(params))

        rate = rospy.Rate(1. / params['dt'])
        policy_sample.set_X(x0, t=0)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t = policy.act(x_t, o_t, t, noise=noise)
            if params['probcoll']['planner_type'] != 'teleop': # TODO hack
                self.execute_control(u_t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)

            # propagate dynamics
            if t < T-1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t+1)
            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

        return policy_sample
Esempio n. 16
0
    def _evaluate_prediction_trajectory_model(self):
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], [], [], []
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            ### create MPC
            cp_params = params['probcoll']['cost_probcoll']
            cost_cp = CostProbcollPointquad(
                bootstrap,
                None,
                weight=float(cp_params['weight']),
                eval_cost=cp_params['eval_cost'],
                pre_activation=cp_params['pre_activation'])
            mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                      cost_cp,
                                                      additional_costs=[],
                                                      meta_data=params,
                                                      use_threads=False,
                                                      plot=False)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### evaluate on grid
            dx, dy = 20, 20
            samples = []
            itr_costs = []
            thetas, speeds = [], []
            for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
                for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                    s = copy.deepcopy(default_sample)

                    pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                    s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                    s.set_O(self._agent.get_observation(s.get_X(t=0),
                                                        noise=False),
                            t=slice(0, bootstrap.T))
                    x = s.get_X(t=0)
                    o = s.get_O(t=0)

                    # assume straight line trajectories
                    p_samples = []
                    p_speeds, p_thetas = [], []
                    for primitive in mpc_policy._mpc_policies:
                        p_speeds.append(primitive.speed)
                        p_thetas.append(primitive.theta)
                        primitive.act(x, o, t=0, noise=ZeroNoise(params))
                        p_samples.append(primitive._curr_traj)

                    prim_costs = mpc_policy._primitives_cost(p_samples)
                    min_cost_idx = np.argmin(prim_costs)

                    samples.append(p_samples[min_cost_idx])
                    itr_costs.append(prim_costs[min_cost_idx])
                    thetas.append(p_thetas[min_cost_idx])
                    speeds.append(p_speeds[min_cost_idx])

            ### evaluate on grid
            cp_params = params['probcoll']['cost_probcoll']
            eval_cost = cp_params['eval_cost']
            pre_activation = cp_params['pre_activation']
            probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
                samples, num_avg=10, pre_activation=pre_activation)

            bootstrap.close()

            sigmoid = lambda x: (1. / (1. + np.exp(-x)))
            # costs = []
            # for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
            #     cost = 0.
            #     for t in [bootstrap.T - 1]:
            #         speed = 1.
            #         cost += eval(eval_cost)
            #     costs.append(cost / bootstrap.T)
            # costs = np.array(costs)

            itrs_Xs.append(
                np.array(
                    [s.get_X(t=0, sub_state='position')[:2] for s in samples]))
            itrs_thetas.append(np.array(thetas))
            itrs_speeds.append(
                np.array(speeds) /
                (params['trajopt']['cost_velocity']['velocity'][0] * dx *
                 dy))  # normalize so fits into grid density
            # itrs_speeds.append(np.ones(len(speeds), dtype=float) / (dx * dy)) # TODO
            if pre_activation:
                probs_mean_batch = sigmoid(probs_mean_batch)
                # probs_std_batch = sigmoid(probs_std_batch)
                probs_std_batch /= probs_std_batch.max()
            itrs_prob_means.append(np.array(probs_mean_batch))
            itrs_prob_stds.append(np.array(probs_std_batch))
            itrs_costs.append(np.array(itr_costs))

            itr += 1

            # if itr >= 2: # TODO
            #     break

        return itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs
Esempio n. 17
0
def rosbag_to_sample(rosbag_fname):
    print('Parsing {0}'.format(rosbag_fname))

    bebop_topics = params['bebop']['topics']

    ### read all messages
    bag = rosbag.Bag(rosbag_fname, 'r')
    msg_dict = collections.defaultdict(list)
    start_time = None
    for topic, msg, t in bag.read_messages():
        if start_time is None:
            start_time = t.to_sec()
        else:
            assert(start_time <= t.to_sec())
        msg_dict[topic].append((t.to_sec() - start_time, msg))
    bag.close()
    # get_measured_vel(rosbag_fname, msg_dict) # necessary hack
    if len(msg_dict[bebop_topics['start_rollout']]) == 0:
        print('\tNo start_rollout, returning None')
        return None
    ### sort by time
    for k, v in msg_dict.items():
        msg_dict[k] = sorted(v, key=lambda x: x[0])
    ### prune start and end
    is_coll = (bebop_topics['collision'] in msg_dict.keys())
    start_time = msg_dict[bebop_topics['start_rollout']][0][0]
    end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf
    for topic, time_msg in msg_dict.items():
        msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time]

    ### necessary info for Samples
    dt = params['dt']
    T = params['prediction']['dagger']['T']

    ### space by dt
    image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:]
    cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:]
    # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:]

    ### create sample
    sample = Sample(T=len(image_time_msgs), meta_data=params)
    cvb = cv_bridge.CvBridge()
    for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)):
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]

        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')

        ### collision
        sample.set_O([0], t=t, sub_obs='collision')

        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp

    assert(sample.isfinite())

    return sample
    def act(self, x, obs, t, noise, ref_traj=None):
        ### create desired path (radiates outward)
        T, dt, theta, speed = self._T, self._meta_data[
            'dt'], self.theta, self.speed
        traj = Sample(meta_data=self._meta_data, T=T)

        ### initialize
        traj.set_X(x, t=0)
        traj.set_O(obs, t=0)

        ### create desired path (radiates outward)
        linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
        for t in xrange(T - 1):
            x_t = traj.get_X(t=t)
            x_tp1 = self._dynamics.evolve(x_t, linearvel)

            traj.set_X(x_tp1, t=t + 1)
            traj.set_U(linearvel, t=t)
        traj.set_U(linearvel, t=-1)

        self._curr_traj = traj

        u = traj.get_U(t=0)
        return u + noise.sample(u)
Esempio n. 19
0
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]

        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')

        ### collision
        sample.set_O([0], t=t, sub_obs='collision')

        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp

    assert(sample.isfinite())

    return sample

if __name__ == '__main__':
    rospy.init_node('rosbag_to_sample', anonymous=True)

    yaml_path = os.path.join(FOLDER, 'params_bebop2d.yaml')
    load_params(yaml_path)

    samples = [rosbag_to_sample(os.path.join(FOLDER, f))
               for f in sorted(os.listdir(FOLDER)) if os.path.splitext(f)[1] == '.bag']
    samples = filter(lambda x: x is not None, samples)
    Sample.save(os.path.join(FOLDER, 'samples.npz'), samples)
Esempio n. 20
0
    def _evaluate_probcoll_model_groundtruth(self):
        """
        For each primitive, execute the primitive for T timesteps to obtain ground truth, and compare label
        """
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_gt_crashes, itrs_pred_crashes = [], []
        positions, speeds_angles = None, None
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            ### create MPC
            cp_params = params['probcoll']['cost']
            cost_cp = CostProbcollPointquad(
                bootstrap,
                None,
                weight=float(cp_params['weight']),
                eval_cost=cp_params['eval_cost'],
                pre_activation=cp_params['pre_activation'])
            mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                      cost_cp,
                                                      additional_costs=[],
                                                      meta_data=params,
                                                      use_threads=False,
                                                      plot=False)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### speeds/angles
            speeds_angles = [(p.speed, p.theta)
                             for p in mpc_policy._mpc_policies]
            positions = []

            ### evaluate on grid
            dx, dy = 12, 5
            rollouts = []
            for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
                for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                    s = copy.deepcopy(default_sample)

                    pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                    s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                    s.set_O(self._agent.get_observation(s.get_X(t=0),
                                                        noise=False),
                            t=slice(0, bootstrap.T))

                    if np.sum(s.get_O(sub_obs='collision', t=0)) > 0:
                        continue

                    positions.append((xpos, ypos))

                    x0 = s.get_X(t=0)
                    rollouts_ij = [
                        self._agent.sample_policy(x0,
                                                  p,
                                                  noise=ZeroNoise(params),
                                                  T=bootstrap.T)
                        for p in mpc_policy._mpc_policies
                    ]
                    rollouts.append(rollouts_ij)

            rollouts = np.array(rollouts)
            gt_crashes = np.array([
                r.get_O(sub_obs='collision').sum() > 0
                for r in rollouts.ravel()
            ]).reshape(rollouts.shape)

            sigmoid = lambda x: 1. / (1. + np.exp(-x))
            pred_crashes = np.array([
                sigmoid(mean) > 0.5 for mean in bootstrap.eval_sample_batch(
                    rollouts.ravel(), num_avg=10, pre_activation=True)[0]
            ]).reshape(rollouts.shape)

            bootstrap.close()

            itrs_gt_crashes.append(gt_crashes)
            itrs_pred_crashes.append(pred_crashes)

            itr += 1

        return itrs_gt_crashes, itrs_pred_crashes, positions, speeds_angles
Esempio n. 21
0
    def _cem_step(self,
                  x0,
                  obs,
                  mean,
                  cov,
                  M,
                  K,
                  lower_bound,
                  upper_bound,
                  dU,
                  T,
                  dynamics,
                  cost_funcs,
                  plot_env=None):
        """
        :param mean: mean of trajectory controls Gaussian
        :param obs: observation
        :param cov: covariance of trajectory controls Gaussian
        :param M: sample M controls
        :param K: keep K lowest cost trajectories
        :return: mean, cov
        """
        ### sample trajectories
        M_controls = []
        while len(M_controls) < M:
            control = np.random.multivariate_normal(mean, cov)
            if np.all(control < upper_bound) and np.all(control > lower_bound):
                M_controls.append(control)
        samples = []
        for U_flat in M_controls:
            sample = Sample(meta_data=self._meta_data, T=T)
            U = U_flat.reshape(T, dU)
            U[:, -1] = 0.  # TODO hack
            sample.set_U(U, t=slice(0, T))
            sample.set_O(obs, t=0)
            sample.set_X(x0, t=0)
            for sub_control, u_sub in self._config['fixed'].items():
                sample.set_U(np.array([list(u_sub) * T]).T,
                             t=slice(0, T),
                             sub_control=sub_control)
            sample.rollout(dynamics)
            samples.append(sample)

        ### keep lowest K cost trajectories
        costs = [0.] * len(samples)
        for cost_func in cost_funcs:
            if hasattr(cost_func, 'eval_batch'):
                costs = [
                    c + cost_func_approx.J for c, cost_func_approx in zip(
                        costs, cost_func.eval_batch(samples))
                ]
            else:
                costs = [
                    c + cost_func.eval(sample).J
                    for c, sample in zip(costs, samples)
                ]
        samples_costs = zip(samples, costs)
        samples_costs = sorted(samples_costs, key=lambda x: x[1])
        K_samples = [x[0] for x in samples_costs[:K]]

        ### fit Gaussian
        data = np.vstack([s.get_U().ravel() for s in K_samples])
        new_mean = np.mean(data, axis=0)
        new_cov = np.cov(data, rowvar=0)

        if plot_env:
            plot_env.rave_env.clear_plots()
            colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
            for sample, color in zip([x[0] for x in samples_costs], colors):
                for t in xrange(T - 1):
                    plot_env.rave_env.plot_segment(
                        sample.get_X(t=t, sub_state='position'),
                        sample.get_X(t=t + 1, sub_state='position'),
                        color=color)

        return new_mean, new_cov
Esempio n. 22
0
def plot_final_policy_combined(exp_groups,
                               des_mps,
                               des_speed,
                               min_speed,
                               ax=None,
                               fs=20.,
                               save_path=None):
    dt = 0.5

    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(5, 3.5))
    else:
        f = ax.get_figure()

    exp_groups.insert(0, [{
        'folder': os.path.join(EXP_FOLDER, 'random'),
        'label': 'Random policy',
        'color': cm.Greys(1.0)
    }])

    for exp_group_num, exp_group in enumerate(exp_groups):
        distance_travelled = []
        for exp in exp_group:
            samples_folder = os.path.join(exp['folder'],
                                          'prediction/final_policy/')
            samples_fnames = [
                os.path.join(samples_folder, fname)
                for fname in os.listdir(samples_folder) if fname[-4:] == '.npz'
            ]
            assert (len(samples_fnames) == 1)
            samples_fname = samples_fnames[0]

            samples = Sample.load(samples_fname)
            for sample in samples:
                distance = dt * (des_mps / des_speed) * (sample.get_U()[:, 1] -
                                                         min_speed).sum()
                distance_travelled.append(distance)

        width = 0.8 / len(exp_groups)
        ax.bar([exp_group_num * width], [np.mean(distance_travelled)],
               yerr=[np.std(distance_travelled)],
               width=width * 0.8,
               error_kw=dict(lw=4, capsize=5, capthick=2),
               color=exp_group[0]['color'],
               ecolor=cm.Greys(0.5),
               label=exp_group[0]['label'])

    # for exp_group in exp_groups:
    #     ax.plot([], [], lw=5., label=exp_group[0]['label'], color=exp_group[0]['color'])
    lgd = ax.legend(loc='upper center',
                    fontsize=fs * 0.8,
                    bbox_to_anchor=(1.35, 0.7))

    ax.set_xlim((-0.05, width * len(exp_groups)))
    ax.set_ylim((-0.3, ax.get_ylim()[1]))
    ax.set_ylabel(r'Distance travelled (m)', fontsize=fs)

    for tick in ax.xaxis.get_major_ticks() + ax.xaxis.get_minor_ticks():
        tick.label1.set_visible(False)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(fs)

    if save_path is not None:
        f.savefig(save_path, bbox_extra_artists=(lgd, ), bbox_inches='tight')
Esempio n. 23
0
    def _evaluate_probcoll_model(self):
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], []
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### evaluate on grid
            dx, dy = 30, 30
            positions = np.dstack(
                np.meshgrid(np.linspace(-0.5, 3.0, dx),
                            np.linspace(1.0, -1.0, dy))).reshape(dx * dy, 2)

            samples = []
            for pos in positions:
                s = copy.deepcopy(default_sample)

                pos = np.concatenate(
                    (pos, [s.get_X(t=0, sub_state='position')[-1]]))
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False),
                        t=slice(0, bootstrap.T))
                s.set_U(np.zeros((bootstrap.T, bootstrap.dU)),
                        t=slice(0, bootstrap.T))

                samples.append(s)

            cp_params = params['probcoll']['cost_probcoll']
            eval_cost = cp_params['eval_cost']
            pre_activation = cp_params['pre_activation']
            probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
                samples, num_avg=10, pre_activation=pre_activation)

            bootstrap.close()

            sigmoid = lambda x: (1. / (1. + np.exp(-x)))
            costs = []
            for probs_mean, probs_std in zip(probs_mean_batch,
                                             probs_std_batch):
                cost = 0.
                for t in [bootstrap.T - 1]:
                    speed = 1.
                    cost += eval(eval_cost)
                costs.append(cost / bootstrap.T)
            costs = np.array(costs)

            # if itr == 3:
            #     import IPython; IPython.embed()

            Xs = np.array([
                s.get_X(t=0, sub_state='position')[:2] for s in samples
            ]).reshape(dx, dy, 2)
            itrs_Xs.append(Xs)
            if pre_activation:
                probs_mean_batch = sigmoid(probs_mean_batch)
                # probs_std_batch = sigmoid(probs_std_batch)
                probs_std_batch /= probs_std_batch.max()
            itrs_prob_means.append(
                probs_mean_batch.max(axis=1).reshape(dx, dy))
            itrs_prob_stds.append(probs_std_batch.max(axis=1).reshape(dx, dy))
            itrs_costs.append(costs.reshape(dx, dy))

            # if itr >= 3:
            #     break

            itr += 1

        return itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs
Esempio n. 24
0
def plot_final_policy(exp_groups,
                      des_mps,
                      des_speed,
                      min_speed,
                      ax=None,
                      fs=20.,
                      save_path=None):
    dt = 0.5

    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(10, 3.5))
    else:
        f = ax.get_figure()

    exp_groups.insert(0, [{
        'folder': os.path.join(EXP_FOLDER, 'random'),
        'label': 'Random policy',
        'color': cm.Greys(0.5)
    }])

    for exp_num, exp in enumerate(flatten_list(exp_groups)):
        samples_folder = os.path.join(exp['folder'],
                                      'prediction/final_policy/')
        samples_fnames = [
            os.path.join(samples_folder, fname)
            for fname in os.listdir(samples_folder) if fname[-4:] == '.npz'
        ]
        assert (len(samples_fnames) == 1)
        samples_fname = samples_fnames[0]

        samples = Sample.load(samples_fname)
        distance_travelled = []
        for sample in samples:
            distance = dt * (des_mps / des_speed) * (sample.get_U()[:, 1] -
                                                     min_speed).sum()
            distance_travelled.append(distance)

        # ax.plot(range(len(distance_travelled)), distance_travelled,
        #         linestyle='',
        #         marker=exp['marker'],
        #         color=exp['color'],
        #         mew=exp['mew'])

        ax.bar(np.arange(len(distance_travelled)) + 0.15 * exp_num + 0.1,
               distance_travelled,
               width=0.15,
               color=exp['color'])

    for exp_group in exp_groups:
        ax.plot([], [],
                lw=5.,
                label=exp_group[0]['label'],
                color=exp_group[0]['color'])
    lgd = ax.legend(loc='upper right', fontsize=fs * 0.8)

    ax.set_xlabel(r'Starting position', fontsize=fs)
    ax.set_ylabel(r'Distance travelled (m)', fontsize=fs)

    N = len(distance_travelled)
    ax.set_xticks(np.arange(N) + 0.5)
    ax.set_xticklabels([str(x) for x in range(N)])
    # Hide major tick labels and customize minor tick labels
    ax.xaxis.set_minor_formatter(ticker.NullFormatter())
    ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(N) + 0.5))
    ax.xaxis.set_major_formatter(
        ticker.FixedFormatter([r'{0}'.format(str(x)) for x in range(N)]))
    for tick in ax.xaxis.get_major_ticks() + ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(fs)

    if save_path is not None:
        f.savefig(save_path, bbox_extra_artists=(lgd, ), bbox_inches='tight')
Esempio n. 25
0
 def _itr_save_samples(self, itr, samples, prefix=''):
     Sample.save(self._itr_samples_file(itr, prefix=prefix), samples)
Esempio n. 26
0
    def _run_rollout(self, itr):
        T = params['probcoll']['T']
        label_with_noise = params['probcoll']['label_with_noise']

        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            rep = 0
            while rep < self._conditions.repeats:
                self._logger.info(
                    '\t\tStarting cond {0} rep {1} itr {2}'.format(
                        cond, rep, itr))
                start = time.time()
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                    o = rollout.get_O(t=0)
                    if label_with_noise:
                        u = rollout.get_U(t=0)
                    else:
                        u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)

                    if hasattr(self._mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, self._mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))
                world_info = self._get_world_info()
                mpc_info = self._mpc_policy.get_info()

                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)
                world_infos.append(world_info)
                mpc_infos.append(mpc_info)

                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                    .format(cond, rep, elapsed, t * params['dt'] / elapsed))
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)

        self._reset_world(itr, 0, 0)  # leave the world as it was
Esempio n. 27
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      only_noise=False,
                      **policy_args):
        """
        Run the policy and collect the trajectory data

        :param x0: initial state
        :param policy: to execute
        :param policy_args: e.g. ref_traj, noise, etc
        :rtype Sample
        """
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t, u_t_no_noise = policy.act(x_t, o_t, t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)

        return policy_sample, policy_sample_no_noise
Esempio n. 28
0
    def plan(self,
             x0,
             obs,
             dynamics,
             cost_funcs,
             dU,
             T,
             init_traj=None,
             plot_env=None,
             init_cov=None,
             config=None):
        """
        :type init_traj: Sample
        :type dynamics: Dynamics
        :type cost_func: Cost
        """
        if config is None:
            config = self._config
        self.means, self.covs = [], []

        if init_traj is not None:
            mean = init_traj.get_U().ravel()
        elif 'init_u' in config:
            mean = config['init_u'] * T
        else:
            mean = np.zeros(dU * T)

        lower_bound = config['bounds'][
            'min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T)
        upper_bound = config['bounds'][
            'max'] * T if 'bounds' in config else np.inf * np.ones(dU * T)

        ### run one iteration of CEM with extra samples
        # if init_cov is None:
        cov = config['init_var'] * np.eye(dU * T)
        # else:
        #     cov = config['init_var'] * init_cov
        mean, cov = self._cem_step(x0,
                                   obs,
                                   mean,
                                   cov,
                                   config['init_M'],
                                   config['K'],
                                   lower_bound,
                                   upper_bound,
                                   dU,
                                   T,
                                   dynamics,
                                   cost_funcs,
                                   plot_env=plot_env)
        self.means.append(mean)
        self.covs.append(cov)

        if plot_env:
            pass  # raw_input('CEM iter 0')

        ### remaining iterations
        for iter in xrange(1, config['iters']):
            mean, cov = self._cem_step(x0,
                                       obs,
                                       mean,
                                       cov,
                                       config['M'],
                                       config['K'],
                                       lower_bound,
                                       upper_bound,
                                       dU,
                                       T,
                                       dynamics,
                                       cost_funcs,
                                       plot_env=plot_env)
            self.means.append(mean)
            self.covs.append(cov)

            if plot_env:
                pass  # raw_input('CEM iter {0}'.format(iter))

        sample = Sample(meta_data=self._meta_data, T=T)
        U = mean.reshape(T, dU)
        U[:, -1] = 0.  # TODO hack
        sample.set_U(U, t=slice(0, T))
        sample.set_O(obs, t=0)
        sample.set_X(x0, t=0)
        sample.rollout(dynamics)

        return sample
Esempio n. 29
0
 def _itr_load_samples(self, itr):
     fname = os.path.join(self._itr_dir(itr),
                          'samples_itr_{0}.npz'.format(itr))
     return Sample.load(fname)