def _create_mpc(self, itr, x0):
        """ Must initialize MPC """
        sample0 = Sample(meta_data=params, T=1)
        sample0.set_X(x0, t=0)
        self._update_world(sample0, 0)

        self._logger.info('\t\t\tCreating MPC')

        if self._planner_type == 'primitives':
            additional_costs = []
            mpc_policy = PrimitivesMPCPolicyBebop2d(
                self._trajopt,
                self._cost_probcoll,
                additional_costs=additional_costs,
                meta_data=params,
                use_threads=False,
                plot=True,
                epsilon_greedy=params['prediction']['dagger']
                ['epsilon_greedy'])
        elif self._planner_type == 'teleop':
            mpc_policy = TeleopMPCPolicyBebop2d(params)
        else:
            raise NotImplementedError(
                'planner_type {0} not implemented for bebop2d'.format(
                    self._planner_type))

        return mpc_policy
    def _run_itr_analyze(self, itr):
        T = 1
        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            for rep in xrange(self._conditions.repeats):
                self._logger.info('\t\tStarting cond {0} rep {1}'.format(
                    cond, rep))
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                mpc_policy = self._create_mpc(itr, x0)
                control_noise = self._create_control_noise(
                )  # create each time b/c may not be memoryless

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout = self._agent.sample_policy(x0,
                                                        mpc_policy,
                                                        noise=control_noise,
                                                        T=1)

                    u = rollout.get_U(t=0)
                    o = rollout.get_O(t=0)
                    u_label = u

                    sample_T.set_U(u_label, t=t)
                    sample_T.set_O(o, t=t)

                    if hasattr(mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                samples.append(sample_T)
                world_infos.append(self._get_world_info())
                mpc_infos.append(mpc_policy.get_info())

                assert (samples[-1].isfinite())
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)
Beispiel #3
0
 def _get_sim_state(self, xt):
     state_sample = Sample(meta_data=params, T=1)
     state_msg = self.sim_state
     state = np.array([
         state_msg.position.x, state_msg.position.y, state_msg.position.z,
         state_msg.orientation.x, state_msg.orientation.y,
         state_msg.orientation.z, state_msg.orientation.w
     ])
     state_sample.set_X(state[:3], t=0, sub_state='position')
     state_sample.set_X(state[3:], t=0, sub_state='orientation')
     x = np.nan_to_num(xt) + np.nan_to_num(state_sample.get_X(t=0))
     return x
Beispiel #4
0
    def sample_policy(self, x0, policy, T=None, **policy_args):
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        noise = policy_args.get('noise', ZeroNoise(params))

        rate = rospy.Rate(1. / params['dt'])
        policy_sample.set_X(x0, t=0)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t = policy.act(x_t, o_t, t, noise=noise)
            if params['probcoll']['planner_type'] != 'teleop': # TODO hack
                self.execute_control(u_t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)

            # propagate dynamics
            if t < T-1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t+1)
            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

        return policy_sample
    def act(self, x, obs, t, noise, ref_traj=None):
        ### create desired path (radiates outward)
        T, dt, theta, speed = self._T, self._meta_data[
            'dt'], self.theta, self.speed
        traj = Sample(meta_data=self._meta_data, T=T)

        ### initialize
        traj.set_X(x, t=0)
        traj.set_O(obs, t=0)

        ### create desired path (radiates outward)
        linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
        for t in xrange(T - 1):
            x_t = traj.get_X(t=t)
            x_tp1 = self._dynamics.evolve(x_t, linearvel)

            traj.set_X(x_tp1, t=t + 1)
            traj.set_U(linearvel, t=t)
        traj.set_U(linearvel, t=-1)

        self._curr_traj = traj

        u = traj.get_U(t=0)
        return u + noise.sample(u)
Beispiel #6
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      use_noise=True,
                      only_noise=False,
                      **policy_args):
        if T is None:
            T = policy._T
        rate = rospy.Rate(1. / params['dt'])
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            if self.sim:
                x_t = self._get_sim_state(x_t)
            start = time.time()
            u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
            self._logger.debug(time.time() - start)
            # only execute control if no collision
            # TODO is this necessary
            if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
                if use_noise:
                    self.execute_control(u_t)
                else:
                    self.execute_control(u_t_no_noise)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # In sim we do not have cycles
            if self.sim:
                policy_sample.set_O([int(self.sim_coll)],
                                    t=t,
                                    sub_obs='collision')
            else:
                # propagate dynamics
                if t < T - 1:
                    x_tp1 = self._dynamics.evolve(x_t, u_t)
                    policy_sample.set_X(x_tp1, t=t + 1)

                rate.sleep()
                # see if collision in the past cycle
                policy_sample.set_O(
                    [int(self.coll_callback.get() is not None)],
                    t=t,
                    sub_obs='collision')

        return policy_sample, policy_sample_no_noise
Beispiel #7
0
    def sample_policy(self,
                      x0,
                      policy,
                      T=None,
                      only_noise=False,
                      **policy_args):
        """
        Run the policy and collect the trajectory data

        :param x0: initial state
        :param policy: to execute
        :param policy_args: e.g. ref_traj, noise, etc
        :rtype Sample
        """
        if T is None:
            T = policy._T
        policy_sample = Sample(meta_data=params, T=T)
        policy_sample.set_X(x0, t=0)
        policy_sample_no_noise = Sample(meta_data=params, T=T)
        for t in xrange(T):
            # get observation and act
            x_t = policy_sample.get_X(t=t)
            o_t = self.get_observation(x_t)
            u_t, u_t_no_noise = policy.act(x_t, o_t, t)

            # record
            policy_sample.set_X(x_t, t=t)
            policy_sample.set_O(o_t, t=t)
            policy_sample.set_U(u_t, t=t)
            if not only_noise:
                policy_sample_no_noise.set_U(u_t_no_noise, t=t)

            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)

        return policy_sample, policy_sample_no_noise
Beispiel #8
0
    def plan(self,
             x0,
             obs,
             dynamics,
             cost_funcs,
             dU,
             T,
             init_traj=None,
             plot_env=None,
             init_cov=None,
             config=None):
        """
        :type init_traj: Sample
        :type dynamics: Dynamics
        :type cost_func: Cost
        """
        if config is None:
            config = self._config
        self.means, self.covs = [], []

        if init_traj is not None:
            mean = init_traj.get_U().ravel()
        elif 'init_u' in config:
            mean = config['init_u'] * T
        else:
            mean = np.zeros(dU * T)

        lower_bound = config['bounds'][
            'min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T)
        upper_bound = config['bounds'][
            'max'] * T if 'bounds' in config else np.inf * np.ones(dU * T)

        ### run one iteration of CEM with extra samples
        # if init_cov is None:
        cov = config['init_var'] * np.eye(dU * T)
        # else:
        #     cov = config['init_var'] * init_cov
        mean, cov = self._cem_step(x0,
                                   obs,
                                   mean,
                                   cov,
                                   config['init_M'],
                                   config['K'],
                                   lower_bound,
                                   upper_bound,
                                   dU,
                                   T,
                                   dynamics,
                                   cost_funcs,
                                   plot_env=plot_env)
        self.means.append(mean)
        self.covs.append(cov)

        if plot_env:
            pass  # raw_input('CEM iter 0')

        ### remaining iterations
        for iter in xrange(1, config['iters']):
            mean, cov = self._cem_step(x0,
                                       obs,
                                       mean,
                                       cov,
                                       config['M'],
                                       config['K'],
                                       lower_bound,
                                       upper_bound,
                                       dU,
                                       T,
                                       dynamics,
                                       cost_funcs,
                                       plot_env=plot_env)
            self.means.append(mean)
            self.covs.append(cov)

            if plot_env:
                pass  # raw_input('CEM iter {0}'.format(iter))

        sample = Sample(meta_data=self._meta_data, T=T)
        U = mean.reshape(T, dU)
        U[:, -1] = 0.  # TODO hack
        sample.set_U(U, t=slice(0, T))
        sample.set_O(obs, t=0)
        sample.set_X(x0, t=0)
        sample.rollout(dynamics)

        return sample
Beispiel #9
0
    def _cem_step(self,
                  x0,
                  obs,
                  mean,
                  cov,
                  M,
                  K,
                  lower_bound,
                  upper_bound,
                  dU,
                  T,
                  dynamics,
                  cost_funcs,
                  plot_env=None):
        """
        :param mean: mean of trajectory controls Gaussian
        :param obs: observation
        :param cov: covariance of trajectory controls Gaussian
        :param M: sample M controls
        :param K: keep K lowest cost trajectories
        :return: mean, cov
        """
        ### sample trajectories
        M_controls = []
        while len(M_controls) < M:
            control = np.random.multivariate_normal(mean, cov)
            if np.all(control < upper_bound) and np.all(control > lower_bound):
                M_controls.append(control)
        samples = []
        for U_flat in M_controls:
            sample = Sample(meta_data=self._meta_data, T=T)
            U = U_flat.reshape(T, dU)
            U[:, -1] = 0.  # TODO hack
            sample.set_U(U, t=slice(0, T))
            sample.set_O(obs, t=0)
            sample.set_X(x0, t=0)
            for sub_control, u_sub in self._config['fixed'].items():
                sample.set_U(np.array([list(u_sub) * T]).T,
                             t=slice(0, T),
                             sub_control=sub_control)
            sample.rollout(dynamics)
            samples.append(sample)

        ### keep lowest K cost trajectories
        costs = [0.] * len(samples)
        for cost_func in cost_funcs:
            if hasattr(cost_func, 'eval_batch'):
                costs = [
                    c + cost_func_approx.J for c, cost_func_approx in zip(
                        costs, cost_func.eval_batch(samples))
                ]
            else:
                costs = [
                    c + cost_func.eval(sample).J
                    for c, sample in zip(costs, samples)
                ]
        samples_costs = zip(samples, costs)
        samples_costs = sorted(samples_costs, key=lambda x: x[1])
        K_samples = [x[0] for x in samples_costs[:K]]

        ### fit Gaussian
        data = np.vstack([s.get_U().ravel() for s in K_samples])
        new_mean = np.mean(data, axis=0)
        new_cov = np.cov(data, rowvar=0)

        if plot_env:
            plot_env.rave_env.clear_plots()
            colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
            for sample, color in zip([x[0] for x in samples_costs], colors):
                for t in xrange(T - 1):
                    plot_env.rave_env.plot_segment(
                        sample.get_X(t=t, sub_state='position'),
                        sample.get_X(t=t + 1, sub_state='position'),
                        color=color)

        return new_mean, new_cov
Beispiel #10
0
def rosbag_to_sample(rosbag_fname):
    print('Parsing {0}'.format(rosbag_fname))

    bebop_topics = params['bebop']['topics']

    ### read all messages
    bag = rosbag.Bag(rosbag_fname, 'r')
    msg_dict = collections.defaultdict(list)
    start_time = None
    for topic, msg, t in bag.read_messages():
        if start_time is None:
            start_time = t.to_sec()
        else:
            assert(start_time <= t.to_sec())
        msg_dict[topic].append((t.to_sec() - start_time, msg))
    bag.close()
    # get_measured_vel(rosbag_fname, msg_dict) # necessary hack
    if len(msg_dict[bebop_topics['start_rollout']]) == 0:
        print('\tNo start_rollout, returning None')
        return None
    ### sort by time
    for k, v in msg_dict.items():
        msg_dict[k] = sorted(v, key=lambda x: x[0])
    ### prune start and end
    is_coll = (bebop_topics['collision'] in msg_dict.keys())
    start_time = msg_dict[bebop_topics['start_rollout']][0][0]
    end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf
    for topic, time_msg in msg_dict.items():
        msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time]

    ### necessary info for Samples
    dt = params['dt']
    T = params['prediction']['dagger']['T']

    ### space by dt
    image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:]
    cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:]
    # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:]

    ### create sample
    sample = Sample(T=len(image_time_msgs), meta_data=params)
    cvb = cv_bridge.CvBridge()
    for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)):
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]

        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')

        ### collision
        sample.set_O([0], t=t, sub_obs='collision')

        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp

    assert(sample.isfinite())

    return sample
Beispiel #11
0
    def _run_rollout(self, itr):
        T = params['probcoll']['T']
        label_with_noise = params['probcoll']['label_with_noise']

        samples = []
        world_infos = []
        mpc_infos = []

        self._conditions.reset()
        for cond in xrange(self._conditions.length):
            rep = 0
            while rep < self._conditions.repeats:
                self._logger.info(
                    '\t\tStarting cond {0} rep {1} itr {2}'.format(
                        cond, rep, itr))
                start = time.time()
                if (cond == 0 and rep == 0) or self._world.randomize:
                    self._reset_world(itr, cond, rep)

                x0 = self._conditions.get_cond(cond, rep=rep)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    self._update_world(sample_T, t)

                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                    o = rollout.get_O(t=0)
                    if label_with_noise:
                        u = rollout.get_U(t=0)
                    else:
                        u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning(
                            '\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)

                    if hasattr(self._mpc_policy, '_curr_traj'):
                        self._world.update_visualization(
                            sample_T, self._mpc_policy._curr_traj, t)

                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))
                world_info = self._get_world_info()
                mpc_info = self._mpc_policy.get_info()

                if not self._is_good_rollout(sample, t):
                    self._logger.warning(
                        '\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)
                world_infos.append(world_info)
                mpc_infos.append(mpc_info)

                assert (samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'
                    .format(cond, rep, elapsed, t * params['dt'] / elapsed))
                rep += 1

        self._itr_save_samples(itr, samples)
        self._itr_save_worlds(itr, world_infos)
        self._itr_save_mpcs(itr, mpc_infos)

        self._reset_world(itr, 0, 0)  # leave the world as it was
Beispiel #12
0
    def _evaluate_probcoll_model(self):
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], []
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### evaluate on grid
            dx, dy = 30, 30
            positions = np.dstack(
                np.meshgrid(np.linspace(-0.5, 3.0, dx),
                            np.linspace(1.0, -1.0, dy))).reshape(dx * dy, 2)

            samples = []
            for pos in positions:
                s = copy.deepcopy(default_sample)

                pos = np.concatenate(
                    (pos, [s.get_X(t=0, sub_state='position')[-1]]))
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False),
                        t=slice(0, bootstrap.T))
                s.set_U(np.zeros((bootstrap.T, bootstrap.dU)),
                        t=slice(0, bootstrap.T))

                samples.append(s)

            cp_params = params['probcoll']['cost_probcoll']
            eval_cost = cp_params['eval_cost']
            pre_activation = cp_params['pre_activation']
            probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
                samples, num_avg=10, pre_activation=pre_activation)

            bootstrap.close()

            sigmoid = lambda x: (1. / (1. + np.exp(-x)))
            costs = []
            for probs_mean, probs_std in zip(probs_mean_batch,
                                             probs_std_batch):
                cost = 0.
                for t in [bootstrap.T - 1]:
                    speed = 1.
                    cost += eval(eval_cost)
                costs.append(cost / bootstrap.T)
            costs = np.array(costs)

            # if itr == 3:
            #     import IPython; IPython.embed()

            Xs = np.array([
                s.get_X(t=0, sub_state='position')[:2] for s in samples
            ]).reshape(dx, dy, 2)
            itrs_Xs.append(Xs)
            if pre_activation:
                probs_mean_batch = sigmoid(probs_mean_batch)
                # probs_std_batch = sigmoid(probs_std_batch)
                probs_std_batch /= probs_std_batch.max()
            itrs_prob_means.append(
                probs_mean_batch.max(axis=1).reshape(dx, dy))
            itrs_prob_stds.append(probs_std_batch.max(axis=1).reshape(dx, dy))
            itrs_costs.append(costs.reshape(dx, dy))

            # if itr >= 3:
            #     break

            itr += 1

        return itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs
Beispiel #13
0
    def _evaluate_probcoll_model_groundtruth(self):
        """
        For each primitive, execute the primitive for T timesteps to obtain ground truth, and compare label
        """
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_gt_crashes, itrs_pred_crashes = [], []
        positions, speeds_angles = None, None
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            ### create MPC
            cp_params = params['probcoll']['cost']
            cost_cp = CostProbcollPointquad(
                bootstrap,
                None,
                weight=float(cp_params['weight']),
                eval_cost=cp_params['eval_cost'],
                pre_activation=cp_params['pre_activation'])
            mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                      cost_cp,
                                                      additional_costs=[],
                                                      meta_data=params,
                                                      use_threads=False,
                                                      plot=False)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### speeds/angles
            speeds_angles = [(p.speed, p.theta)
                             for p in mpc_policy._mpc_policies]
            positions = []

            ### evaluate on grid
            dx, dy = 12, 5
            rollouts = []
            for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
                for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                    s = copy.deepcopy(default_sample)

                    pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                    s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                    s.set_O(self._agent.get_observation(s.get_X(t=0),
                                                        noise=False),
                            t=slice(0, bootstrap.T))

                    if np.sum(s.get_O(sub_obs='collision', t=0)) > 0:
                        continue

                    positions.append((xpos, ypos))

                    x0 = s.get_X(t=0)
                    rollouts_ij = [
                        self._agent.sample_policy(x0,
                                                  p,
                                                  noise=ZeroNoise(params),
                                                  T=bootstrap.T)
                        for p in mpc_policy._mpc_policies
                    ]
                    rollouts.append(rollouts_ij)

            rollouts = np.array(rollouts)
            gt_crashes = np.array([
                r.get_O(sub_obs='collision').sum() > 0
                for r in rollouts.ravel()
            ]).reshape(rollouts.shape)

            sigmoid = lambda x: 1. / (1. + np.exp(-x))
            pred_crashes = np.array([
                sigmoid(mean) > 0.5 for mean in bootstrap.eval_sample_batch(
                    rollouts.ravel(), num_avg=10, pre_activation=True)[0]
            ]).reshape(rollouts.shape)

            bootstrap.close()

            itrs_gt_crashes.append(gt_crashes)
            itrs_pred_crashes.append(pred_crashes)

            itr += 1

        return itrs_gt_crashes, itrs_pred_crashes, positions, speeds_angles
Beispiel #14
0
    def _evaluate_prediction_trajectory_model(self):
        ### get prediction model evaluations for all iterations
        itr = 0
        itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], [], [], []
        while True:
            ### load NN
            model_file = self._itr_model_file(itr)
            if not os.path.exists(model_file):
                break
            bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
            bootstrap.load(model_file=model_file)

            wd = self._load_world_file(itr, 0, 0)
            self._world.env.clear()
            for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'],
                                            wd['cyl_heights']):
                self._world.env.add_cylinder(pose, radius, height)
            for pose, extents in zip(wd['box_poses'], wd['box_extents']):
                self._world.env.add_box(pose, extents)

            ### create MPC
            cp_params = params['probcoll']['cost_probcoll']
            cost_cp = CostProbcollPointquad(
                bootstrap,
                None,
                weight=float(cp_params['weight']),
                eval_cost=cp_params['eval_cost'],
                pre_activation=cp_params['pre_activation'])
            mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                      cost_cp,
                                                      additional_costs=[],
                                                      meta_data=params,
                                                      use_threads=False,
                                                      plot=False)

            default_sample = Sample(meta_data=params, T=bootstrap.T)
            for k, v in params['probcoll']['conditions']['default'].items():
                default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

            ### evaluate on grid
            dx, dy = 20, 20
            samples = []
            itr_costs = []
            thetas, speeds = [], []
            for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
                for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                    s = copy.deepcopy(default_sample)

                    pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                    s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                    s.set_O(self._agent.get_observation(s.get_X(t=0),
                                                        noise=False),
                            t=slice(0, bootstrap.T))
                    x = s.get_X(t=0)
                    o = s.get_O(t=0)

                    # assume straight line trajectories
                    p_samples = []
                    p_speeds, p_thetas = [], []
                    for primitive in mpc_policy._mpc_policies:
                        p_speeds.append(primitive.speed)
                        p_thetas.append(primitive.theta)
                        primitive.act(x, o, t=0, noise=ZeroNoise(params))
                        p_samples.append(primitive._curr_traj)

                    prim_costs = mpc_policy._primitives_cost(p_samples)
                    min_cost_idx = np.argmin(prim_costs)

                    samples.append(p_samples[min_cost_idx])
                    itr_costs.append(prim_costs[min_cost_idx])
                    thetas.append(p_thetas[min_cost_idx])
                    speeds.append(p_speeds[min_cost_idx])

            ### evaluate on grid
            cp_params = params['probcoll']['cost_probcoll']
            eval_cost = cp_params['eval_cost']
            pre_activation = cp_params['pre_activation']
            probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
                samples, num_avg=10, pre_activation=pre_activation)

            bootstrap.close()

            sigmoid = lambda x: (1. / (1. + np.exp(-x)))
            # costs = []
            # for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
            #     cost = 0.
            #     for t in [bootstrap.T - 1]:
            #         speed = 1.
            #         cost += eval(eval_cost)
            #     costs.append(cost / bootstrap.T)
            # costs = np.array(costs)

            itrs_Xs.append(
                np.array(
                    [s.get_X(t=0, sub_state='position')[:2] for s in samples]))
            itrs_thetas.append(np.array(thetas))
            itrs_speeds.append(
                np.array(speeds) /
                (params['trajopt']['cost_velocity']['velocity'][0] * dx *
                 dy))  # normalize so fits into grid density
            # itrs_speeds.append(np.ones(len(speeds), dtype=float) / (dx * dy)) # TODO
            if pre_activation:
                probs_mean_batch = sigmoid(probs_mean_batch)
                # probs_std_batch = sigmoid(probs_std_batch)
                probs_std_batch /= probs_std_batch.max()
            itrs_prob_means.append(np.array(probs_mean_batch))
            itrs_prob_stds.append(np.array(probs_std_batch))
            itrs_costs.append(np.array(itr_costs))

            itr += 1

            # if itr >= 2: # TODO
            #     break

        return itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs
Beispiel #15
0
    def _run_testing(self, itr):
        if (itr != 0 and (itr == self._max_iter - 1 \
                or itr % params['world']['testing']['itr_freq'] == 0)):
            self._logger.info('Itr {0} testing'.format(itr))
            if self._agent.sim:
                #                if self._async_on:
                #                    self._logger.debug('Recovering probcoll model')
                #                    self._probcoll_model.recover()
                T = params['probcoll']['T']
                conditions = params['world']['testing']['positions']
                samples = []
                reset_pos, reset_quat = self._agent.get_sim_state_data()
                for cond in xrange(len(conditions)):
                    self._logger.info('\t\tTesting cond {0} itr {1}'.format(
                        cond, itr))
                    start = time.time()
                    self._agent.execute_control(None,
                                                reset=True,
                                                pos=conditions[cond])
                    x0 = self._conditions.get_cond(0)
                    sample_T = Sample(meta_data=params, T=T)
                    sample_T.set_X(x0, t=0)
                    for t in xrange(T):
                        x0 = sample_T.get_X(t=t)

                        rollout, rollout_no_noise = self._agent.sample_policy(
                            x0, self._mpc_policy, T=1, use_noise=False)

                        o = rollout.get_O(t=0)
                        u = rollout_no_noise.get_U(t=0)

                        sample_T.set_U(u, t=t)
                        sample_T.set_O(o, t=t)

                        if not self._use_dynamics:
                            sample_T.set_X(rollout.get_X(t=0), t=t)

                        if self._world.is_collision(sample_T, t=t):
                            self._logger.warning(
                                '\t\t\tCrashed at t={0}'.format(t))
                            break

                        if self._use_dynamics:
                            if t < T - 1:
                                x_tp1 = self._dynamics.evolve(x0, u)
                                sample_T.set_X(x_tp1, t=t + 1)

                    else:
                        self._logger.info('\t\t\tLasted for t={0}'.format(t))

                    sample = sample_T.match(slice(0, t + 1))

                    if not self._is_good_rollout(sample, t):
                        self._logger.warning(
                            '\t\t\tNot good rollout. Repeating rollout.'.
                            format(t))
                        continue

                    samples.append(sample)

                    assert (samples[-1].isfinite())
                    elapsed = time.time() - start
                    self._logger.info(
                        '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'
                        .format(cond, elapsed, t * params['dt'] / elapsed))

                self._itr_save_samples(itr, samples, prefix='testing_')
                self._agent.execute_control(None,
                                            reset=True,
                                            pos=reset_pos,
                                            quat=reset_quat)
            else:
                pass