def sample_policy(self, x0, policy, T=None, **policy_args): if T is None: T = policy._T policy_sample = Sample(meta_data=params, T=T) noise = policy_args.get('noise', ZeroNoise(params)) rate = rospy.Rate(1. / params['dt']) policy_sample.set_X(x0, t=0) for t in xrange(T): # get observation and act x_t = policy_sample.get_X(t=t) o_t = self.get_observation(x_t) u_t = policy.act(x_t, o_t, t, noise=noise) if params['probcoll']['planner_type'] != 'teleop': # TODO hack self.execute_control(u_t) # record policy_sample.set_X(x_t, t=t) policy_sample.set_O(o_t, t=t) policy_sample.set_U(u_t, t=t) # propagate dynamics if t < T-1: x_tp1 = self._dynamics.evolve(x_t, u_t) policy_sample.set_X(x_tp1, t=t+1) rate.sleep() # see if collision in the past cycle policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision') return policy_sample
def _evaluate_probcoll_model_on_samples(self, bootstrap, itr, samples, mpcs): ### load NN H = params['model']['T'] model_file = self._itr_model_file(itr) if not ProbcollModelPointquad.checkpoint_exists(model_file): return [None] * len(samples), [None] * len(samples) bootstrap.load(model_file=model_file) means, stds = [], [] for sample, mpc in zip(samples, mpcs): eval_samples = [] for o_t, U in zip(sample.get_O(), mpc['controls']): eval_sample = Sample(T=H, meta_data=params) eval_sample.set_U(U, t=slice(0, H)) eval_sample.set_O(o_t, t=0) eval_samples.append(eval_sample) mean, std = bootstrap.eval_sample_batch(eval_samples, num_avg=10, pre_activation=True) means.append(mean) stds.append(std) return means, stds
def _run_itr_analyze(self, itr): T = 1 samples = [] world_infos = [] mpc_infos = [] self._conditions.reset() for cond in xrange(self._conditions.length): for rep in xrange(self._conditions.repeats): self._logger.info('\t\tStarting cond {0} rep {1}'.format( cond, rep)) if (cond == 0 and rep == 0) or self._world.randomize: self._reset_world(itr, cond, rep) x0 = self._conditions.get_cond(cond, rep=rep) sample_T = Sample(meta_data=params, T=T) sample_T.set_X(x0, t=0) mpc_policy = self._create_mpc(itr, x0) control_noise = self._create_control_noise( ) # create each time b/c may not be memoryless for t in xrange(T): self._update_world(sample_T, t) x0 = sample_T.get_X(t=t) rollout = self._agent.sample_policy(x0, mpc_policy, noise=control_noise, T=1) u = rollout.get_U(t=0) o = rollout.get_O(t=0) u_label = u sample_T.set_U(u_label, t=t) sample_T.set_O(o, t=t) if hasattr(mpc_policy, '_curr_traj'): self._world.update_visualization( sample_T, mpc_policy._curr_traj, t) else: self._logger.info('\t\t\tLasted for t={0}'.format(t)) samples.append(sample_T) world_infos.append(self._get_world_info()) mpc_infos.append(mpc_policy.get_info()) assert (samples[-1].isfinite()) rep += 1 self._itr_save_samples(itr, samples) self._itr_save_worlds(itr, world_infos) self._itr_save_mpcs(itr, mpc_infos)
def sample_policy(self, x0, policy, T=None, use_noise=True, only_noise=False, **policy_args): if T is None: T = policy._T rate = rospy.Rate(1. / params['dt']) policy_sample = Sample(meta_data=params, T=T) policy_sample.set_X(x0, t=0) policy_sample_no_noise = Sample(meta_data=params, T=T) for t in xrange(T): # get observation and act x_t = policy_sample.get_X(t=t) o_t = self.get_observation(x_t) if self.sim: x_t = self._get_sim_state(x_t) start = time.time() u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise) self._logger.debug(time.time() - start) # only execute control if no collision # TODO is this necessary if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0: if use_noise: self.execute_control(u_t) else: self.execute_control(u_t_no_noise) # record policy_sample.set_X(x_t, t=t) policy_sample.set_O(o_t, t=t) policy_sample.set_U(u_t, t=t) if not only_noise: policy_sample_no_noise.set_U(u_t_no_noise, t=t) # In sim we do not have cycles if self.sim: policy_sample.set_O([int(self.sim_coll)], t=t, sub_obs='collision') else: # propagate dynamics if t < T - 1: x_tp1 = self._dynamics.evolve(x_t, u_t) policy_sample.set_X(x_tp1, t=t + 1) rate.sleep() # see if collision in the past cycle policy_sample.set_O( [int(self.coll_callback.get() is not None)], t=t, sub_obs='collision') return policy_sample, policy_sample_no_noise
def reset(self, back_up, itr=None, cond=None, rep=None, record=True): # self._agent.execute_control(None) # stop the car assert(itr is not None and cond is not None and rep is not None) # TODO add sim backup ### back car up straight and slow if back_up: sample = Sample(meta_data=params, T=2) sample.set_U([np.random.uniform(*self.wp['back_up']['cmd_steer'])], t=0, sub_control='cmd_steer') sample.set_U([self.wp['back_up']['cmd_vel']], t=0, sub_control='cmd_vel') u = sample.get_U(t=0) if self._agent.sim: if self._agent.sim_coll and self._agent.sim_last_coll: self._logger.info('Resetting the car') self._agent.execute_control(None, reset=True) elif self._agent.sim_coll: # TODO add backup logic for not sim self._logger.info('Backing the car up') for _ in xrange(int(self.wp['back_up']['duration'] / params['dt'])): self._agent.execute_control(u) if self._agent.sim_coll: break for _ in xrange(int(1.0 / params['dt'])): self._agent.execute_control(None) else: self._logger.info('Backing the car up') start = time.time() while time.time() - start < self.wp['back_up']['duration']: self._agent.execute_control(u) self._agent.execute_control(None) time.sleep(1.0) else: self._agent.execute_control(None, reset=True) ### TODO add a flag self._ros_reset_pub.publish(std_msgs.msg.Empty()) ### record if record: self._start_record_bag(itr, cond, rep) if not self._agent.sim: ### reset indicators self._ros_collision.get() self.num_collisions = 0
def _create_primitives(self): des_vel = params['planning']['cost_velocity']['velocity'] weights = params['planning']['cost_velocity']['weights'] assert (weights[1] == 0 and weights[2] == 0) thetas = np.linspace(-np.pi / 2., np.pi / 2., 19) speeds = np.linspace(0.1, 1.0, 10) * des_vel[0] samples = [] for theta in thetas: for speed in speeds: sample = Sample(T=self._H) linearvel = [speed * np.cos(theta), speed * np.sin(theta), 0.] sample.set_U(linearvel, t=slice(0, self._H)) samples.append(sample) return samples
def sample_policy(self, x0, policy, T=None, only_noise=False, **policy_args): """ Run the policy and collect the trajectory data :param x0: initial state :param policy: to execute :param policy_args: e.g. ref_traj, noise, etc :rtype Sample """ if T is None: T = policy._T policy_sample = Sample(meta_data=params, T=T) policy_sample.set_X(x0, t=0) policy_sample_no_noise = Sample(meta_data=params, T=T) for t in xrange(T): # get observation and act x_t = policy_sample.get_X(t=t) o_t = self.get_observation(x_t) u_t, u_t_no_noise = policy.act(x_t, o_t, t) # record policy_sample.set_X(x_t, t=t) policy_sample.set_O(o_t, t=t) policy_sample.set_U(u_t, t=t) if not only_noise: policy_sample_no_noise.set_U(u_t_no_noise, t=t) # propagate dynamics if t < T - 1: x_tp1 = self._dynamics.evolve(x_t, u_t) policy_sample.set_X(x_tp1, t=t + 1) return policy_sample, policy_sample_no_noise
def act(self, x, obs, t, noise, ref_traj=None): ### create desired path (radiates outward) T, dt, theta, speed = self._T, self._meta_data[ 'dt'], self.theta, self.speed traj = Sample(meta_data=self._meta_data, T=T) ### initialize traj.set_X(x, t=0) traj.set_O(obs, t=0) ### create desired path (radiates outward) linearvel = [speed * np.cos(theta), speed * np.sin(theta)] for t in xrange(T - 1): x_t = traj.get_X(t=t) x_tp1 = self._dynamics.evolve(x_t, linearvel) traj.set_X(x_tp1, t=t + 1) traj.set_U(linearvel, t=t) traj.set_U(linearvel, t=-1) self._curr_traj = traj u = traj.get_U(t=0) return u + noise.sample(u)
def plan(self, x0, obs, dynamics, cost_funcs, dU, T, init_traj=None, plot_env=None, init_cov=None, config=None): """ :type init_traj: Sample :type dynamics: Dynamics :type cost_func: Cost """ if config is None: config = self._config self.means, self.covs = [], [] if init_traj is not None: mean = init_traj.get_U().ravel() elif 'init_u' in config: mean = config['init_u'] * T else: mean = np.zeros(dU * T) lower_bound = config['bounds'][ 'min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T) upper_bound = config['bounds'][ 'max'] * T if 'bounds' in config else np.inf * np.ones(dU * T) ### run one iteration of CEM with extra samples # if init_cov is None: cov = config['init_var'] * np.eye(dU * T) # else: # cov = config['init_var'] * init_cov mean, cov = self._cem_step(x0, obs, mean, cov, config['init_M'], config['K'], lower_bound, upper_bound, dU, T, dynamics, cost_funcs, plot_env=plot_env) self.means.append(mean) self.covs.append(cov) if plot_env: pass # raw_input('CEM iter 0') ### remaining iterations for iter in xrange(1, config['iters']): mean, cov = self._cem_step(x0, obs, mean, cov, config['M'], config['K'], lower_bound, upper_bound, dU, T, dynamics, cost_funcs, plot_env=plot_env) self.means.append(mean) self.covs.append(cov) if plot_env: pass # raw_input('CEM iter {0}'.format(iter)) sample = Sample(meta_data=self._meta_data, T=T) U = mean.reshape(T, dU) U[:, -1] = 0. # TODO hack sample.set_U(U, t=slice(0, T)) sample.set_O(obs, t=0) sample.set_X(x0, t=0) sample.rollout(dynamics) return sample
def _cem_step(self, x0, obs, mean, cov, M, K, lower_bound, upper_bound, dU, T, dynamics, cost_funcs, plot_env=None): """ :param mean: mean of trajectory controls Gaussian :param obs: observation :param cov: covariance of trajectory controls Gaussian :param M: sample M controls :param K: keep K lowest cost trajectories :return: mean, cov """ ### sample trajectories M_controls = [] while len(M_controls) < M: control = np.random.multivariate_normal(mean, cov) if np.all(control < upper_bound) and np.all(control > lower_bound): M_controls.append(control) samples = [] for U_flat in M_controls: sample = Sample(meta_data=self._meta_data, T=T) U = U_flat.reshape(T, dU) U[:, -1] = 0. # TODO hack sample.set_U(U, t=slice(0, T)) sample.set_O(obs, t=0) sample.set_X(x0, t=0) for sub_control, u_sub in self._config['fixed'].items(): sample.set_U(np.array([list(u_sub) * T]).T, t=slice(0, T), sub_control=sub_control) sample.rollout(dynamics) samples.append(sample) ### keep lowest K cost trajectories costs = [0.] * len(samples) for cost_func in cost_funcs: if hasattr(cost_func, 'eval_batch'): costs = [ c + cost_func_approx.J for c, cost_func_approx in zip( costs, cost_func.eval_batch(samples)) ] else: costs = [ c + cost_func.eval(sample).J for c, sample in zip(costs, samples) ] samples_costs = zip(samples, costs) samples_costs = sorted(samples_costs, key=lambda x: x[1]) K_samples = [x[0] for x in samples_costs[:K]] ### fit Gaussian data = np.vstack([s.get_U().ravel() for s in K_samples]) new_mean = np.mean(data, axis=0) new_cov = np.cov(data, rowvar=0) if plot_env: plot_env.rave_env.clear_plots() colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K) for sample, color in zip([x[0] for x in samples_costs], colors): for t in xrange(T - 1): plot_env.rave_env.plot_segment( sample.get_X(t=t, sub_state='position'), sample.get_X(t=t + 1, sub_state='position'), color=color) return new_mean, new_cov
def rosbag_to_sample(rosbag_fname): print('Parsing {0}'.format(rosbag_fname)) bebop_topics = params['bebop']['topics'] ### read all messages bag = rosbag.Bag(rosbag_fname, 'r') msg_dict = collections.defaultdict(list) start_time = None for topic, msg, t in bag.read_messages(): if start_time is None: start_time = t.to_sec() else: assert(start_time <= t.to_sec()) msg_dict[topic].append((t.to_sec() - start_time, msg)) bag.close() # get_measured_vel(rosbag_fname, msg_dict) # necessary hack if len(msg_dict[bebop_topics['start_rollout']]) == 0: print('\tNo start_rollout, returning None') return None ### sort by time for k, v in msg_dict.items(): msg_dict[k] = sorted(v, key=lambda x: x[0]) ### prune start and end is_coll = (bebop_topics['collision'] in msg_dict.keys()) start_time = msg_dict[bebop_topics['start_rollout']][0][0] end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf for topic, time_msg in msg_dict.items(): msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time] ### necessary info for Samples dt = params['dt'] T = params['prediction']['dagger']['T'] ### space by dt image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:] cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:] # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:] ### create sample sample = Sample(T=len(image_time_msgs), meta_data=params) cvb = cv_bridge.CvBridge() for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)): image_msg = image_time_msg[1] cmd_vel_msg = cmd_vel_time_msg[1] ### image im = AgentBebop2d.process_image(image_msg, cvb) sample.set_O(im.ravel(), t=t, sub_obs='camera') ### collision sample.set_O([0], t=t, sub_obs='collision') ### linearvel sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t) sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t) sample.set_O([int(is_coll)], t=-1, sub_obs='collision') # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp assert(sample.isfinite()) return sample
def _run_rollout(self, itr): T = params['probcoll']['T'] label_with_noise = params['probcoll']['label_with_noise'] samples = [] world_infos = [] mpc_infos = [] self._conditions.reset() for cond in xrange(self._conditions.length): rep = 0 while rep < self._conditions.repeats: self._logger.info( '\t\tStarting cond {0} rep {1} itr {2}'.format( cond, rep, itr)) start = time.time() if (cond == 0 and rep == 0) or self._world.randomize: self._reset_world(itr, cond, rep) x0 = self._conditions.get_cond(cond, rep=rep) sample_T = Sample(meta_data=params, T=T) sample_T.set_X(x0, t=0) for t in xrange(T): self._update_world(sample_T, t) x0 = sample_T.get_X(t=t) rollout, rollout_no_noise = self._agent.sample_policy( x0, self._mpc_policy, T=1, only_noise=label_with_noise) o = rollout.get_O(t=0) if label_with_noise: u = rollout.get_U(t=0) else: u = rollout_no_noise.get_U(t=0) sample_T.set_U(u, t=t) sample_T.set_O(o, t=t) if not self._use_dynamics: sample_T.set_X(rollout.get_X(t=0), t=t) if self._world.is_collision(sample_T, t=t): self._logger.warning( '\t\t\tCrashed at t={0}'.format(t)) break if self._use_dynamics: if t < T - 1: x_tp1 = self._dynamics.evolve(x0, u) sample_T.set_X(x_tp1, t=t + 1) if hasattr(self._mpc_policy, '_curr_traj'): self._world.update_visualization( sample_T, self._mpc_policy._curr_traj, t) else: self._logger.info('\t\t\tLasted for t={0}'.format(t)) sample = sample_T.match(slice(0, t + 1)) world_info = self._get_world_info() mpc_info = self._mpc_policy.get_info() if not self._is_good_rollout(sample, t): self._logger.warning( '\t\t\tNot good rollout. Repeating rollout.'.format(t)) continue samples.append(sample) world_infos.append(world_info) mpc_infos.append(mpc_info) assert (samples[-1].isfinite()) elapsed = time.time() - start self._logger.info( '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)' .format(cond, rep, elapsed, t * params['dt'] / elapsed)) rep += 1 self._itr_save_samples(itr, samples) self._itr_save_worlds(itr, world_infos) self._itr_save_mpcs(itr, mpc_infos) self._reset_world(itr, 0, 0) # leave the world as it was
def _run_testing(self, itr): if (itr != 0 and (itr == self._max_iter - 1 \ or itr % params['world']['testing']['itr_freq'] == 0)): self._logger.info('Itr {0} testing'.format(itr)) if self._agent.sim: # if self._async_on: # self._logger.debug('Recovering probcoll model') # self._probcoll_model.recover() T = params['probcoll']['T'] conditions = params['world']['testing']['positions'] samples = [] reset_pos, reset_quat = self._agent.get_sim_state_data() for cond in xrange(len(conditions)): self._logger.info('\t\tTesting cond {0} itr {1}'.format( cond, itr)) start = time.time() self._agent.execute_control(None, reset=True, pos=conditions[cond]) x0 = self._conditions.get_cond(0) sample_T = Sample(meta_data=params, T=T) sample_T.set_X(x0, t=0) for t in xrange(T): x0 = sample_T.get_X(t=t) rollout, rollout_no_noise = self._agent.sample_policy( x0, self._mpc_policy, T=1, use_noise=False) o = rollout.get_O(t=0) u = rollout_no_noise.get_U(t=0) sample_T.set_U(u, t=t) sample_T.set_O(o, t=t) if not self._use_dynamics: sample_T.set_X(rollout.get_X(t=0), t=t) if self._world.is_collision(sample_T, t=t): self._logger.warning( '\t\t\tCrashed at t={0}'.format(t)) break if self._use_dynamics: if t < T - 1: x_tp1 = self._dynamics.evolve(x0, u) sample_T.set_X(x_tp1, t=t + 1) else: self._logger.info('\t\t\tLasted for t={0}'.format(t)) sample = sample_T.match(slice(0, t + 1)) if not self._is_good_rollout(sample, t): self._logger.warning( '\t\t\tNot good rollout. Repeating rollout.'. format(t)) continue samples.append(sample) assert (samples[-1].isfinite()) elapsed = time.time() - start self._logger.info( '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)' .format(cond, elapsed, t * params['dt'] / elapsed)) self._itr_save_samples(itr, samples, prefix='testing_') self._agent.execute_control(None, reset=True, pos=reset_pos, quat=reset_quat) else: pass