def _create_mpc(self, itr, x0):
    """ Must initialize MPC """
    sample0 = Sample(meta_data=params, T=1)
    sample0.set_X(x0, t=0)
    self._update_world(sample0, 0)

    self._logger.info('\t\t\tCreating MPC')
    if self._planner_type == 'primitives':
        additional_costs = []
        mpc_policy = PrimitivesMPCPolicyBebop2d(
            self._trajopt,
            self._cost_probcoll,
            additional_costs=additional_costs,
            meta_data=params,
            use_threads=False,
            plot=True,
            epsilon_greedy=params['prediction']['dagger']['epsilon_greedy'])
    elif self._planner_type == 'teleop':
        mpc_policy = TeleopMPCPolicyBebop2d(params)
    else:
        raise NotImplementedError(
            'planner_type {0} not implemented for bebop2d'.format(self._planner_type))

    return mpc_policy
def closest_collision_distance(self, x):
    sample = Sample(meta_data=params, T=params['T'])
    pos = x[sample.get_X_idxs(sub_state='position')]
    quat = x[sample.get_X_idxs(sub_state='orientation')]
    pose = posquat_to_pose(pos, quat)

    robotpos_dist_contactpos = self.rave_env.closest_collision(pose=pose)
    if robotpos_dist_contactpos is None:
        return 1e3

    _, dist, _ = robotpos_dist_contactpos
    return dist
def _run_itr_analyze(self, itr):
    T = 1
    samples = []
    world_infos = []
    mpc_infos = []

    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        for rep in xrange(self._conditions.repeats):
            self._logger.info('\t\tStarting cond {0} rep {1}'.format(cond, rep))
            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)

            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)

            mpc_policy = self._create_mpc(itr, x0)
            control_noise = self._create_control_noise()  # create each time b/c may not be memoryless

            for t in xrange(T):
                self._update_world(sample_T, t)

                x0 = sample_T.get_X(t=t)

                rollout = self._agent.sample_policy(x0, mpc_policy, noise=control_noise, T=1)

                u = rollout.get_U(t=0)
                o = rollout.get_O(t=0)
                u_label = u

                sample_T.set_U(u_label, t=t)
                sample_T.set_O(o, t=t)

                if hasattr(mpc_policy, '_curr_traj'):
                    self._world.update_visualization(sample_T, mpc_policy._curr_traj, t)
            else:
                self._logger.info('\t\t\tLasted for t={0}'.format(t))

            samples.append(sample_T)
            world_infos.append(self._get_world_info())
            mpc_infos.append(mpc_policy.get_info())

            assert(samples[-1].isfinite())

    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)
def get_observation(self, x):
    obs_sample = Sample(meta_data=params, T=2)

    ### collision
    coll_time = self.coll_callback.get()
    is_coll = coll_time is not None
    obs_sample.set_O([int(is_coll)], t=0, sub_obs='collision')

    ### camera
    image_msgs = self.image_callback.get()
    assert(len(image_msgs) > 0)
    ### keep only those before the collision
    if is_coll:
        image_msgs_filt = [im for im in image_msgs if im.header.stamp.to_sec() < coll_time.to_sec()]
        if len(image_msgs_filt) == 0:
            image_msgs_filt = [image_msgs[-1]]
        image_msgs = image_msgs_filt
    image_msg = image_msgs[-1]
    im = AgentBebop2d.process_image(image_msg, self.cv_bridge)
    # im = np.zeros((params['O']['camera']['height'], params['O']['camera']['width']), dtype=np.float32)
    obs_sample.set_O(im.ravel(), t=0, sub_obs='camera')

    ### publish image for debugging
    self.im_debug_pub.publish(self.cv_bridge.cv2_to_imgmsg((255. * im).astype(np.uint8), 'mono8'))

    return obs_sample.get_O(t=0)
def _itr_load_testing_samples(self, itr):
    fname = os.path.join(self._itr_dir(itr), 'testing_samples_itr_{0}.npz'.format(itr))
    if os.path.exists(fname):
        return Sample.load(fname)
    elif not os.path.exists(self._itr_dir(itr)):
        raise Exception('Iteration directory {0} does not exist'.format(self._itr_dir(itr)))
    else:
        return None
def _create_primitives(self):
    des_vel = params['planning']['cost_velocity']['velocity']
    weights = params['planning']['cost_velocity']['weights']
    assert(weights[1] == 0 and weights[2] == 0)

    thetas = np.linspace(-np.pi / 2., np.pi / 2., 19)
    speeds = np.linspace(0.1, 1.0, 10) * des_vel[0]

    samples = []
    for theta in thetas:
        for speed in speeds:
            sample = Sample(T=self._H)
            linearvel = [speed * np.cos(theta), speed * np.sin(theta), 0.]
            sample.set_U(linearvel, t=slice(0, self._H))
            samples.append(sample)

    return samples
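# Illustration only (an assumption, not part of the source): the primitive library
# above is a fixed 19 x 10 grid of constant-velocity, straight-line controls held
# for the horizon H. A minimal self-contained numpy sketch of the same
# (theta, speed) -> linear-velocity mapping:
import numpy as np

def primitive_velocities_sketch(des_forward_vel=1.0):
    """Enumerate the straight-line velocity primitives, one per (theta, speed) pair."""
    thetas = np.linspace(-np.pi / 2., np.pi / 2., 19)
    speeds = np.linspace(0.1, 1.0, 10) * des_forward_vel
    return [[s * np.cos(th), s * np.sin(th), 0.] for th in thetas for s in speeds]

assert len(primitive_velocities_sketch()) == 190  # 19 headings x 10 speeds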
def _evaluate_probcoll_model_on_samples(self, bootstrap, itr, samples, mpcs):
    ### load NN
    H = params['model']['T']
    model_file = self._itr_model_file(itr)
    if not ProbcollModelPointquad.checkpoint_exists(model_file):
        return [None] * len(samples), [None] * len(samples)
    bootstrap.load(model_file=model_file)

    means, stds = [], []
    for sample, mpc in zip(samples, mpcs):
        eval_samples = []
        for o_t, U in zip(sample.get_O(), mpc['controls']):
            eval_sample = Sample(T=H, meta_data=params)
            eval_sample.set_U(U, t=slice(0, H))
            eval_sample.set_O(o_t, t=0)
            eval_samples.append(eval_sample)

        mean, std = bootstrap.eval_sample_batch(eval_samples, num_avg=10, pre_activation=True)
        means.append(mean)
        stds.append(std)

    return means, stds
def _get_sim_state(self, xt):
    state_sample = Sample(meta_data=params, T=1)
    state_msg = self.sim_state
    state = np.array([
        state_msg.position.x,
        state_msg.position.y,
        state_msg.position.z,
        state_msg.orientation.x,
        state_msg.orientation.y,
        state_msg.orientation.z,
        state_msg.orientation.w
    ])
    state_sample.set_X(state[:3], t=0, sub_state='position')
    state_sample.set_X(state[3:], t=0, sub_state='orientation')
    x = np.nan_to_num(xt) + np.nan_to_num(state_sample.get_X(t=0))
    return x
def execute_control(self, u, reset=False, pos=None, quat=None):
    if u is not None:
        s = Sample(meta_data=params, T=2)
        steer = u[s.get_U_idxs('cmd_steer')][0]
        vel = u[s.get_U_idxs('cmd_vel')][0]
        self.cmd_steer_pub.publish(std_msgs.msg.Float32(steer))
        self.cmd_vel_pub.publish(std_msgs.msg.Float32(vel))
        if self.sim:
            data = self.srv(steer=steer, vel=vel, reset=reset)
    else:
        self.cmd_steer_pub.publish(std_msgs.msg.Float32(49.5))
        self.cmd_vel_pub.publish(std_msgs.msg.Float32(0.))
        if self.sim:
            if reset:
                if quat is None:
                    quat = [0.0, 0.0, 0.0, 0.0]
                if pos is None:
                    if len(params['world']['testing']['positions']) > 0:
                        pos = params['world']['testing']['positions'][
                            np.random.randint(len(params['world']['testing']['positions']))]
                    else:
                        pos = [0.0, 0.0, 0.0]
                pose = geometry_msgs.msg.Pose()
                pose.position.x, pose.position.y, pose.position.z = pos
                pose.orientation.x, pose.orientation.y, pose.orientation.z, pose.orientation.w = quat
                data = self.srv(reset=True, pose=pose)
            else:
                data = self.srv()

    if self.sim:
        self.sim_last_coll = self.sim_coll
        self.sim_coll = data.coll
        self.sim_image = data.image
        self.sim_depth = data.depth
        self.sim_back_image = data.back_image
        self.sim_back_depth = data.back_depth
        self.sim_state = data.pose
def get_observation(self, x):
    obs_sample = Sample(meta_data=params, T=2)

    if self.sim:
        is_coll = self.sim_coll
        image_msg = self.sim_image
        depth_msg = self.sim_depth
        back_image_msg = self.sim_back_image
        back_depth_msg = self.sim_back_depth
    else:
        ### collision
        coll_time = self.coll_callback.get()
        is_coll = coll_time is not None

        ### camera
        image_msgs = self.image_callback.get()
        assert(len(image_msgs) > 0)
        ### keep only those before the collision
        if is_coll:
            image_msgs_filt = [im for im in image_msgs if im.header.stamp.to_sec() < coll_time.to_sec()]
            if len(image_msgs_filt) == 0:
                image_msgs_filt = [image_msgs[-1]]
            image_msgs = image_msgs_filt
        image_msg = image_msgs[-1]

    if params['O'].get('use_depth', False):
        im = AgentRCcar.process_depth(depth_msg, self.cv_bridge)
        back_im = AgentRCcar.process_depth(back_depth_msg, self.cv_bridge)
    else:
        im = AgentRCcar.process_image(image_msg, self.cv_bridge)
        back_im = AgentRCcar.process_image(back_image_msg, self.cv_bridge)

    ros_image = self.cv_bridge.cv2_to_imgmsg(im, "mono8")
    self.pred_image_pub.publish(ros_image)

    obs_sample.set_O(im.ravel(), t=0, sub_obs='camera')
    obs_sample.set_O(back_im.ravel(), t=0, sub_obs='back_camera')
    obs_sample.set_O([int(is_coll)], t=0, sub_obs='collision')

    return obs_sample.get_O(t=0)
def reset(self, back_up, itr=None, cond=None, rep=None, record=True):
    # self._agent.execute_control(None) # stop the car
    assert(itr is not None and cond is not None and rep is not None)

    # TODO add sim backup
    ### back car up straight and slow
    if back_up:
        sample = Sample(meta_data=params, T=2)
        sample.set_U([np.random.uniform(*self.wp['back_up']['cmd_steer'])], t=0, sub_control='cmd_steer')
        sample.set_U([self.wp['back_up']['cmd_vel']], t=0, sub_control='cmd_vel')
        u = sample.get_U(t=0)
        if self._agent.sim:
            if self._agent.sim_coll and self._agent.sim_last_coll:
                self._logger.info('Resetting the car')
                self._agent.execute_control(None, reset=True)
            elif self._agent.sim_coll:
                # TODO add backup logic for not sim
                self._logger.info('Backing the car up')
                for _ in xrange(int(self.wp['back_up']['duration'] / params['dt'])):
                    self._agent.execute_control(u)
                    if self._agent.sim_coll:
                        break
                for _ in xrange(int(1.0 / params['dt'])):
                    self._agent.execute_control(None)
        else:
            self._logger.info('Backing the car up')
            start = time.time()
            while time.time() - start < self.wp['back_up']['duration']:
                self._agent.execute_control(u)
            self._agent.execute_control(None)
            time.sleep(1.0)
    else:
        self._agent.execute_control(None, reset=True)

    ### TODO add a flag
    self._ros_reset_pub.publish(std_msgs.msg.Empty())

    ### record
    if record:
        self._start_record_bag(itr, cond, rep)

    if not self._agent.sim:
        ### reset indicators
        self._ros_collision.get()
        self.num_collisions = 0
def _load_samples(self):
    samples_itrs = []
    itr = 0
    while True:
        try:
            samples_itrs.append(self._itr_load_samples(itr))
            itr += 1
        except:
            break
    self._logger.info('Loaded {0} iterations of samples'.format(len(samples_itrs)))

    ### load initial dataset
    init_data_folder = params['probcoll'].get('init_data', None)
    if init_data_folder is not None:
        if itr == 0:
            samples_itrs.append([])
        num_init_samples = 0
        fnames = [os.path.join(init_data_folder, fname) for fname in os.listdir(init_data_folder)]
        for fname in fnames:
            try:
                samples = Sample.load(fname)
            except:
                continue
            self._logger.debug('Loaded samples from {0}'.format(fname))
            samples_itrs[0] += samples
            num_init_samples += len(samples)
        self._logger.info('Loaded initial dataset of {0} samples'.format(num_init_samples))

    return samples_itrs
def sample_policy(self, x0, policy, T=None, use_noise=True, only_noise=False, **policy_args):
    if T is None:
        T = policy._T
    rate = rospy.Rate(1. / params['dt'])
    policy_sample = Sample(meta_data=params, T=T)
    policy_sample.set_X(x0, t=0)
    policy_sample_no_noise = Sample(meta_data=params, T=T)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        if self.sim:
            x_t = self._get_sim_state(x_t)
        start = time.time()
        u_t, u_t_no_noise = policy.act(x_t, o_t, t, only_noise=only_noise)
        self._logger.debug(time.time() - start)

        # only execute control if no collision
        # TODO is this necessary
        if int(o_t[policy_sample.get_O_idxs(sub_obs='collision')][0]) == 0:
            if use_noise:
                self.execute_control(u_t)
            else:
                self.execute_control(u_t_no_noise)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        if not only_noise:
            policy_sample_no_noise.set_U(u_t_no_noise, t=t)

        # In sim we do not have cycles
        if self.sim:
            policy_sample.set_O([int(self.sim_coll)], t=t, sub_obs='collision')
        else:
            # propagate dynamics
            if t < T - 1:
                x_tp1 = self._dynamics.evolve(x_t, u_t)
                policy_sample.set_X(x_tp1, t=t + 1)

            rate.sleep()

            # see if collision in the past cycle
            policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

    return policy_sample, policy_sample_no_noise
def _run_testing(self, itr):
    if (itr != 0 and (itr == self._max_iter - 1
                      or itr % params['world']['testing']['itr_freq'] == 0)):
        self._logger.info('Itr {0} testing'.format(itr))
        if self._agent.sim:
            # if self._async_on:
            #     self._logger.debug('Recovering probcoll model')
            #     self._probcoll_model.recover()
            T = params['probcoll']['T']
            conditions = params['world']['testing']['positions']
            samples = []
            reset_pos, reset_quat = self._agent.get_sim_state_data()
            for cond in xrange(len(conditions)):
                self._logger.info('\t\tTesting cond {0} itr {1}'.format(cond, itr))
                start = time.time()
                self._agent.execute_control(None, reset=True, pos=conditions[cond])
                x0 = self._conditions.get_cond(0)
                sample_T = Sample(meta_data=params, T=T)
                sample_T.set_X(x0, t=0)

                for t in xrange(T):
                    x0 = sample_T.get_X(t=t)

                    rollout, rollout_no_noise = self._agent.sample_policy(
                        x0, self._mpc_policy, T=1, use_noise=False)

                    o = rollout.get_O(t=0)
                    u = rollout_no_noise.get_U(t=0)

                    sample_T.set_U(u, t=t)
                    sample_T.set_O(o, t=t)

                    if not self._use_dynamics:
                        sample_T.set_X(rollout.get_X(t=0), t=t)

                    if self._world.is_collision(sample_T, t=t):
                        self._logger.warning('\t\t\tCrashed at t={0}'.format(t))
                        break

                    if self._use_dynamics:
                        if t < T - 1:
                            x_tp1 = self._dynamics.evolve(x0, u)
                            sample_T.set_X(x_tp1, t=t + 1)
                else:
                    self._logger.info('\t\t\tLasted for t={0}'.format(t))

                sample = sample_T.match(slice(0, t + 1))
                if not self._is_good_rollout(sample, t):
                    self._logger.warning('\t\t\tNot good rollout. Repeating rollout.'.format(t))
                    continue

                samples.append(sample)

                assert(samples[-1].isfinite())
                elapsed = time.time() - start
                self._logger.info(
                    '\t\t\tFinished cond {0} of testing ({1:.1f}s, {2:.3f}x real-time)'.format(
                        cond, elapsed, t * params['dt'] / elapsed))

            self._itr_save_samples(itr, samples, prefix='testing_')
            self._agent.execute_control(None, reset=True, pos=reset_pos, quat=reset_quat)
        else:
            pass
def sample_policy(self, x0, policy, T=None, **policy_args):
    if T is None:
        T = policy._T
    policy_sample = Sample(meta_data=params, T=T)
    noise = policy_args.get('noise', ZeroNoise(params))

    rate = rospy.Rate(1. / params['dt'])
    policy_sample.set_X(x0, t=0)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        u_t = policy.act(x_t, o_t, t, noise=noise)
        if params['probcoll']['planner_type'] != 'teleop':  # TODO hack
            self.execute_control(u_t)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)

        # propagate dynamics
        if t < T - 1:
            x_tp1 = self._dynamics.evolve(x_t, u_t)
            policy_sample.set_X(x_tp1, t=t + 1)

        rate.sleep()

        # see if collision in the past cycle
        policy_sample.set_O([int(self.coll_callback.get() is not None)], t=t, sub_obs='collision')

    return policy_sample
def _evaluate_prediction_trajectory_model(self):
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], [], [], []
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        ### create MPC
        cp_params = params['probcoll']['cost_probcoll']
        cost_cp = CostProbcollPointquad(
            bootstrap,
            None,
            weight=float(cp_params['weight']),
            eval_cost=cp_params['eval_cost'],
            pre_activation=cp_params['pre_activation'])
        mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                  cost_cp,
                                                  additional_costs=[],
                                                  meta_data=params,
                                                  use_threads=False,
                                                  plot=False)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### evaluate on grid
        dx, dy = 20, 20
        samples = []
        itr_costs = []
        thetas, speeds = [], []
        for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
            for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                s = copy.deepcopy(default_sample)
                pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False), t=slice(0, bootstrap.T))

                x = s.get_X(t=0)
                o = s.get_O(t=0)

                # assume straight line trajectories
                p_samples = []
                p_speeds, p_thetas = [], []
                for primitive in mpc_policy._mpc_policies:
                    p_speeds.append(primitive.speed)
                    p_thetas.append(primitive.theta)
                    primitive.act(x, o, t=0, noise=ZeroNoise(params))
                    p_samples.append(primitive._curr_traj)

                prim_costs = mpc_policy._primitives_cost(p_samples)
                min_cost_idx = np.argmin(prim_costs)

                samples.append(p_samples[min_cost_idx])
                itr_costs.append(prim_costs[min_cost_idx])
                thetas.append(p_thetas[min_cost_idx])
                speeds.append(p_speeds[min_cost_idx])

        ### evaluate on grid
        cp_params = params['probcoll']['cost_probcoll']
        eval_cost = cp_params['eval_cost']
        pre_activation = cp_params['pre_activation']

        probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
            samples, num_avg=10, pre_activation=pre_activation)
        bootstrap.close()

        sigmoid = lambda x: (1. / (1. + np.exp(-x)))

        # costs = []
        # for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
        #     cost = 0.
        #     for t in [bootstrap.T - 1]:
        #         speed = 1.
        #         cost += eval(eval_cost)
        #     costs.append(cost / bootstrap.T)
        # costs = np.array(costs)

        itrs_Xs.append(np.array([s.get_X(t=0, sub_state='position')[:2] for s in samples]))
        itrs_thetas.append(np.array(thetas))
        itrs_speeds.append(np.array(speeds) /
                           (params['trajopt']['cost_velocity']['velocity'][0] * dx * dy))  # normalize so fits into grid density
        # itrs_speeds.append(np.ones(len(speeds), dtype=float) / (dx * dy)) # TODO
        if pre_activation:
            probs_mean_batch = sigmoid(probs_mean_batch)
            # probs_std_batch = sigmoid(probs_std_batch)
            probs_std_batch /= probs_std_batch.max()
        itrs_prob_means.append(np.array(probs_mean_batch))
        itrs_prob_stds.append(np.array(probs_std_batch))
        itrs_costs.append(np.array(itr_costs))

        itr += 1
        # if itr >= 2: # TODO
        #     break

    return itrs_Xs, itrs_thetas, itrs_speeds, itrs_prob_means, itrs_prob_stds, itrs_costs
def rosbag_to_sample(rosbag_fname):
    print('Parsing {0}'.format(rosbag_fname))
    bebop_topics = params['bebop']['topics']

    ### read all messages
    bag = rosbag.Bag(rosbag_fname, 'r')
    msg_dict = collections.defaultdict(list)
    start_time = None
    for topic, msg, t in bag.read_messages():
        if start_time is None:
            start_time = t.to_sec()
        else:
            assert(start_time <= t.to_sec())
        msg_dict[topic].append((t.to_sec() - start_time, msg))
    bag.close()

    # get_measured_vel(rosbag_fname, msg_dict) # necessary hack

    if len(msg_dict[bebop_topics['start_rollout']]) == 0:
        print('\tNo start_rollout, returning None')
        return None

    ### sort by time
    for k, v in msg_dict.items():
        msg_dict[k] = sorted(v, key=lambda x: x[0])

    ### prune start and end
    is_coll = (bebop_topics['collision'] in msg_dict.keys())
    start_time = msg_dict[bebop_topics['start_rollout']][0][0]
    end_time = msg_dict[bebop_topics['collision']][0][0] if is_coll else np.inf
    for topic, time_msg in msg_dict.items():
        msg_dict[topic] = [(t, msg) for t, msg in time_msg if start_time <= t <= end_time]

    ### necessary info for Samples
    dt = params['dt']
    T = params['prediction']['dagger']['T']

    ### space by dt
    image_time_msgs = spaced_messages(msg_dict[bebop_topics['image']], dt)[-T:]
    cmd_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['cmd_vel']], dt)[-T:]
    # measured_vel_time_msgs = aligned_messages(image_time_msgs, msg_dict[bebop_topics['measured_vel']], dt)[-T:]

    ### create sample
    sample = Sample(T=len(image_time_msgs), meta_data=params)
    cvb = cv_bridge.CvBridge()
    for t, (image_time_msg, cmd_vel_time_msg) in enumerate(zip(image_time_msgs, cmd_vel_time_msgs)):
        image_msg = image_time_msg[1]
        cmd_vel_msg = cmd_vel_time_msg[1]
        ### image
        im = AgentBebop2d.process_image(image_msg, cvb)
        sample.set_O(im.ravel(), t=t, sub_obs='camera')
        ### collision
        sample.set_O([0], t=t, sub_obs='collision')
        ### linearvel
        sample.set_X([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)
        sample.set_U([cmd_vel_msg.linear.x, cmd_vel_msg.linear.y], t=t)

    sample.set_O([int(is_coll)], t=-1, sub_obs='collision')
    # sample.set_O([int(np.random.random() > 0.5)], t=-1, sub_obs='collision') # tmp

    assert(sample.isfinite())

    return sample
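# Hypothetical sketch (an assumption, not the source implementation): spaced_messages
# and aligned_messages are not shown above. spaced_messages presumably subsamples a
# time-sorted (t, msg) list so that consecutive kept messages are at least dt apart,
# which is how the image stream is discretized before building the Sample.
def spaced_messages_sketch(time_msgs, dt):
    kept = []
    for t, msg in time_msgs:
        if len(kept) == 0 or t - kept[-1][0] >= dt:
            kept.append((t, msg))
    return kept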
def act(self, x, obs, t, noise, ref_traj=None):
    ### create desired path (radiates outward)
    T, dt, theta, speed = self._T, self._meta_data['dt'], self.theta, self.speed

    traj = Sample(meta_data=self._meta_data, T=T)

    ### initialize
    traj.set_X(x, t=0)
    traj.set_O(obs, t=0)

    ### create desired path (radiates outward)
    linearvel = [speed * np.cos(theta), speed * np.sin(theta)]
    for t in xrange(T - 1):
        x_t = traj.get_X(t=t)
        x_tp1 = self._dynamics.evolve(x_t, linearvel)
        traj.set_X(x_tp1, t=t + 1)
        traj.set_U(linearvel, t=t)
    traj.set_U(linearvel, t=-1)

    self._curr_traj = traj

    u = traj.get_U(t=0)
    return u + noise.sample(u)
if __name__ == '__main__':
    rospy.init_node('rosbag_to_sample', anonymous=True)

    yaml_path = os.path.join(FOLDER, 'params_bebop2d.yaml')
    load_params(yaml_path)

    samples = [rosbag_to_sample(os.path.join(FOLDER, f))
               for f in sorted(os.listdir(FOLDER)) if os.path.splitext(f)[1] == '.bag']
    samples = filter(lambda x: x is not None, samples)
    Sample.save(os.path.join(FOLDER, 'samples.npz'), samples)
def _evaluate_probcoll_model_groundtruth(self):
    """
    For each primitive, execute the primitive for T timesteps
    to obtain ground truth, and compare label
    """
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_gt_crashes, itrs_pred_crashes = [], []
    positions, speeds_angles = None, None
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        ### create MPC
        cp_params = params['probcoll']['cost']
        cost_cp = CostProbcollPointquad(
            bootstrap,
            None,
            weight=float(cp_params['weight']),
            eval_cost=cp_params['eval_cost'],
            pre_activation=cp_params['pre_activation'])
        mpc_policy = PrimitivesMPCPolicyPointquad(self._trajopt,
                                                  cost_cp,
                                                  additional_costs=[],
                                                  meta_data=params,
                                                  use_threads=False,
                                                  plot=False)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### speeds/angles
        speeds_angles = [(p.speed, p.theta) for p in mpc_policy._mpc_policies]
        positions = []

        ### evaluate on grid
        dx, dy = 12, 5
        rollouts = []
        for i, xpos in enumerate(np.linspace(-0.5, 1.5, dx)):
            for j, ypos in enumerate(np.linspace(0.6, -0.6, dy)):
                s = copy.deepcopy(default_sample)
                pos = [xpos, ypos, s.get_X(t=0, sub_state='position')[-1]]
                s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
                s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False), t=slice(0, bootstrap.T))

                if np.sum(s.get_O(sub_obs='collision', t=0)) > 0:
                    continue
                positions.append((xpos, ypos))

                x0 = s.get_X(t=0)
                rollouts_ij = [self._agent.sample_policy(x0, p, noise=ZeroNoise(params), T=bootstrap.T)
                               for p in mpc_policy._mpc_policies]
                rollouts.append(rollouts_ij)

        rollouts = np.array(rollouts)
        gt_crashes = np.array([r.get_O(sub_obs='collision').sum() > 0
                               for r in rollouts.ravel()]).reshape(rollouts.shape)
        sigmoid = lambda x: 1. / (1. + np.exp(-x))
        pred_crashes = np.array([sigmoid(mean) > 0.5
                                 for mean in bootstrap.eval_sample_batch(rollouts.ravel(),
                                                                         num_avg=10,
                                                                         pre_activation=True)[0]]).reshape(rollouts.shape)
        bootstrap.close()

        itrs_gt_crashes.append(gt_crashes)
        itrs_pred_crashes.append(pred_crashes)

        itr += 1

    return itrs_gt_crashes, itrs_pred_crashes, positions, speeds_angles
def _cem_step(self, x0, obs, mean, cov, M, K, lower_bound, upper_bound,
              dU, T, dynamics, cost_funcs, plot_env=None):
    """
    :param mean: mean of trajectory controls Gaussian
    :param obs: observation
    :param cov: covariance of trajectory controls Gaussian
    :param M: sample M controls
    :param K: keep K lowest cost trajectories
    :return: mean, cov
    """
    ### sample trajectories
    M_controls = []
    while len(M_controls) < M:
        control = np.random.multivariate_normal(mean, cov)
        if np.all(control < upper_bound) and np.all(control > lower_bound):
            M_controls.append(control)

    samples = []
    for U_flat in M_controls:
        sample = Sample(meta_data=self._meta_data, T=T)
        U = U_flat.reshape(T, dU)
        U[:, -1] = 0.  # TODO hack
        sample.set_U(U, t=slice(0, T))
        sample.set_O(obs, t=0)
        sample.set_X(x0, t=0)
        for sub_control, u_sub in self._config['fixed'].items():
            sample.set_U(np.array([list(u_sub) * T]).T, t=slice(0, T), sub_control=sub_control)
        sample.rollout(dynamics)
        samples.append(sample)

    ### keep lowest K cost trajectories
    costs = [0.] * len(samples)
    for cost_func in cost_funcs:
        if hasattr(cost_func, 'eval_batch'):
            costs = [c + cost_func_approx.J
                     for c, cost_func_approx in zip(costs, cost_func.eval_batch(samples))]
        else:
            costs = [c + cost_func.eval(sample).J for c, sample in zip(costs, samples)]
    samples_costs = zip(samples, costs)
    samples_costs = sorted(samples_costs, key=lambda x: x[1])
    K_samples = [x[0] for x in samples_costs[:K]]

    ### fit Gaussian
    data = np.vstack([s.get_U().ravel() for s in K_samples])
    new_mean = np.mean(data, axis=0)
    new_cov = np.cov(data, rowvar=0)

    if plot_env:
        plot_env.rave_env.clear_plots()
        colors = [(0, 1, 0)] * K + [(1, 0, 0)] * (M - K)
        for sample, color in zip([x[0] for x in samples_costs], colors):
            for t in xrange(T - 1):
                plot_env.rave_env.plot_segment(
                    sample.get_X(t=t, sub_state='position'),
                    sample.get_X(t=t + 1, sub_state='position'),
                    color=color)

    return new_mean, new_cov
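# Minimal self-contained sketch of the cross-entropy step above (for illustration
# only; a toy quadratic cost and plain numpy stand in for the Sample/dynamics/cost
# machinery that _cem_step uses): sample M candidates, keep the K cheapest, refit
# the Gaussian to the elites.
import numpy as np

def cem_step_toy(mean, cov, M=64, K=8, cost=lambda u: np.sum(u ** 2)):
    """Sample M candidate control vectors, keep the K lowest-cost ones, refit the Gaussian."""
    candidates = np.random.multivariate_normal(mean, cov, size=M)
    costs = np.array([cost(u) for u in candidates])
    elite = candidates[np.argsort(costs)[:K]]
    return np.mean(elite, axis=0), np.cov(elite, rowvar=False)

mean, cov = np.zeros(4), np.eye(4)
for _ in range(5):
    mean, cov = cem_step_toy(mean, cov)  # the mean contracts toward the cost minimum at 0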
def plot_final_policy_combined(exp_groups, des_mps, des_speed, min_speed, ax=None, fs=20., save_path=None):
    dt = 0.5
    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(5, 3.5))
    else:
        f = ax.get_figure()

    exp_groups.insert(0, [{
        'folder': os.path.join(EXP_FOLDER, 'random'),
        'label': 'Random policy',
        'color': cm.Greys(1.0)
    }])

    for exp_group_num, exp_group in enumerate(exp_groups):
        distance_travelled = []
        for exp in exp_group:
            samples_folder = os.path.join(exp['folder'], 'prediction/final_policy/')
            samples_fnames = [os.path.join(samples_folder, fname)
                              for fname in os.listdir(samples_folder) if fname[-4:] == '.npz']
            assert(len(samples_fnames) == 1)
            samples_fname = samples_fnames[0]
            samples = Sample.load(samples_fname)

            for sample in samples:
                distance = dt * (des_mps / des_speed) * (sample.get_U()[:, 1] - min_speed).sum()
                distance_travelled.append(distance)

        width = 0.8 / len(exp_groups)
        ax.bar([exp_group_num * width], [np.mean(distance_travelled)],
               yerr=[np.std(distance_travelled)],
               width=width * 0.8,
               error_kw=dict(lw=4, capsize=5, capthick=2),
               color=exp_group[0]['color'],
               ecolor=cm.Greys(0.5),
               label=exp_group[0]['label'])

    # for exp_group in exp_groups:
    #     ax.plot([], [], lw=5., label=exp_group[0]['label'], color=exp_group[0]['color'])
    lgd = ax.legend(loc='upper center', fontsize=fs * 0.8, bbox_to_anchor=(1.35, 0.7))

    ax.set_xlim((-0.05, width * len(exp_groups)))
    ax.set_ylim((-0.3, ax.get_ylim()[1]))
    ax.set_ylabel(r'Distance travelled (m)', fontsize=fs)
    for tick in ax.xaxis.get_major_ticks() + ax.xaxis.get_minor_ticks():
        tick.label1.set_visible(False)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(fs)

    if save_path is not None:
        f.savefig(save_path, bbox_extra_artists=(lgd,), bbox_inches='tight')
def _evaluate_probcoll_model(self):
    ### get prediction model evaluations for all iterations
    itr = 0
    itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs = [], [], [], []
    while True:
        ### load NN
        model_file = self._itr_model_file(itr)
        if not os.path.exists(model_file):
            break
        bootstrap = ProbcollModelPointquad(read_only=True, finalize=False)
        bootstrap.load(model_file=model_file)

        wd = self._load_world_file(itr, 0, 0)
        self._world.env.clear()
        for pose, radius, height in zip(wd['cyl_poses'], wd['cyl_radii'], wd['cyl_heights']):
            self._world.env.add_cylinder(pose, radius, height)
        for pose, extents in zip(wd['box_poses'], wd['box_extents']):
            self._world.env.add_box(pose, extents)

        default_sample = Sample(meta_data=params, T=bootstrap.T)
        for k, v in params['probcoll']['conditions']['default'].items():
            default_sample.set_X(v, t=slice(0, bootstrap.T), sub_state=k)

        ### evaluate on grid
        dx, dy = 30, 30
        positions = np.dstack(np.meshgrid(np.linspace(-0.5, 3.0, dx),
                                          np.linspace(1.0, -1.0, dy))).reshape(dx * dy, 2)
        samples = []
        for pos in positions:
            s = copy.deepcopy(default_sample)
            pos = np.concatenate((pos, [s.get_X(t=0, sub_state='position')[-1]]))
            s.set_X(pos, t=slice(0, bootstrap.T), sub_state='position')
            s.set_O(self._agent.get_observation(s.get_X(t=0), noise=False), t=slice(0, bootstrap.T))
            s.set_U(np.zeros((bootstrap.T, bootstrap.dU)), t=slice(0, bootstrap.T))
            samples.append(s)

        cp_params = params['probcoll']['cost_probcoll']
        eval_cost = cp_params['eval_cost']
        pre_activation = cp_params['pre_activation']

        probs_mean_batch, probs_std_batch = bootstrap.eval_sample_batch(
            samples, num_avg=10, pre_activation=pre_activation)
        bootstrap.close()

        sigmoid = lambda x: (1. / (1. + np.exp(-x)))

        costs = []
        for probs_mean, probs_std in zip(probs_mean_batch, probs_std_batch):
            cost = 0.
            for t in [bootstrap.T - 1]:
                speed = 1.
                cost += eval(eval_cost)
            costs.append(cost / bootstrap.T)
        costs = np.array(costs)

        # if itr == 3:
        #     import IPython; IPython.embed()

        Xs = np.array([s.get_X(t=0, sub_state='position')[:2] for s in samples]).reshape(dx, dy, 2)
        itrs_Xs.append(Xs)
        if pre_activation:
            probs_mean_batch = sigmoid(probs_mean_batch)
            # probs_std_batch = sigmoid(probs_std_batch)
            probs_std_batch /= probs_std_batch.max()
        itrs_prob_means.append(probs_mean_batch.max(axis=1).reshape(dx, dy))
        itrs_prob_stds.append(probs_std_batch.max(axis=1).reshape(dx, dy))
        itrs_costs.append(costs.reshape(dx, dy))

        # if itr >= 3:
        #     break
        itr += 1

    return itrs_Xs, itrs_prob_means, itrs_prob_stds, itrs_costs
def plot_final_policy(exp_groups, des_mps, des_speed, min_speed, ax=None, fs=20., save_path=None):
    dt = 0.5
    if ax is None:
        f, ax = plt.subplots(1, 1, figsize=(10, 3.5))
    else:
        f = ax.get_figure()

    exp_groups.insert(0, [{
        'folder': os.path.join(EXP_FOLDER, 'random'),
        'label': 'Random policy',
        'color': cm.Greys(0.5)
    }])

    for exp_num, exp in enumerate(flatten_list(exp_groups)):
        samples_folder = os.path.join(exp['folder'], 'prediction/final_policy/')
        samples_fnames = [os.path.join(samples_folder, fname)
                          for fname in os.listdir(samples_folder) if fname[-4:] == '.npz']
        assert(len(samples_fnames) == 1)
        samples_fname = samples_fnames[0]
        samples = Sample.load(samples_fname)

        distance_travelled = []
        for sample in samples:
            distance = dt * (des_mps / des_speed) * (sample.get_U()[:, 1] - min_speed).sum()
            distance_travelled.append(distance)

        # ax.plot(range(len(distance_travelled)), distance_travelled,
        #         linestyle='',
        #         marker=exp['marker'],
        #         color=exp['color'],
        #         mew=exp['mew'])
        ax.bar(np.arange(len(distance_travelled)) + 0.15 * exp_num + 0.1,
               distance_travelled,
               width=0.15,
               color=exp['color'])

    for exp_group in exp_groups:
        ax.plot([], [], lw=5., label=exp_group[0]['label'], color=exp_group[0]['color'])
    lgd = ax.legend(loc='upper right', fontsize=fs * 0.8)

    ax.set_xlabel(r'Starting position', fontsize=fs)
    ax.set_ylabel(r'Distance travelled (m)', fontsize=fs)
    N = len(distance_travelled)
    ax.set_xticks(np.arange(N) + 0.5)
    ax.set_xticklabels([str(x) for x in range(N)])
    # Hide major tick labels and customize minor tick labels
    ax.xaxis.set_minor_formatter(ticker.NullFormatter())
    ax.xaxis.set_major_locator(ticker.FixedLocator(np.arange(N) + 0.5))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter([r'{0}'.format(str(x)) for x in range(N)]))
    for tick in ax.xaxis.get_major_ticks() + ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(fs)

    if save_path is not None:
        f.savefig(save_path, bbox_extra_artists=(lgd,), bbox_inches='tight')
def _itr_save_samples(self, itr, samples, prefix=''):
    Sample.save(self._itr_samples_file(itr, prefix=prefix), samples)
def _run_rollout(self, itr):
    T = params['probcoll']['T']
    label_with_noise = params['probcoll']['label_with_noise']

    samples = []
    world_infos = []
    mpc_infos = []

    self._conditions.reset()
    for cond in xrange(self._conditions.length):
        rep = 0
        while rep < self._conditions.repeats:
            self._logger.info('\t\tStarting cond {0} rep {1} itr {2}'.format(cond, rep, itr))
            start = time.time()
            if (cond == 0 and rep == 0) or self._world.randomize:
                self._reset_world(itr, cond, rep)

            x0 = self._conditions.get_cond(cond, rep=rep)
            sample_T = Sample(meta_data=params, T=T)
            sample_T.set_X(x0, t=0)

            for t in xrange(T):
                self._update_world(sample_T, t)

                x0 = sample_T.get_X(t=t)

                rollout, rollout_no_noise = self._agent.sample_policy(
                    x0, self._mpc_policy, T=1, only_noise=label_with_noise)

                o = rollout.get_O(t=0)
                if label_with_noise:
                    u = rollout.get_U(t=0)
                else:
                    u = rollout_no_noise.get_U(t=0)

                sample_T.set_U(u, t=t)
                sample_T.set_O(o, t=t)

                if not self._use_dynamics:
                    sample_T.set_X(rollout.get_X(t=0), t=t)

                if self._world.is_collision(sample_T, t=t):
                    self._logger.warning('\t\t\tCrashed at t={0}'.format(t))
                    break

                if self._use_dynamics:
                    if t < T - 1:
                        x_tp1 = self._dynamics.evolve(x0, u)
                        sample_T.set_X(x_tp1, t=t + 1)

                if hasattr(self._mpc_policy, '_curr_traj'):
                    self._world.update_visualization(sample_T, self._mpc_policy._curr_traj, t)
            else:
                self._logger.info('\t\t\tLasted for t={0}'.format(t))

            sample = sample_T.match(slice(0, t + 1))
            world_info = self._get_world_info()
            mpc_info = self._mpc_policy.get_info()

            if not self._is_good_rollout(sample, t):
                self._logger.warning('\t\t\tNot good rollout. Repeating rollout.'.format(t))
                continue

            samples.append(sample)
            world_infos.append(world_info)
            mpc_infos.append(mpc_info)

            assert(samples[-1].isfinite())
            elapsed = time.time() - start
            self._logger.info(
                '\t\t\tFinished cond {0} rep {1} ({2:.1f}s, {3:.3f}x real-time)'.format(
                    cond, rep, elapsed, t * params['dt'] / elapsed))
            rep += 1

    self._itr_save_samples(itr, samples)
    self._itr_save_worlds(itr, world_infos)
    self._itr_save_mpcs(itr, mpc_infos)

    self._reset_world(itr, 0, 0)  # leave the world as it was
def sample_policy(self, x0, policy, T=None, only_noise=False, **policy_args):
    """
    Run the policy and collect the trajectory data

    :param x0: initial state
    :param policy: to execute
    :param policy_args: e.g. ref_traj, noise, etc
    :rtype: (Sample, Sample)
    """
    if T is None:
        T = policy._T
    policy_sample = Sample(meta_data=params, T=T)
    policy_sample.set_X(x0, t=0)
    policy_sample_no_noise = Sample(meta_data=params, T=T)
    for t in xrange(T):
        # get observation and act
        x_t = policy_sample.get_X(t=t)
        o_t = self.get_observation(x_t)
        u_t, u_t_no_noise = policy.act(x_t, o_t, t)

        # record
        policy_sample.set_X(x_t, t=t)
        policy_sample.set_O(o_t, t=t)
        policy_sample.set_U(u_t, t=t)
        if not only_noise:
            policy_sample_no_noise.set_U(u_t_no_noise, t=t)

        # propagate dynamics
        if t < T - 1:
            x_tp1 = self._dynamics.evolve(x_t, u_t)
            policy_sample.set_X(x_tp1, t=t + 1)

    return policy_sample, policy_sample_no_noise
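# Hypothetical usage sketch (not from the source), mirroring how _run_rollout and
# _run_testing above consume sample_policy one timestep at a time; `agent`,
# `mpc_policy`, and `x0` are assumed to exist in the caller's scope:
#
#     rollout, rollout_no_noise = agent.sample_policy(x0, mpc_policy, T=1)
#     o = rollout.get_O(t=0)           # observation seen during execution
#     u = rollout_no_noise.get_U(t=0)  # noise-free control, used as the training label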
def plan(self, x0, obs, dynamics, cost_funcs, dU, T,
         init_traj=None, plot_env=None, init_cov=None, config=None):
    """
    :type init_traj: Sample
    :type dynamics: Dynamics
    :type cost_func: Cost
    """
    if config is None:
        config = self._config

    self.means, self.covs = [], []

    if init_traj is not None:
        mean = init_traj.get_U().ravel()
    elif 'init_u' in config:
        mean = config['init_u'] * T
    else:
        mean = np.zeros(dU * T)
    lower_bound = config['bounds']['min'] * T if 'bounds' in config else -np.inf * np.ones(dU * T)
    upper_bound = config['bounds']['max'] * T if 'bounds' in config else np.inf * np.ones(dU * T)

    ### run one iteration of CEM with extra samples
    # if init_cov is None:
    cov = config['init_var'] * np.eye(dU * T)
    # else:
    #     cov = config['init_var'] * init_cov
    mean, cov = self._cem_step(x0, obs, mean, cov,
                               config['init_M'], config['K'],
                               lower_bound, upper_bound,
                               dU, T, dynamics, cost_funcs,
                               plot_env=plot_env)
    self.means.append(mean)
    self.covs.append(cov)
    if plot_env:
        pass  # raw_input('CEM iter 0')

    ### remaining iterations
    for iter in xrange(1, config['iters']):
        mean, cov = self._cem_step(x0, obs, mean, cov,
                                   config['M'], config['K'],
                                   lower_bound, upper_bound,
                                   dU, T, dynamics, cost_funcs,
                                   plot_env=plot_env)
        self.means.append(mean)
        self.covs.append(cov)
        if plot_env:
            pass  # raw_input('CEM iter {0}'.format(iter))

    sample = Sample(meta_data=self._meta_data, T=T)
    U = mean.reshape(T, dU)
    U[:, -1] = 0.  # TODO hack
    sample.set_U(U, t=slice(0, T))
    sample.set_O(obs, t=0)
    sample.set_X(x0, t=0)
    sample.rollout(dynamics)

    return sample
def _itr_load_samples(self, itr):
    fname = os.path.join(self._itr_dir(itr), 'samples_itr_{0}.npz'.format(itr))
    return Sample.load(fname)