def _reset_start_goal_manually(self, start_pos, goal_pos): p = self.params.reset_params.start_config start_112 = np.array([[start_pos]], dtype=np.float32) goal_112 = np.array([[goal_pos]], dtype=np.float32) heading_111 = np.zeros((1, 1, 1)) speed_111 = np.zeros((1, 1, 1)) ang_speed_111 = np.zeros((1, 1, 1)) # Initialize the start configuration self.start_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=start_112, heading_nk1=heading_111, speed_nk1=speed_111, angular_speed_nk1=ang_speed_111) # The system dynamics may need the current starting position for # coordinate transforms (i.e. realistic simulation) self.system_dynamics.reset_start_state(self.start_config) # Initialize the goal configuration self.goal_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=goal_112) return False
def load_and_process_data(self, filename, discard_lqr_controller_data=False, discard_precomputed_lqr_trajectories=False, track_trajectory_acceleration=False): """Load control pipeline data from a pickle file and process it so that it can be used by the pipeline.""" with open(filename, 'rb') as f: data = pickle.load(f) # To save memory, when discard_precomputed_lqr_trajectories is true the lqr_trajectories variables can be # discarded. dt = data['lqr_trajectories']['dt'] n = data['lqr_trajectories']['n'] if discard_precomputed_lqr_trajectories: lqr_trajectories = Trajectory(dt=dt, n=n, k=0) else: lqr_trajectories = Trajectory.init_from_numpy_repr( track_trajectory_acceleration=track_trajectory_acceleration, **data['lqr_trajectories']) # To save memory the LQR controllers and reference trajectories (spline trajectories) can be discarded when not # needed (i.e. in simulation when the saved lqr_trajectory is the exact result of applying the saved LQR # controllers. n = data['spline_trajectories']['n'] if discard_lqr_controller_data: spline_trajectories = Trajectory(dt=dt, n=n, k=0) K_nkfd = tf.zeros((2, 1, 1, 1), dtype=np.float32) k_nkf1 = tf.zeros((2, 1, 1, 1), dtype=np.float32) else: spline_trajectories = Trajectory.init_from_numpy_repr( track_trajectory_acceleration=track_trajectory_acceleration, **data['spline_trajectories']) K_nkfd = tf.constant(data['K_nkfd']) k_nkf1 = tf.constant(data['k_nkf1']) # Load remaining variables start_speeds = tf.constant(data['start_speeds']) start_configs = SystemConfig.init_from_numpy_repr( track_trajectory_acceleration=track_trajectory_acceleration, **data['start_configs']) waypt_configs = SystemConfig.init_from_numpy_repr( track_trajectory_acceleration=track_trajectory_acceleration, **data['waypt_configs']) horizons = tf.constant(data['horizons']) data_processed = { 'start_speeds': start_speeds, 'start_configs': start_configs, 'waypt_configs': waypt_configs, 'spline_trajectories': spline_trajectories, 'horizons': horizons, 'lqr_trajectories': lqr_trajectories, 'K_nkfd': K_nkfd, 'k_nkf1': k_nkf1 } return data_processed
def optimize(self, start_config): """ Optimize the objective over a trajectory starting from start_config. """ p = self.params model = p.model raw_data = self._raw_data(start_config) processed_data = model.create_nn_inputs_and_outputs(raw_data) # Predict the NN output nn_output_113 = model.predict_nn_output_with_postprocessing( processed_data['inputs'], is_training=False)[:, None] # Transform to World Coordinates waypoint_ego_config = SystemConfig( dt=self.params.dt, n=1, k=1, position_nk2=nn_output_113[:, :, :2], heading_nk1=nn_output_113[:, :, 2:3]) self.params.system_dynamics.to_world_coordinates( start_config, waypoint_ego_config, self.waypoint_world_config) # Evaluate the objective and retrieve Control Pipeline data obj_vals, data = self.eval_objective(start_config, self.waypoint_world_config) # The batch dimension is length 1 since there is only one waypoint min_idx = 0 min_cost = obj_vals[min_idx] waypts, horizons_s, trajectories_lqr, trajectories_spline, controllers = data self.opt_waypt.assign_from_config_batch_idx(waypts, min_idx) self.opt_traj.assign_from_trajectory_batch_idx(trajectories_lqr, min_idx) # Convert horizon in seconds to horizon in # of steps min_horizon = int( tf.ceil(horizons_s[min_idx, 0] / self.params.dt).numpy()) data = { 'system_config': SystemConfig.copy(start_config), 'waypoint_config': SystemConfig.copy(self.opt_waypt), 'trajectory': Trajectory.copy(self.opt_traj), 'spline_trajectory': Trajectory.copy(trajectories_spline), 'planning_horizon': min_horizon, 'K_nkfd': controllers['K_nkfd'][min_idx:min_idx + 1], 'k_nkf1': controllers['k_nkf1'][min_idx:min_idx + 1], 'img_nmkd': raw_data['img_nmkd'] } return data
def gather_across_batch_dim(self, data, idxs): """ For each key in data gather idxs across the batch dimension creating a new data dictionary.""" data_bin = {} data_bin['waypt_configs'] = SystemConfig.gather_across_batch_dim_and_create(data['waypt_configs'], idxs) data_bin['start_configs'] = SystemConfig.gather_across_batch_dim_and_create(data['start_configs'], idxs) data_bin['start_speeds'] = tf.gather(data['start_speeds'], idxs, axis=0) data_bin['spline_trajectories'] = Trajectory.gather_across_batch_dim_and_create(data['spline_trajectories'], idxs) data_bin['horizons'] = tf.gather(data['horizons'], idxs, axis=0) data_bin['lqr_trajectories'] = Trajectory.gather_across_batch_dim_and_create(data['lqr_trajectories'], idxs) data_bin['K_nkfd'] = tf.gather(data['K_nkfd'], idxs, axis=0) data_bin['k_nkf1'] = tf.gather(data['k_nkf1'], idxs, axis=0) return data_bin
def mask_and_concat_data_along_batch_dim(data, k): """Keeps the elements in data which were produced before time index k. Concatenates each list in data along the batch dim after masking. Also returns data from the first segment not in the valid mask.""" # Extract the Index of the Last Data Segment data_times = np.cumsum([traj.k for traj in data['trajectory']]) valid_mask = (data_times <= k) data_last = {} last_data_idxs = np.where(np.logical_not(valid_mask))[0] # Take the first last_data_idx if len(last_data_idxs) > 0: last_data_idx = last_data_idxs[0] last_data_valid = True else: # Take the last element as it is not valid anyway last_data_idx = len(valid_mask) - 1 last_data_valid = False # Get the last segment data data_last['system_config'] = data['system_config'][last_data_idx] data_last['waypoint_config'] = data['waypoint_config'][last_data_idx] data_last['trajectory'] = data['trajectory'][last_data_idx] data_last['spline_trajectory'] = data['spline_trajectory'][ last_data_idx] data_last['planning_horizon_n1'] = [ data['planning_horizon'][last_data_idx] ] data_last['K_nkfd'] = data['K_nkfd'][last_data_idx] data_last['k_nkf1'] = data['k_nkf1'][last_data_idx] data_last['img_nmkd'] = data['img_nmkd'][last_data_idx] # Get the main planner data data['system_config'] = SystemConfig.concat_across_batch_dim( np.array(data['system_config'])[valid_mask]) data['waypoint_config'] = SystemConfig.concat_across_batch_dim( np.array(data['waypoint_config'])[valid_mask]) data['trajectory'] = Trajectory.concat_across_batch_dim( np.array(data['trajectory'])[valid_mask]) data['spline_trajectory'] = Trajectory.concat_across_batch_dim( np.array(data['spline_trajectory'])[valid_mask]) data['planning_horizon_n1'] = np.array( data['planning_horizon'])[valid_mask][:, None] data['K_nkfd'] = tf.boolean_mask(tf.concat(data['K_nkfd'], axis=0), valid_mask) data['k_nkf1'] = tf.boolean_mask(tf.concat(data['k_nkf1'], axis=0), valid_mask) data['img_nmkd'] = np.array(np.concatenate(data['img_nmkd'], axis=0))[valid_mask] return data, data_last, last_data_valid
def concat_data_across_binning_dim(self, data): """Concatenate across the binning dimension. It is asummed that data is a dictionary where each key maps to a list of tensors, Trajectory, or System Config objects. The concatenated results are stored in lists of length 1 for each key (i.e. only one bin).""" data['start_speeds'] = [tf.concat(data['start_speeds'], axis=0)] data['start_configs'] = [SystemConfig.concat_across_batch_dim(data['start_configs'])] data['waypt_configs'] = [SystemConfig.concat_across_batch_dim(data['waypt_configs'])] data['spline_trajectories'] = [Trajectory.concat_across_batch_dim(data['spline_trajectories'])] data['horizons'] = [tf.concat(data['horizons'], axis=0)] data['lqr_trajectories'] = [Trajectory.concat_across_batch_dim(data['lqr_trajectories'])] data['K_nkfd'] = [tf.concat(data['K_nkfd'], axis=0)] data['k_nkf1'] = [tf.concat(data['k_nkf1'], axis=0)] return data
def optimize(self, start_config, goal_config=None, sim_state_hist=None): """ Optimize the objective over a trajectory starting from start_config. 1. Uses a control pipeline to plan paths from start_config 2. Evaluates the objective function on the resulting trajectories 3. Return the minimum cost waypoint, trajectory, and cost """ # TODO: # changing the code so that sim_state is all history obj_vals, data = self.eval_objective(start_config, goal_config, sim_state_hist=sim_state_hist) min_idx = np.argmin(obj_vals) min_cost = obj_vals[min_idx] waypts, horizons_s, trajectories_lqr, trajectories_spline, controllers = data self.opt_waypt.assign_from_config_batch_idx(waypts, min_idx) self.opt_traj.assign_from_trajectory_batch_idx(trajectories_lqr, min_idx) # Convert horizon in seconds to horizon in # of steps try: min_horizon = int(np.ceil(horizons_s[min_idx] / self.params.dt)) except: min_horizon = int(np.ceil(horizons_s[min_idx, 0] / self.params.dt)) # If the real LQR data has been discarded just take the first element # since it will be all zeros if self.params.control_pipeline_params.discard_LQR_controller_data: K_nkfd = controllers['K_nkfd'][0:1] k_nkf1 = controllers['k_nkf1'][0:1] else: K_nkfd = controllers['K_nkfd'][min_idx:min_idx + 1] k_nkf1 = controllers['k_nkf1'][min_idx:min_idx + 1] data = { 'system_config': SystemConfig.copy(start_config), 'waypoint_config': SystemConfig.copy(self.opt_waypt), 'trajectory': Trajectory.copy(self.opt_traj), 'spline_trajectory': Trajectory.copy(trajectories_spline), 'planning_horizon': min_horizon, 'K_nkfd': K_nkfd, 'k_nkf1': k_nkf1, 'img_nmkd': [] } # Dont think we need for our purposes return data
def test_lqr_feedback_coordinate_transform(): p = create_params() n, k = p.n, p.k dubins = p.system_dynamics_params.system(p.dt, params=p.simulation_params) # # Robot starts from (0, 0, 0) # # and does a small spiral start_pos_n13 = tf.constant(np.array([[[0.0, 0.0, 0.0]]], dtype=np.float32)) speed_nk1 = np.ones((n, k - 1, 1), dtype=np.float32) * 2.0 angular_speed_nk1 = np.linspace(1.5, 1.3, k - 1, dtype=np.float32)[None, :, None] u_nk2 = tf.constant(np.concatenate([speed_nk1, angular_speed_nk1], axis=2)) traj_ref_egocentric = dubins.simulate_T(start_pos_n13, u_nk2, k) cost_fn = QuadraticRegulatorRef(traj_ref_egocentric, dubins, p) lqr_solver = LQRSolver(T=k - 1, dynamics=dubins, cost=cost_fn) start_config0 = SystemConfig(dt=p.dt, n=p.n, k=1, position_nk2=start_pos_n13[:, :, :2], heading_nk1=start_pos_n13[:, :, 2:3]) lqr_res = lqr_solver.lqr(start_config0, traj_ref_egocentric, verbose=False) traj_lqr_egocentric = lqr_res['trajectory_opt'] K_array_egocentric = lqr_res['K_opt_nkfd'] k_array = lqr_res['k_opt_nkf1'] # The origin of the egocentric frame in world coordinates start_pos_n13 = tf.constant(np.array([[[1.0, 1.0, np.pi / 2.]]], dtype=np.float32)) ref_config = SystemConfig(dt=p.dt, n=p.n, k=1, position_nk2=start_pos_n13[:, :, :2], heading_nk1=start_pos_n13[:, :, 2:3]) # Convert to world coordinates traj_ref_world = dubins.to_world_coordinates(ref_config, traj_ref_egocentric, mode='new') traj_lqr_world = dubins.to_world_coordinates(ref_config, traj_lqr_egocentric, mode='new') K_array_world = dubins.convert_K_to_world_coordinates(ref_config, K_array_egocentric, mode='new') # Apply K_array_world to the system from ref_config traj_test_world = lqr_solver.apply_control(ref_config, traj_ref_world, k_array, K_array_world) # Check that the LQR Trajectory computed using K_array_egocentric # then transformed to world (traj_lqr_world) matches the # LQR Trajectory computed directly using K_array_world assert((tf.norm(traj_lqr_world.position_nk2() - traj_test_world.position_nk2(), axis=2).numpy() < 1e-4).all()) assert((tf.norm(angle_normalize(traj_lqr_world.heading_nk1() - traj_test_world.heading_nk1()), axis=2).numpy() < 1e-4).all()) assert((tf.norm(traj_lqr_world.speed_nk1() - traj_test_world.speed_nk1(), axis=2).numpy() < 1e-4).all()) assert((tf.norm(traj_lqr_world.angular_speed_nk1() - traj_test_world.angular_speed_nk1(), axis=2).numpy() < 1e-4).all())
def visualize_coordinate_transform(): """Visual sanity check that coordinate transforms are working. """ fig, _, axs = utils.subplot2(plt, (2, 2), (8, 8), (.4, .4)) axs = axs[::-1] n, k = 1, 30 dt = .1 dubins_car = DubinsV1(dt=dt) traj_egocentric = Trajectory(dt=dt, n=n, k=k, variable=True) traj_world = Trajectory(dt=dt, n=n, k=k, variable=True) # Form a trajectory in global frame # convert to egocentric and back start_pos_global_n12 = tf.constant([[[1.0, 1.0]]], dtype=tf.float32) start_heading_global_n11 = tf.constant([[[np.pi / 2.]]], dtype=tf.float32) start_config_global = SystemConfig(dt=dt, n=n, k=1, position_nk2=start_pos_global_n12, heading_nk1=start_heading_global_n11) start_n13 = tf.concat([start_pos_global_n12, start_heading_global_n11], axis=2) u_n12 = np.array([[[.01, .1]]], dtype=np.float32) u_nk2 = tf.constant(np.broadcast_to(u_n12, (n, k, 2)), dtype=tf.float32) trajectory_world = dubins_car.simulate_T(start_n13, u_nk2, T=k - 1) trajectory_world.render([axs[0]], batch_idx=0, freq=4, name='World') # Convert to egocentric dubins_car.to_egocentric_coordinates(start_config_global, trajectory_world, traj_egocentric) traj_egocentric.render([axs[1]], batch_idx=0, freq=4, name='Egocentric') dubins_car.to_world_coordinates(start_config_global, traj_egocentric, traj_world) traj_world.render([axs[2]], batch_idx=0, freq=4, name='World #2') plt.savefig('./tmp/coordinate_transform.png', bbox_inches='tight')
def mask_and_concat_data_along_batch_dim(data, k): """Keeps the elements in data which were produced before time index k. Concatenates each list in data along the batch dim after masking.""" # Extract the Index of the Last Data Segment data_times = np.cumsum( [u_nk2.shape[1].value for u_nk2 in data['optimal_control_nk2']]) valid_mask = (data_times <= k) data_last = {} last_data_idxs = np.where(np.logical_not(valid_mask))[0] # Take the first last_data_idx if len(last_data_idxs) > 0: last_data_idx = last_data_idxs[0] last_data_valid = True else: # Take the last element as it is not valid anyway last_data_idx = len(valid_mask) - 1 last_data_valid = False # Get the last segment data data_last['system_config'] = data['system_config'][last_data_idx] data_last['optimal_control_nk2'] = data['optimal_control_nk2'][ last_data_idx] data_last['img_nmkd'] = data['img_nmkd'][last_data_idx] # Get the main planner data data['system_config'] = SystemConfig.concat_across_batch_dim( np.array(data['system_config'])[valid_mask]) data['optimal_control_nk2'] = tf.boolean_mask( tf.concat(data['optimal_control_nk2'], axis=0), valid_mask) data['img_nmkd'] = np.array(np.concatenate(data['img_nmkd'], axis=0))[valid_mask] return data, data_last, last_data_valid
def plan(self): """ Runs the planner for one step from config to generate a subtrajectory, the resulting robot config after the robot executes the subtrajectory, and generates relevant planner data NOTE: this planner only considers obstacles in the environment, not collisions with other agents/personal space """ assert(hasattr(self, 'planner')) assert(self.sim_dt is not None) # Generate the next trajectory segment, update next config, update actions/data if self.end_acting: return self.planner_data = self.planner.optimize(self.planned_next_config, self.goal_config) traj_segment = \ Trajectory.new_traj_clip_along_time_axis(self.planner_data['trajectory'], self.params.control_horizon, repeat_second_to_last_speed=True) self.planned_next_config = \ SystemConfig.init_config_from_trajectory_time_index( traj_segment, t=-1) tr_acc = self.params.planner_params.track_accel self.trajectory.append_along_time_axis(traj_segment, track_trajectory_acceleration=tr_acc) self.enforce_termination_conditions()
def act(self): """ A utility method to initialize a config object from a particular timestep of a given trajectory object""" if self.end_acting: # exit if there is no more acting to do return assert(self.sim_dt is not None) step = int(np.floor(self.sim_dt / self.params.dt)) # first check for collisions with any other gen_agents self.check_collisions(self.world_state) # then update the current config incrementally (can teleport to end if t=-1) new_config = SystemConfig.init_config_from_trajectory_time_index(self.trajectory, t=self.path_step) self.set_current_config(new_config) # updating "next step" for agent path after traversing it self.path_step += step # considers a full on collision once the agent has passed its "collision point" if self.path_step >= self.trajectory.k or self.path_step >= self.collision_point_k: self.end_acting = True if self.end_acting: if self.params.verbose: print("terminated act for agent", self.get_name()) # save memory by deleting control pipeline (very memory intensive) del self.planner # reset the collision cooldown (to "uncollided") if it has reached 0 if(self.collision_cooldown > 0): self.collision_cooldown -= 1
def _set_instance_variables(self, data): """Set the control pipelines instance variables from a data dictionary.""" self.start_configs = data['start_configs'] self.waypt_configs = data['waypt_configs'] self.start_speeds = data['start_speeds'] self.spline_trajectories = data['spline_trajectories'] self.horizons = data['horizons'] self.lqr_trajectories = data['lqr_trajectories'] self.K_nkfd = data['K_nkfd'] self.k_nkf1 = data['k_nkf1'] # Initialize variable tensor for waypoints in world coordinates dt = self.params.system_dynamics_params.dt self.waypt_configs_world = [ SystemConfig(dt=dt, n=config.n, k=1, variable=True, track_trajectory_acceleration=self.params. track_trajectory_acceleration) for config in data['start_configs'] ] self.instance_variables_loaded = True if self.params.verbose: N = self.params.waypoint_params.n for v0, start_config in zip(self.start_velocities, self.start_configs): print('Velocity: {:.3f}, {:.3f}% of goals kept({:d}).'.format( v0, 100. * start_config.n / N, start_config.n))
def _iterate(self, config): """ Runs the planner for one step from config to generate a subtrajectory, the resulting robot config after the robot executes the subtrajectory, and relevant planner data""" planner_data = self.planner.optimize(config) trajectory_segment, trajectory_data, commanded_actions_nkf = self._process_planner_data(config, planner_data) next_config = SystemConfig.init_config_from_trajectory_time_index(trajectory_segment, t=-1) return trajectory_segment, next_config, trajectory_data, commanded_actions_nkf
def optimize(self, start_config): """ Optimize the objective over a trajectory starting from start_config. 1. Uses a control pipeline to plan paths from start_config 2. Evaluates the objective function on the resulting trajectories 3. Return the minimum cost waypoint, trajectory, and cost """ obj_vals, data = self.eval_objective(start_config) min_idx = tf.argmin(obj_vals) min_cost = obj_vals[min_idx] waypts, horizons_s, trajectories_lqr, trajectories_spline, controllers = data self.opt_waypt.assign_from_config_batch_idx(waypts, min_idx) self.opt_traj.assign_from_trajectory_batch_idx(trajectories_lqr, min_idx) # Convert horizon in seconds to horizon in # of steps min_horizon = int( tf.ceil(horizons_s[min_idx, 0] / self.params.dt).numpy()) # If the real LQR data has been discarded just take the first element # since it will be all zeros if self.params.control_pipeline_params.discard_LQR_controller_data: K_nkfd = controllers['K_nkfd'][0:1] k_nkf1 = controllers['k_nkf1'][0:1] else: K_nkfd = controllers['K_nkfd'][min_idx:min_idx + 1] k_nkf1 = controllers['k_nkf1'][min_idx:min_idx + 1] img_nmkd = self.simulator.get_observation(config=start_config) data = { 'system_config': SystemConfig.copy(start_config), 'waypoint_config': SystemConfig.copy(self.opt_waypt), 'trajectory': Trajectory.copy(self.opt_traj), 'spline_trajectory': Trajectory.copy(trajectories_spline), 'planning_horizon': min_horizon, 'K_nkfd': K_nkfd, 'k_nkf1': k_nkf1, 'img_nmkd': img_nmkd } return data
def __init__(self, obj_fn, params): self.obj_fn = obj_fn self.params = params.planner.parse_params(params) self.opt_waypt = SystemConfig(dt=params.dt, n=1, k=1, variable=True) self.opt_traj = Trajectory(dt=params.dt, n=1, k=params.planning_horizon, variable=True) self.control_pipeline = self._init_control_pipeline()
def test_coordinate_transform(): n, k = 1, 30 dt = .1 p = create_params() dubins_car = DubinsV1(dt=dt, params=p.system_dynamics_params) ref_config = dubins_car.init_egocentric_robot_config(dt=dt, n=n) pos_nk2 = np.ones((n, k, 2), dtype=np.float32) * np.random.rand() traj_global = Trajectory(dt=dt, n=n, k=k, position_nk2=pos_nk2) traj_egocentric = Trajectory(dt=dt, n=n, k=k, variable=True) traj_global_new = Trajectory(dt=dt, n=n, k=k, variable=True) dubins_car.to_egocentric_coordinates(ref_config, traj_global, traj_egocentric) # Test 0 transform assert ((pos_nk2 == traj_egocentric.position_nk2()).all()) ref_config_pos_112 = np.array([[[5.0, 5.0]]], dtype=np.float32) ref_config_pos_n12 = np.repeat(ref_config_pos_112, repeats=n, axis=0) ref_config = SystemConfig(dt=dt, n=n, k=1, position_nk2=ref_config_pos_n12) traj_egocentric = dubins_car.to_egocentric_coordinates( ref_config, traj_global, traj_egocentric) # Test translation assert ((pos_nk2 - 5.0 == traj_egocentric.position_nk2()).all()) ref_config_heading_111 = np.array([[[3. * np.pi / 4.]]], dtype=np.float32) ref_config_heading_nk1 = np.repeat(ref_config_heading_111, repeats=n, axis=0) ref_config = SystemConfig(dt=dt, n=n, k=1, position_nk2=ref_config_pos_n12, heading_nk1=ref_config_heading_nk1) traj_egocentric = dubins_car.to_egocentric_coordinates( ref_config, traj_global, traj_egocentric) traj_global_new = dubins_car.to_world_coordinates(ref_config, traj_egocentric, traj_global_new) assert (np.allclose(traj_global.position_nk2(), traj_global_new.position_nk2()))
class NNPlanner(Planner): """ A planner which uses a trained neural network. """ def __init__(self, simulator, params): super(NNPlanner, self).__init__(simulator, params) self.goal_ego_config = SystemConfig(dt=self.params.dt, n=1, k=1) @staticmethod def parse_params(p): """ Parse the parameters to add some additional helpful parameters. """ return p def _raw_data(self, start_config): """ Return a dictionary of raw_data from the simulator. To be passed to model.create_nn_inputs_and_outputs """ simulator = self.simulator data = {} # Convert Goal to Egocentric Coordinates self.params.system_dynamics.to_egocentric_coordinates( start_config, simulator.goal_config, self.goal_ego_config) # Image Data if hasattr(self.params.model, 'occupancy_grid_positions_ego_1mk12'): kwargs = { 'occupancy_grid_positions_ego_1mk12': self.params.model.occupancy_grid_positions_ego_1mk12 } else: kwargs = {} data['img_nmkd'] = simulator.get_observation(config=start_config, **kwargs) # Vehicle Data data['vehicle_state_nk3'] = start_config.position_and_heading_nk3( ).numpy() data[ 'vehicle_controls_nk2'] = start_config.speed_and_angular_speed_nk2( ).numpy() # Goal Data data['goal_position_n2'] = simulator.goal_config.position_nk2().numpy( )[:, 0, :] data['goal_position_ego_n2'] = self.goal_ego_config.position_nk2( ).numpy()[:, 0, :] # Dummy Labels data['optimal_waypoint_ego_n3'] = np.ones((1, 3), dtype=np.float32) data['waypoint_horizon_n1'] = np.ones((1, 1), dtype=np.float32) data['optimal_control_nk2'] = np.ones((1, 1, 2), dtype=np.float32) return data
def init_egocentric_robot_config(dt, n, v=0.0, w=0.0, dtype=tf.float32): """ A utility function initializing the robot at x=0, y=0, theta=0, v=v, w=w, a=0, alpha=0.""" k = 1 position_nk2 = tf.zeros((n, k, 2), dtype=dtype) heading_nk1 = tf.zeros((n, k, 1), dtype=dtype) speed_nk1 = v*tf.ones((n, k, 1), dtype=dtype) angular_speed_nk1 = w*tf.ones((n, k, 1), dtype=dtype) return SystemConfig(dt=dt, n=n, k=k, position_nk2=position_nk2, heading_nk1=heading_nk1, speed_nk1=speed_nk1, angular_speed_nk1=angular_speed_nk1, variable=False)
def generate_control_pipeline(self, params=None): p = self.params # Initialize spline, cost function, lqr solver waypoints_egocentric = self._sample_egocentric_waypoints(vf=0.) self._init_pipeline() pipeline_data = self.helper.empty_data_dictionary() with tf.name_scope('generate_control_pipeline'): if not self._incorrectly_binned_data_exists(): for v0 in self.start_velocities: if p.verbose: print('Initial Bin: v0={:.3f}'.format(v0)) start_config = self.system_dynamics.init_egocentric_robot_config( dt=p.system_dynamics_params.dt, n=self.waypoint_grid.n, v=v0) goal_config = SystemConfig.copy(waypoints_egocentric) start_config, goal_config, horizons_n1 = self._dynamically_fit_spline( start_config, goal_config) lqr_trajectory, K_nkfd, k_nkf1 = self._lqr(start_config) # TODO: Put the initial bin information in here too. This will make debugging much easier. data_bin = { 'start_configs': start_config, 'waypt_configs': goal_config, 'start_speeds': self.spline_trajectory.speed_nk1()[:, 0], 'spline_trajectories': Trajectory.copy(self.spline_trajectory), 'horizons': horizons_n1, 'lqr_trajectories': lqr_trajectory, 'K_nkfd': K_nkfd, 'k_nkf1': k_nkf1 } self.helper.append_data_bin_to_pipeline_data( pipeline_data, data_bin) # This data is incorrectly binned by velocity so collapse it all into one bin before saving it. pipeline_data = self.helper.concat_data_across_binning_dim( pipeline_data) self._save_incorrectly_binned_data(pipeline_data) else: pipeline_data = self._load_incorrectly_binned_data() pipeline_data = self._rebin_data_by_initial_velocity(pipeline_data) self._set_instance_variables(pipeline_data) for i, v0 in enumerate(self.start_velocities): filename = self._data_file_name(v0=v0) data_bin = self.helper.prepare_data_for_saving(pipeline_data, i) self.save_control_pipeline(data_bin, filename)
def generate_config_from_pos_3(pos_3, dt=0.1, v=0, w=0): pos_n11 = np.array([[[pos_3[0], pos_3[1]]]], dtype=np.float32) heading_n11 = np.array([[[pos_3[2]]]], dtype=np.float32) speed_nk1 = np.ones((1, 1, 1), dtype=np.float32) * v angular_speed_nk1 = np.ones((1, 1, 1), dtype=np.float32) * w return SystemConfig(dt, 1, 1, position_nk2=pos_n11, heading_nk1=heading_n11, speed_nk1=speed_nk1, angular_speed_nk1=angular_speed_nk1, variable=False)
def _sample_egocentric_waypoints(self, vf=0.): """ Uniformly samples an egocentric waypoint grid over which to plan trajectories.""" p = self.params.waypoint_params waypoints_egocentric = self.waypoint_grid.sample_egocentric_waypoints( vf=vf) waypoints_egocentric = self._ensure_waypoints_valid( waypoints_egocentric) wx_n11, wy_n11, wtheta_n11, wv_n11, ww_n11 = waypoints_egocentric waypt_pos_n12 = np.concatenate([wx_n11, wy_n11], axis=2) waypoint_egocentric_config = SystemConfig(dt=self.params.dt, n=self.waypoint_grid.n, k=1, position_nk2=waypt_pos_n12, speed_nk1=wv_n11, heading_nk1=wtheta_n11, angular_speed_nk1=ww_n11, variable=True) return waypoint_egocentric_config
def optimize(self, start_config): """ Optimize the objective over a trajectory starting from start_config. """ p = self.params model = p.model raw_data = self._raw_data(start_config) processed_data = model.create_nn_inputs_and_outputs(raw_data) # Predict the NN output nn_output_112k = model.predict_nn_output_with_postprocessing( processed_data['inputs'], is_training=False)[:, None] optimal_control_hat_1k2 = tf.reshape(nn_output_112k, (1, -1, 2)) data = { 'optimal_control_nk2': optimal_control_hat_1k2, 'system_config': SystemConfig.copy(start_config), 'img_nmkd': raw_data['img_nmkd'] } return data
def execute_velocity_cmds(self): for _ in range(self.num_cmds_per_batch): if self.get_end_acting(): break current_config = self.get_current_config() # the command is indexed by self.num_executed and is safe due to the size constraints in the update() vel_cmd = self.joystick_inputs[self.num_executed] assert (len(vel_cmd) == 2) # always a 2 tuple of v and w v = clip_vel(vel_cmd[0], self.v_bounds) w = clip_vel(vel_cmd[1], self.w_bounds) # NOTE: the format for the acceleration commands to the open loop for the robot is: # np.array([[[L, A]]], dtype=np.float32) where L is linear, A is angular command = np.array([[[v, w]]], dtype=np.float32) t_seg, _ = Agent.apply_control_open_loop(self, current_config, command, 1, sim_mode='ideal') self.trajectory.append_along_time_axis( t_seg, track_trajectory_acceleration=True) # act trajectory segment self.current_config = \ SystemConfig.init_config_from_trajectory_time_index( t_seg, t=-1)
def test_spline_rescaling(): # Set the random seed np.random.seed(seed=1) # Spline trajectory params n = 2 dt = .1 k = 10 final_times_n1 = tf.constant([[2.], [1.]]) # Goal states and initial speeds goal_posx_n11 = tf.constant([[[0.4]], [[1.]]]) goal_posy_n11 = tf.constant([[[0.]], [[1.]]]) goal_heading_n11 = tf.constant([[[0.]], [[np.pi / 2]]]) start_speed_nk1 = tf.ones((2, 1, 1), dtype=tf.float32) * 0.5 # Define the maximum speed, angular speed and maximum horizon max_speed = 0.6 max_angular_speed = 1.1 # Define start and goal configurations start_config = SystemConfig(dt, n, 1, speed_nk1=start_speed_nk1, variable=False) goal_config = SystemConfig(dt, n, k=1, position_nk2=tf.concat( [goal_posx_n11, goal_posy_n11], axis=2), heading_nk1=goal_heading_n11, variable=True) # Fit the splines p = DotMap(spline_params=DotMap(epsilon=1e-5)) spline_trajs = Spline3rdOrder(dt=dt, k=k, n=n, params=p.spline_params) spline_trajs.fit(start_config, goal_config, final_times_n1, factors=None) # Evaluate the splines ts_nk = tf.stack([ tf.linspace(0., final_times_n1[0, 0], 100), tf.linspace(0., final_times_n1[1, 0], 100) ], axis=0) spline_trajs.eval_spline(ts_nk, calculate_speeds=True) # Compute the required horizon required_horizon_n1 = spline_trajs.compute_dynamically_feasible_horizon( max_speed, max_angular_speed) assert required_horizon_n1[0, 0] < final_times_n1[0, 0] assert required_horizon_n1[1, 0] > final_times_n1[1, 0] # Compute the maximum speed and angular speed max_speed_n1 = tf.reduce_max(spline_trajs.speed_nk1(), axis=1) max_angular_speed_n1 = tf.reduce_max(tf.abs( spline_trajs.angular_speed_nk1()), axis=1) assert max_speed_n1[0, 0] < max_speed assert max_angular_speed_n1[0, 0] < max_angular_speed assert max_speed_n1[1, 0] > max_speed assert max_angular_speed_n1[1, 0] > max_angular_speed # Rescale horizon so that the trajectories are dynamically feasible spline_trajs.rescale_spline_horizon_to_dynamically_feasible_horizon( max_speed, max_angular_speed) assert np.allclose(spline_trajs.final_times_n1.numpy(), required_horizon_n1.numpy(), atol=1e-2) # Compute the maximum speed and angular speed max_speed_n1 = tf.reduce_max(spline_trajs.speed_nk1(), axis=1) max_angular_speed_n1 = tf.reduce_max(tf.abs( spline_trajs.angular_speed_nk1()), axis=1) assert max_speed_n1[0, 0] <= max_speed assert max_angular_speed_n1[0, 0] <= max_angular_speed assert max_speed_n1[1, 0] <= max_speed assert max_angular_speed_n1[1, 0] <= max_angular_speed # Find the spline trajectories that are valid valid_idxs_n = spline_trajs.find_trajectories_within_a_horizon( horizon_s=2.) assert valid_idxs_n.shape == (1, ) assert valid_idxs_n.numpy()[0] == 0
def _reset_goal_configuration(self, rng): p = self.params.reset_params.goal_config goal_norm = self.params.goal_dist_norm goal_radius = self.params.goal_cutoff_dist start_112 = self.start_config.position_nk2() obs_margin = self.params.avoid_obstacle_objective.obstacle_margin1 # Reset the goal position if p.position.reset_type == 'random': # Select a random position on the map that is at least obstacle margin away from the nearest obstacle, and # not within the goal margin of the start position. dist_to_obs = 0. dist_to_goal = 0. while dist_to_obs <= obs_margin or dist_to_goal <= goal_radius: goal_112 = self.obstacle_map.sample_point_112(rng) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(goal_112)) dist_to_goal = np.linalg.norm((start_112 - goal_112)[0], ord=goal_norm) elif p.position.reset_type == 'custom': x, y = p.position.goal_pos goal_112 = np.array([[[x, y]]], dtype=np.float32) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(goal_112)) assert(dist_to_obs.numpy() > 0.0) elif p.position.reset_type == 'random_v1': assert self.obstacle_map.name == 'SBPDMap' # Select a random position on the map that is at least obs_margin away from the # nearest obstacle, and not within the goal margin of the start position. # Additionaly the goal position must satisfy: # fmm_dist(start, goal) - l2_dist(start, goal) > fmm_l2_gap (goal should not be # fmm_dist(start, goal) < max_dist (goal should not be too far away) # Construct an fmm map where the 0 level set is the start position start_fmm_map = self._init_fmm_map(goal_pos_n2=self.start_config.position_nk2()[:, 0]) # enforce fmm_dist(start, goal) < max_fmm_dist free_xy = np.where(start_fmm_map.fmm_distance_map.voxel_function_mn < p.position.max_fmm_dist) free_xy = np.array(free_xy).T free_xy = free_xy[:, ::-1] free_xy_pts_m2 = self.obstacle_map._map_to_point(free_xy) # enforce dist_to_nearest_obstacle > obs_margin dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(free_xy_pts_m2[:, None])).numpy() dist_to_obs_valid_mask = dist_to_obs > obs_margin # enforce dist_to_goal > goal_radius fmm_dist_to_goal = np.squeeze(start_fmm_map.fmm_distance_map.compute_voxel_function(free_xy_pts_m2[:, None]).numpy()) fmm_dist_to_goal_valid_mask = fmm_dist_to_goal > goal_radius # enforce fmm_dist - l2_dist > fmm_l2_gap fmm_l2_gap = rng.uniform(0.0, p.position.max_dist_diff) l2_dist_to_goal = np.linalg.norm((start_112 - free_xy_pts_m2[:, None]), axis=2)[:, 0] fmm_dist_to_goal = np.squeeze(start_fmm_map.fmm_distance_map.compute_voxel_function(free_xy_pts_m2[:, None]).numpy()) fmm_l2_gap_valid_mask = fmm_dist_to_goal - l2_dist_to_goal > fmm_l2_gap valid_mask = np.logical_and.reduce((dist_to_obs_valid_mask, fmm_dist_to_goal_valid_mask, fmm_l2_gap_valid_mask)) free_xy = free_xy[valid_mask] if len(free_xy) == 0: # there are no goal points within the max_fmm_dist of start # return True so the start is reset return True goal_112 = self.obstacle_map.sample_point_112(rng, free_xy_map_m2=free_xy) else: raise NotImplementedError('Unknown reset type for the vehicle goal position.') # Initialize the goal configuration self.goal_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=goal_112) return False
def _reset_start_configuration(self, rng): """ Reset the starting configuration of the vehicle. """ p = self.params.reset_params.start_config # Reset the position if p.position.reset_type == 'random': # Select a random position on the map that is at least obstacle margin # away from the nearest obstacle obs_margin = self.params.avoid_obstacle_objective.obstacle_margin1 dist_to_obs = 0. while dist_to_obs <= obs_margin: start_112 = self.obstacle_map.sample_point_112(rng) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(start_112)) elif p.position.reset_type == 'custom': x, y = p.position.start_pos start_112 = np.array([[[x, y]]], dtype=np.float32) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(start_112)) assert(dist_to_obs.numpy() > 0.0) else: raise NotImplementedError('Unknown reset type for the vehicle starting position.') # Reset the heading if p.heading.reset_type == 'zero': heading_111 = np.zeros((1, 1, 1)) elif p.heading.reset_type == 'random': heading_111 = rng.uniform(p.heading.bounds[0], p.heading.bounds[1], (1, 1, 1)) elif p.position.reset_type == 'custom': theta = p.heading.start_heading heading_111 = np.array([[[theta]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting heading.') # Reset the speed if p.speed.reset_type == 'zero': speed_111 = np.zeros((1, 1, 1)) elif p.speed.reset_type == 'random': speed_111 = rng.uniform(p.speed.bounds[0], p.speed.bounds[1], (1, 1, 1)) elif p.speed.reset_type == 'custom': speed = p.speed.start_speed speed_111 = np.array([[[speed]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting speed.') # Reset the angular speed if p.ang_speed.reset_type == 'zero': ang_speed_111 = np.zeros((1, 1, 1)) elif p.ang_speed.reset_type == 'random': ang_speed_111 = rng.uniform(p.ang_speed.bounds[0], p.ang_speed.bounds[1], (1, 1, 1)) elif p.ang_speed.reset_type == 'gaussian': ang_speed_111 = rng.normal(p.ang_speed.gaussian_params[0], p.ang_speed.gaussian_params[1], (1, 1, 1)) elif p.ang_speed.reset_type == 'custom': ang_speed = p.ang_speed.start_ang_speed ang_speed_111 = np.array([[[ang_speed]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting angular speed.') # Initialize the start configuration self.start_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=start_112, heading_nk1=heading_111, speed_nk1=speed_111, angular_speed_nk1=ang_speed_111) # The system dynamics may need the current starting position for # coordinate transforms (i.e. realistic simulation) self.system_dynamics.reset_start_state(self.start_config)
class Simulator(SimulatorHelper): def __init__(self, params): self.params = params.simulator.parse_params(params) self.rng = np.random.RandomState(params.seed) self.obstacle_map = self._init_obstacle_map(self.rng) self.obj_fn = self._init_obj_fn() self.planner = self._init_planner() self.system_dynamics = self._init_system_dynamics() @staticmethod def parse_params(p): """ Parse the parameters to add some additional helpful parameters. """ # Parse the dependencies p.planner_params.planner.parse_params(p.planner_params) p.obstacle_map_params.obstacle_map.parse_params(p.obstacle_map_params) dt = p.planner_params.control_pipeline_params.system_dynamics_params.dt p.episode_horizon = int(np.ceil(p.episode_horizon_s / dt)) p.control_horizon = int(np.ceil(p.control_horizon_s / dt)) p.dt = dt return p # TODO: Varun. Dont clip the vehicle trajectory object, # but store the time index of when # the episode ends. Use this when plotting stuff def simulate(self): """ A function that simulates an entire episode. The agent starts at self.start_config, repeatedly calling _iterate to generate subtrajectories. Generates a vehicle_trajectory for the episode, calculates its objective value, and sets the episode_type (timeout, collision, success)""" config = self.start_config vehicle_trajectory = self.vehicle_trajectory vehicle_data = self.planner.empty_data_dict() end_episode = False commanded_actions_nkf = [] while not end_episode: trajectory_segment, next_config, data, commanded_actions_1kf = self._iterate(config) # Append to Vehicle Data for key in vehicle_data.keys(): vehicle_data[key].append(data[key]) vehicle_trajectory.append_along_time_axis(trajectory_segment) commanded_actions_nkf.append(commanded_actions_1kf) config = next_config end_episode, episode_data = self._enforce_episode_termination_conditions(vehicle_trajectory, vehicle_data, commanded_actions_nkf) self.vehicle_trajectory = episode_data['vehicle_trajectory'] self.vehicle_data = episode_data['vehicle_data'] self.vehicle_data_last_step = episode_data['vehicle_data_last_step'] self.last_step_data_valid = episode_data['last_step_data_valid'] self.episode_type = episode_data['episode_type'] self.valid_episode = episode_data['valid_episode'] self.commanded_actions_1kf = episode_data['commanded_actions_1kf'] self.obj_val = self._compute_objective_value(self.vehicle_trajectory) def _iterate(self, config): """ Runs the planner for one step from config to generate a subtrajectory, the resulting robot config after the robot executes the subtrajectory, and relevant planner data""" planner_data = self.planner.optimize(config) trajectory_segment, trajectory_data, commanded_actions_nkf = self._process_planner_data(config, planner_data) next_config = SystemConfig.init_config_from_trajectory_time_index(trajectory_segment, t=-1) return trajectory_segment, next_config, trajectory_data, commanded_actions_nkf def _process_planner_data(self, start_config, planner_data): """ Process the planners current plan. This could mean applying open loop control or LQR feedback control on a system. """ # The 'plan' is open loop control if 'trajectory' not in planner_data.keys(): trajectory, commanded_actions_nkf = self.apply_control_open_loop(start_config, planner_data['optimal_control_nk2'], T=self.params.control_horizon-1, sim_mode=self.system_dynamics.simulation_params.simulation_mode) # The 'plan' is LQR feedback control else: # If we are using ideal system dynamics the planned trajectory # is already dynamically feasible. Clip it to the control horizon if self.system_dynamics.simulation_params.simulation_mode == 'ideal': trajectory = Trajectory.new_traj_clip_along_time_axis(planner_data['trajectory'], self.params.control_horizon, repeat_second_to_last_speed=True) _, commanded_actions_nkf = self.system_dynamics.parse_trajectory(trajectory) elif self.system_dynamics.simulation_params.simulation_mode == 'realistic': trajectory, commanded_actions_nkf = self.apply_control_closed_loop(start_config, planner_data['spline_trajectory'], planner_data['k_nkf1'], planner_data['K_nkfd'], T=self.params.control_horizon-1, sim_mode='realistic') else: assert(False) self.planner.clip_data_along_time_axis(planner_data, self.params.control_horizon) return trajectory, planner_data, commanded_actions_nkf def get_observation(self, config=None, pos_n3=None, **kwargs): """ Return the robot's observation from configuration config or pos_nk3. """ return [None]*config.n def get_observation_from_data_dict_and_model(self, data_dict, model): """ Returns the robot's observation from the data inside data_dict, using parameters specified by the model. """ raise NotImplementedError def get_simulator_data_numpy_repr(self): """ Convert the vehicle trajectory, vehicle data, and vehicle data last step to numpy representations and return them. """ vehicle_trajectory = self.vehicle_trajectory.to_numpy_repr() vehicle_data = self.planner.convert_planner_data_to_numpy_repr(self.vehicle_data) vehicle_data_last_step = self.planner.convert_planner_data_to_numpy_repr(self.vehicle_data_last_step) vehicle_commanded_actions_1kf = self.commanded_actions_1kf.numpy() return vehicle_trajectory, vehicle_data, vehicle_data_last_step, vehicle_commanded_actions_1kf def _enforce_episode_termination_conditions(self, vehicle_trajectory, planner_data, commanded_actions_nkf): p = self.params time_idxs = [] for condition in p.episode_termination_reasons: time_idxs.append(self._compute_time_idx_for_termination_condition(vehicle_trajectory, condition)) try: idx = np.argmin(time_idxs) except ValueError: idx = np.argmin([time_idx.numpy() for time_idx in time_idxs]) try: termination_time = time_idxs[idx].numpy() except ValueError: termination_time = time_idxs[idx] if termination_time != np.inf: end_episode = True vehicle_trajectory.clip_along_time_axis(termination_time) planner_data, planner_data_last_step, last_step_data_valid = self.planner.mask_and_concat_data_along_batch_dim(planner_data, k=termination_time) commanded_actions_1kf = tf.concat(commanded_actions_nkf, axis=1)[:, :termination_time] # If all of the data was masked then # the episode simulated is not valid valid_episode = True if planner_data['system_config'] is None: valid_episode = False episode_data = {'vehicle_trajectory': vehicle_trajectory, 'vehicle_data': planner_data, 'vehicle_data_last_step': planner_data_last_step, 'last_step_data_valid': last_step_data_valid, 'episode_type': idx, 'valid_episode': valid_episode, 'commanded_actions_1kf': commanded_actions_1kf} else: end_episode = False episode_data = {} return end_episode, episode_data def reset(self, seed=-1): """Reset the simulator. Optionally takes a seed to reset the simulator's random state.""" if seed != -1: self.rng.seed(seed) # Note: Obstacle map must be reset independently of the fmm map. # Sampling start and goal may depend on the updated state of the # obstacle map. Updating the fmm map depends on the newly sampled goal. reset_start = True while reset_start: self._reset_obstacle_map(self.rng) self._reset_start_configuration(self.rng) reset_start = self._reset_goal_configuration(self.rng) self._update_fmm_map() self.vehicle_trajectory = Trajectory(dt=self.params.dt, n=1, k=0) self.obj_val = np.inf self.vehicle_data = {} def _reset_obstacle_map(self, rng): raise NotImplementedError def _update_fmm_map(self): raise NotImplementedError def _reset_start_configuration(self, rng): """ Reset the starting configuration of the vehicle. """ p = self.params.reset_params.start_config # Reset the position if p.position.reset_type == 'random': # Select a random position on the map that is at least obstacle margin # away from the nearest obstacle obs_margin = self.params.avoid_obstacle_objective.obstacle_margin1 dist_to_obs = 0. while dist_to_obs <= obs_margin: start_112 = self.obstacle_map.sample_point_112(rng) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(start_112)) elif p.position.reset_type == 'custom': x, y = p.position.start_pos start_112 = np.array([[[x, y]]], dtype=np.float32) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(start_112)) assert(dist_to_obs.numpy() > 0.0) else: raise NotImplementedError('Unknown reset type for the vehicle starting position.') # Reset the heading if p.heading.reset_type == 'zero': heading_111 = np.zeros((1, 1, 1)) elif p.heading.reset_type == 'random': heading_111 = rng.uniform(p.heading.bounds[0], p.heading.bounds[1], (1, 1, 1)) elif p.position.reset_type == 'custom': theta = p.heading.start_heading heading_111 = np.array([[[theta]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting heading.') # Reset the speed if p.speed.reset_type == 'zero': speed_111 = np.zeros((1, 1, 1)) elif p.speed.reset_type == 'random': speed_111 = rng.uniform(p.speed.bounds[0], p.speed.bounds[1], (1, 1, 1)) elif p.speed.reset_type == 'custom': speed = p.speed.start_speed speed_111 = np.array([[[speed]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting speed.') # Reset the angular speed if p.ang_speed.reset_type == 'zero': ang_speed_111 = np.zeros((1, 1, 1)) elif p.ang_speed.reset_type == 'random': ang_speed_111 = rng.uniform(p.ang_speed.bounds[0], p.ang_speed.bounds[1], (1, 1, 1)) elif p.ang_speed.reset_type == 'gaussian': ang_speed_111 = rng.normal(p.ang_speed.gaussian_params[0], p.ang_speed.gaussian_params[1], (1, 1, 1)) elif p.ang_speed.reset_type == 'custom': ang_speed = p.ang_speed.start_ang_speed ang_speed_111 = np.array([[[ang_speed]]], dtype=np.float32) else: raise NotImplementedError('Unknown reset type for the vehicle starting angular speed.') # Initialize the start configuration self.start_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=start_112, heading_nk1=heading_111, speed_nk1=speed_111, angular_speed_nk1=ang_speed_111) # The system dynamics may need the current starting position for # coordinate transforms (i.e. realistic simulation) self.system_dynamics.reset_start_state(self.start_config) def _reset_goal_configuration(self, rng): p = self.params.reset_params.goal_config goal_norm = self.params.goal_dist_norm goal_radius = self.params.goal_cutoff_dist start_112 = self.start_config.position_nk2() obs_margin = self.params.avoid_obstacle_objective.obstacle_margin1 # Reset the goal position if p.position.reset_type == 'random': # Select a random position on the map that is at least obstacle margin away from the nearest obstacle, and # not within the goal margin of the start position. dist_to_obs = 0. dist_to_goal = 0. while dist_to_obs <= obs_margin or dist_to_goal <= goal_radius: goal_112 = self.obstacle_map.sample_point_112(rng) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(goal_112)) dist_to_goal = np.linalg.norm((start_112 - goal_112)[0], ord=goal_norm) elif p.position.reset_type == 'custom': x, y = p.position.goal_pos goal_112 = np.array([[[x, y]]], dtype=np.float32) dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(goal_112)) assert(dist_to_obs.numpy() > 0.0) elif p.position.reset_type == 'random_v1': assert self.obstacle_map.name == 'SBPDMap' # Select a random position on the map that is at least obs_margin away from the # nearest obstacle, and not within the goal margin of the start position. # Additionaly the goal position must satisfy: # fmm_dist(start, goal) - l2_dist(start, goal) > fmm_l2_gap (goal should not be # fmm_dist(start, goal) < max_dist (goal should not be too far away) # Construct an fmm map where the 0 level set is the start position start_fmm_map = self._init_fmm_map(goal_pos_n2=self.start_config.position_nk2()[:, 0]) # enforce fmm_dist(start, goal) < max_fmm_dist free_xy = np.where(start_fmm_map.fmm_distance_map.voxel_function_mn < p.position.max_fmm_dist) free_xy = np.array(free_xy).T free_xy = free_xy[:, ::-1] free_xy_pts_m2 = self.obstacle_map._map_to_point(free_xy) # enforce dist_to_nearest_obstacle > obs_margin dist_to_obs = tf.squeeze(self.obstacle_map.dist_to_nearest_obs(free_xy_pts_m2[:, None])).numpy() dist_to_obs_valid_mask = dist_to_obs > obs_margin # enforce dist_to_goal > goal_radius fmm_dist_to_goal = np.squeeze(start_fmm_map.fmm_distance_map.compute_voxel_function(free_xy_pts_m2[:, None]).numpy()) fmm_dist_to_goal_valid_mask = fmm_dist_to_goal > goal_radius # enforce fmm_dist - l2_dist > fmm_l2_gap fmm_l2_gap = rng.uniform(0.0, p.position.max_dist_diff) l2_dist_to_goal = np.linalg.norm((start_112 - free_xy_pts_m2[:, None]), axis=2)[:, 0] fmm_dist_to_goal = np.squeeze(start_fmm_map.fmm_distance_map.compute_voxel_function(free_xy_pts_m2[:, None]).numpy()) fmm_l2_gap_valid_mask = fmm_dist_to_goal - l2_dist_to_goal > fmm_l2_gap valid_mask = np.logical_and.reduce((dist_to_obs_valid_mask, fmm_dist_to_goal_valid_mask, fmm_l2_gap_valid_mask)) free_xy = free_xy[valid_mask] if len(free_xy) == 0: # there are no goal points within the max_fmm_dist of start # return True so the start is reset return True goal_112 = self.obstacle_map.sample_point_112(rng, free_xy_map_m2=free_xy) else: raise NotImplementedError('Unknown reset type for the vehicle goal position.') # Initialize the goal configuration self.goal_config = SystemConfig(dt=p.dt, n=1, k=1, position_nk2=goal_112) return False def _compute_objective_value(self, vehicle_trajectory): p = self.params.objective_fn_params if p.obj_type == 'valid_mean': vehicle_trajectory.update_valid_mask_nk() else: assert(p.obj_type in ['valid_mean', 'mean']) obj_val = tf.squeeze(self.obj_fn.evaluate_function(vehicle_trajectory)) return obj_val def _update_obj_fn(self): """ Update the objective function to use a new obstacle_map and fmm map """ for objective in self.obj_fn.objectives: if isinstance(objective, ObstacleAvoidance): objective.obstacle_map = self.obstacle_map elif isinstance(objective, GoalDistance): objective.fmm_map = self.fmm_map elif isinstance(objective, AngleDistance): objective.fmm_map = self.fmm_map else: assert(False) def _init_obstacle_map(self, obstacle_params=None): """ Initializes a new obstacle map.""" raise NotImplementedError def _init_system_dynamics(self): """ If there is a control pipeline (i.e. model based method) return its system_dynamics. Else create a new system_dynamics instance. """ try: return self.planner.control_pipeline.system_dynamics except AttributeError: p = self.params.planner_params.control_pipeline_params.system_dynamics_params return p.system(dt=p.dt, params=p) def _init_obj_fn(self): """ Initialize the objective function. Use fmm_map = None as this is undefined until a goal configuration is specified. """ p = self.params obj_fn = ObjectiveFunction(p.objective_fn_params) if not p.avoid_obstacle_objective.empty(): obj_fn.add_objective(ObstacleAvoidance( params=p.avoid_obstacle_objective, obstacle_map=self.obstacle_map)) if not p.goal_distance_objective.empty(): obj_fn.add_objective(GoalDistance( params=p.goal_distance_objective, fmm_map=None)) if not p.goal_angle_objective.empty(): obj_fn.add_objective(AngleDistance( params=p.goal_angle_objective, fmm_map=None)) return obj_fn def _init_fmm_map(self, goal_pos_n2=None): p = self.params self.obstacle_occupancy_grid = self.obstacle_map.create_occupancy_grid_for_map() if goal_pos_n2 is None: goal_pos_n2 = self.goal_config.position_nk2()[0] return FmmMap.create_fmm_map_based_on_goal_position( goal_positions_n2=goal_pos_n2, map_size_2=np.array(p.obstacle_map_params.map_size_2), dx=p.obstacle_map_params.dx, map_origin_2=p.obstacle_map_params.map_origin_2, mask_grid_mn=self.obstacle_occupancy_grid) def _init_planner(self): p = self.params return p.planner_params.planner(simulator=self, params=p.planner_params) # Functions for computing relevant metrics # on robot trajectories def _dist_to_goal(self, trajectory): """Calculate the FMM distance between each state in trajectory and the goal.""" for objective in self.obj_fn.objectives: if isinstance(objective, GoalDistance): dist_to_goal_nk = objective.compute_dist_to_goal_nk(trajectory) return dist_to_goal_nk def _calculate_min_obs_distances(self, vehicle_trajectory): """Returns an array of dimension 1k where each element is the distance to the closest obstacle at each time step.""" pos_1k2 = vehicle_trajectory.position_nk2() obstacle_dists_1k = self.obstacle_map.dist_to_nearest_obs(pos_1k2) return obstacle_dists_1k def _calculate_trajectory_collisions(self, vehicle_trajectory): """Returns an array of dimension 1k where each element is a 1 if the robot collided with an obstacle at that time step or 0 otherwise. """ pos_1k2 = vehicle_trajectory.position_nk2() obstacle_dists_1k = self.obstacle_map.dist_to_nearest_obs(pos_1k2) return tf.cast(obstacle_dists_1k < 0.0, tf.float32) def get_metrics(self): """After the episode is over, call the get_metrics function to get metrics per episode. Returns a structure, lists of which are passed to accumulate metrics static function to generate summary statistics.""" dists_1k = self._dist_to_goal(self.vehicle_trajectory) init_dist = dists_1k[0, 0].numpy() final_dist = dists_1k[0, -1].numpy() collisions_mu = np.mean(self._calculate_trajectory_collisions(self.vehicle_trajectory)) min_obs_distances = self._calculate_min_obs_distances(self.vehicle_trajectory) return np.array([self.obj_val, init_dist, final_dist, self.vehicle_trajectory.k, collisions_mu, np.min(min_obs_distances), self.episode_type]) @staticmethod def collect_metrics(ms, termination_reasons=['Timeout', 'Collision', 'Success']): ms = np.array(ms) if len(ms) == 0: return None, None obj_vals, init_dists, final_dists, episode_length, collisions, min_obs_distances, episode_types = ms.T keys = ['Objective Value', 'Initial Distance', 'Final Distance', 'Episode Length', 'Collisions_Mu', 'Min Obstacle Distance'] vals = [obj_vals, init_dists, final_dists, episode_length, collisions, min_obs_distances] # mean, 25 percentile, median, 75 percentile fns = [np.mean, lambda x: np.percentile(x, q=25), lambda x: np.percentile(x, q=50), lambda x: np.percentile(x, q=75)] fn_names = ['mu', '25', '50', '75'] out_vals, out_keys = [], [] for k, v in zip(keys, vals): for fn, name in zip(fns, fn_names): _ = fn(v) out_keys.append('{:s}_{:s}'.format(k, name)) out_vals.append(_) # Log the number of episodes num_episodes = len(episode_types) out_keys.append('Number Episodes') out_vals.append(num_episodes) # Log Percet Collision, Timeout, Success, Etc. for i, reason in enumerate(termination_reasons): out_keys.append('Percent {:s}'.format(reason)) out_vals.append(1.*np.sum(episode_types == i) / num_episodes) # Log the Mean Episode Length for Each Episode Type episode_idxs = np.where(episode_types == i)[0] episode_length_for_this_episode_type = episode_length[episode_idxs] if len(episode_length_for_this_episode_type) > 0: mean_episode_length_for_this_episode_type = np.mean(episode_length_for_this_episode_type) out_keys.append('Mean Episode Length for {:s} Episodes'.format(reason)) out_vals.append(mean_episode_length_for_this_episode_type) return out_keys, out_vals def start_recording_video(self, video_number): """ By default the simulator does not support video capture.""" return None def stop_recording_video(self, video_number, video_filename): """ By default the simulator does not support video capture.""" return None def render(self, axs, freq=4, render_velocities=False, prepend_title=''): if type(axs) is list or type(axs) is np.ndarray: self._render_trajectory(axs[0], freq) if render_velocities: self._render_velocities(axs[1], axs[2]) [ax.set_title('{:s}{:s}'.format(prepend_title, ax.get_title())) for ax in axs] else: self._render_trajectory(axs, freq) axs.set_title('{:s}{:s}'.format(prepend_title, axs.get_title())) def _render_obstacle_map(self, ax): raise NotImplementedError def _render_trajectory(self, ax, freq=4): p = self.params self._render_obstacle_map(ax) if 'waypoint_config' in self.vehicle_data.keys(): self.vehicle_trajectory.render([ax], freq=freq, plot_quiver=False) self._render_waypoints(ax) else: self.vehicle_trajectory.render([ax], freq=freq, plot_quiver=True) boundary_params = {'norm': p.goal_dist_norm, 'cutoff': p.goal_cutoff_dist, 'color': 'g'} self.start_config.render(ax, batch_idx=0, marker='o', color='blue') self.goal_config.render_with_boundary(ax, batch_idx=0, marker='*', color='black', boundary_params=boundary_params) goal = self.goal_config.position_nk2()[0, 0] start = self.start_config.position_nk2()[0, 0] text_color = p.episode_termination_colors[self.episode_type] ax.set_title('Start: [{:.2f}, {:.2f}] '.format(*start) + 'Goal: [{:.2f}, {:.2f}]'.format(*goal), color=text_color) final_pos = self.vehicle_trajectory.position_nk2()[0, -1] ax.set_xlabel('Cost: {cost:.3f} '.format(cost=self.obj_val) + 'End: [{:.2f}, {:.2f}]'.format(*final_pos), color=text_color) def _render_waypoints(self, ax): # Plot the system configuration and corresponding # waypoint produced in the same color system_configs = self.vehicle_data['system_config'] waypt_configs = self.vehicle_data['waypoint_config'] cmap = matplotlib.cm.get_cmap(self.params.waypt_cmap) for i, (system_config, waypt_config) in enumerate(zip(system_configs, waypt_configs)): color = cmap(i / system_configs.n) system_config.render(ax, batch_idx=0, plot_quiver=True, marker='o', color=color) # Render the waypoint's number at each # waypoint's location pos_2 = waypt_config.position_nk2()[0, 0].numpy() ax.text(pos_2[0], pos_2[1], str(i), color=color) def _render_velocities(self, ax0, ax1): speed_k = self.vehicle_trajectory.speed_nk1()[0, :, 0].numpy() angular_speed_k = self.vehicle_trajectory.angular_speed_nk1()[0, :, 0].numpy() time = np.r_[:self.vehicle_trajectory.k]*self.vehicle_trajectory.dt if self.system_dynamics.simulation_params.simulation_mode == 'realistic': ax0.plot(time, speed_k, 'r--', label='Applied') ax0.plot(time, self.commanded_actions_1kf[0, :, 0], 'b--', label='Commanded') ax0.set_title('Linear Velocity') ax0.legend() ax1.plot(time, angular_speed_k, 'r--', label='Applied') ax1.plot(time, self.commanded_actions_1kf[0, :, 1], 'b--', label='Commanded') ax1.set_title('Angular Velocity') ax1.legend() else: ax0.plot(time, speed_k, 'r--') ax0.set_title('Linear Velocity') ax1.plot(time, angular_speed_k, 'r--') ax1.set_title('Angular Velocity')
def test_planner(): # Create parameters # Create planner parameters planner_params = create_planner_params() sim_params = create_sim_params() # Define the objective # objective = ObstacleAvoidance(params=p.avoid_obstacle_objective, # obstacle_map=obstacle_map) sim = SBPDSimulator(sim_params) splanner = SamplingPlanner(sim, planner_params) # Spline trajectory params n = 1 dt = 0.1 # Goal states and initial speeds goal_pos_n11 = tf.constant([[[8., 12.5]] ]) # Goal position (must be 1x1x2 array) goal_heading_n11 = tf.constant([[[np.pi / 2.]]]) # Start states and initial speeds start_pos_n11 = tf.constant([[[22., 16.5]] ]) # Goal position (must be 1x1x2 array) start_heading_n11 = tf.constant([[[-np.pi]]]) start_speed_nk1 = tf.ones((1, 1, 1), dtype=tf.float32) * 0.0 # Define start and goal configurations # start_config = SystemConfig(dt, n, 1, speed_nk1=start_speed_nk1, variable=False) start_config = SystemConfig(dt, n, k=1, position_nk2=start_pos_n11, heading_nk1=start_heading_n11, speed_nk1=start_speed_nk1, variable=False) goal_config = SystemConfig(dt, n, k=1, position_nk2=goal_pos_n11, heading_nk1=goal_heading_n11, variable=True) # waypts, horizons, trajectories_lqr, trajectories_spline, controllers = controller.plan(start_config, goal_config) splanner.simulator.reset_with_start_and_goal(start_config, goal_config) splanner.optimize(start_config) splanner.simulator.simulate() # Visualization fig = plt.figure() # obstacle_map.render(ax) # ax.plot(pos_nk2[0, :, 0].numpy(), pos_nk2[0, :, 1].numpy(), 'r.') # ax.plot(trajectory._position_nk2[0, :, 0],trajectory._position_nk2[0, :, 1], 'r-') ax = fig.add_subplot(1, 3, 1) splanner.simulator.render(ax) ax = fig.add_subplot(1, 3, 2) splanner.simulator.render(ax, zoom=4) #ax.plot(objective[0, 0], objective[0, 1], 'k*') # ax.set_title('obstacle map') ax = fig.add_subplot(1, 3, 3) splanner.simulator.vehicle_trajectory.render(ax, plot_quiver=True) splanner.simulator._render_waypoints(ax, plot_quiver=True, text_offset=(-1.5, 0.1)) # ax = fig.add_subplot(1,3,3) # ax.plot(opt_traj._position_nk2[0, :, 0],opt_traj._position_nk2[0, :, 1], 'r-') # ax.set_title('opt_traj') # Plotting the "distance map" # ax = fig.add_subplot(1,2,2) # ax.imshow(distance_map, origin='lower') # ax.set_title('distance map') fig.savefig('./tests/planner/test_planner.png', bbox_inches='tight', pad_inches=0)
def test_spline_3rd_order(visualize=False): np.random.seed(seed=1) n = 5 dt = .01 k = 100 target_state = np.random.uniform(-np.pi, np.pi, 3) v0 = np.random.uniform(0., 0.5, 1)[0] # Initial speed vf = 0. # Initial SystemConfig is [0, 0, 0, v0, 0] start_speed_nk1 = tf.ones((n, 1, 1), dtype=tf.float32) * v0 goal_posx_nk1 = tf.ones((n, 1, 1), dtype=tf.float32) * target_state[0] goal_posy_nk1 = tf.ones((n, 1, 1), dtype=tf.float32) * target_state[1] goal_pos_nk2 = tf.concat([goal_posx_nk1, goal_posy_nk1], axis=2) goal_heading_nk1 = tf.ones((n, 1, 1), dtype=tf.float32) * target_state[2] goal_speed_nk1 = tf.ones((n, 1, 1), dtype=tf.float32) * vf start_config = SystemConfig(dt, n, 1, speed_nk1=start_speed_nk1, variable=False) goal_config = SystemConfig(dt, n, 1, position_nk2=goal_pos_nk2, speed_nk1=goal_speed_nk1, heading_nk1=goal_heading_nk1, variable=True) start_nk5 = start_config.position_heading_speed_and_angular_speed_nk5() start_n5 = start_nk5[:, 0] goal_nk5 = goal_config.position_heading_speed_and_angular_speed_nk5() goal_n5 = goal_nk5[:, 0] p = DotMap(spline_params=DotMap(epsilon=1e-5)) ts_nk = tf.tile(tf.linspace(0., dt * k, k)[None], [n, 1]) spline_traj = Spline3rdOrder(dt=dt, k=k, n=n, params=p.spline_params) spline_traj.fit(start_config, goal_config, factors=None) spline_traj.eval_spline(ts_nk, calculate_speeds=True) pos_nk3 = spline_traj.position_and_heading_nk3() v_nk1 = spline_traj.speed_nk1() start_pos_diff = (pos_nk3 - start_n5[:, None, :3])[:, 0] goal_pos_diff = (pos_nk3 - goal_n5[:, None, :3])[:, -1] assert (np.allclose(start_pos_diff, np.zeros((n, 3)), atol=1e-6)) assert (np.allclose(goal_pos_diff, np.zeros((n, 3)), atol=1e-6)) start_vel_diff = (v_nk1 - start_n5[:, None, 3:4])[:, 0] goal_vel_diff = (v_nk1 - goal_n5[:, None, 3:4])[:, -1] assert (np.allclose(start_vel_diff, np.zeros((n, 1)), atol=1e-6)) assert (np.allclose(goal_vel_diff, np.zeros((n, 1)), atol=1e-6)) if visualize: fig = plt.figure() ax = fig.add_subplot(111) spline_traj.render(ax, freq=4) plt.show()