def evaluate_objective(self, trajectory): optimal_angular_orientation_nk = self.fmm_map.fmm_angle_map.compute_voxel_function( trajectory.position_nk2()) angular_dist_to_optimal_path_nk = angle_normalize( trajectory.heading_nk1()[:, :, 0] - optimal_angular_orientation_nk) return self.p.angle_cost * np.power( np.abs(angular_dist_to_optimal_path_nk), self.p.power)
def convert_position_and_heading_to_ego_coordinates(ref_position_and_heading_n13, world_position_and_heading_nk3): """ Converts a sequence of position and headings to the ego frame.""" position_nk2 = world_position_and_heading_nk3[:, :, :2] - ref_position_and_heading_n13[:, :, :2] position_nk2 = rotate_pos_nk2(position_nk2, -ref_position_and_heading_n13[:, :, 2:3]) heading_nk1 = angle_normalize(world_position_and_heading_nk3[:, :, 2:3] - ref_position_and_heading_n13[:, :, 2:3]) return tf.concat([position_nk2, heading_nk1], axis=2)
def apply_control_closed_loop(self, start_config, trajectory_ref, k_array_nTf1, K_array_nTfd, T, sim_mode='ideal'): """ Apply LQR feedback control to the system to track trajectory_ref Here k_array_nTf1 and K_array_nTfd are tensors of dimension (n, self.T-1, f, 1) and (n, self.T-1, f, d) respectively. """ with tf.name_scope('apply_control'): x0_n1d, _ = self.system_dynamics.parse_trajectory(start_config) assert(len(x0_n1d.shape) == 3) # [n,1,x_dim] angle_dims = self.system_dynamics._angle_dims commanded_actions_nkf = [] applied_actions = [] states = [x0_n1d*1.] x_ref_nkd, u_ref_nkf = self.system_dynamics.parse_trajectory(trajectory_ref) x_next_n1d = x0_n1d*1. for t in range(T): x_ref_n1d, u_ref_n1f = x_ref_nkd[:, t:t+1], u_ref_nkf[:, t:t+1] error_t_n1d = x_next_n1d - x_ref_n1d # TODO: Currently calling numpy() here as tfe.DEVICE_PLACEMENT_SILENT # is not working to place non-gpu ops (i.e. mod) on the cpu # turning tensors into numpy arrays is a hack around this. error_t_n1d = tf.concat([error_t_n1d[:, :, :angle_dims], angle_normalize(error_t_n1d[:, :, angle_dims:angle_dims+1].numpy()), error_t_n1d[:, :, angle_dims+1:]], axis=2) fdback_nf1 = tf.matmul(K_array_nTfd[:, t], tf.transpose(error_t_n1d, perm=[0, 2, 1])) u_n1f = u_ref_n1f + tf.transpose(k_array_nTf1[:, t] + fdback_nf1, perm=[0, 2, 1]) x_next_n1d = self.system_dynamics.simulate(x_next_n1d, u_n1f, mode=sim_mode) commanded_actions_nkf.append(u_n1f) # Append the applied action to the action list if sim_mode == 'ideal': applied_actions.append(u_n1f) elif sim_mode == 'realistic': # TODO: This line is intended for a real hardware setup. # If running this code on a real robot the user will need to # implement hardware.state_dx such that it reflects the current # sensor reading of the robot's applied actions applied_actions.append(np.array(self.system_dynamics.hardware.state_dx*1.)[None, None]) else: assert(False) states.append(x_next_n1d) commanded_actions_nkf.append(u_n1f) commanded_actions_nkf = tf.concat(commanded_actions_nkf, axis=1) u_nkf = tf.concat(applied_actions, axis=1) x_nkd = tf.concat(states, axis=1) trajectory = self.system_dynamics.assemble_trajectory(x_nkd, u_nkf, pad_mode='repeat') return trajectory, commanded_actions_nkf
def convert_position_and_heading_to_world_coordinates( ref_position_and_heading_n13, ego_position_and_heading_nk3): """ Converts a sequence of position and headings to the world frame.""" position_nk2 = rotate_pos_nk2(ego_position_and_heading_nk3[:, :, :2], ref_position_and_heading_n13[:, :, 2:3]) position_nk2 = position_nk2 + ref_position_and_heading_n13[:, :, :2] heading_nk1 = angle_normalize(ego_position_and_heading_nk3[:, :, 2:3] + ref_position_and_heading_n13[:, :, 2:3]) return tf.concat([position_nk2, heading_nk1], axis=2)
def compute_closest_waypt_idx(self, desired_waypt_config, waypt_configs): """" Given desired_waypoint_config and a list of precomputed waypoints in waypt_configs returns the index of the closest (in wrapped l2 distance) precomputed waypoint.""" # TODO: Potentially add linear and angular velocity here diff_pos_nk2 = desired_waypt_config.position_nk2( ) - waypt_configs.position_nk2() diff_heading_nk1 = angle_normalize( desired_waypt_config.heading_nk1().numpy() - waypt_configs.heading_nk1().numpy()) diff = tf.concat([diff_pos_nk2, diff_heading_nk1], axis=2) idx = tf.argmin(tf.norm(diff, axis=2)) return idx.numpy()[0]
def apply_control(self, start_config, trajectory, k_array_nTf1, K_array_nTfd, sim_mode='ideal'): """ apply the derived control to the error system to derive a new trajectory. Here k_array_nTf1 and K_aaray_nTfd are tensors of dimension (n, self.T-1, f, 1) and (n, self.T-1, f, d) respectively. """ with tf.name_scope('apply_control'): x0_n1d, _ = self.plant_dyn.parse_trajectory(start_config) assert (len(x0_n1d.shape) == 3) # [n,1,x_dim] angle_dims = self.plant_dyn._angle_dims actions = [] states = [x0_n1d * 1.] x_ref_nkd, u_ref_nkf = self.plant_dyn.parse_trajectory(trajectory) x_next_n1d = x0_n1d * 1. for t in range(self.T): x_ref_n1d, u_ref_n1f = x_ref_nkd[:, t:t + 1], u_ref_nkf[:, t:t + 1] error_t_n1d = x_next_n1d - x_ref_n1d # TODO: Currently calling numpy() here as tfe.DEVICE_PLACEMENT_SILENT # is not working to place non-gpu ops (i.e. mod) on the cpu # turning tensors into numpy arrays is a hack around this. error_t_n1d = tf.concat([ error_t_n1d[:, :, :angle_dims], angle_normalize( error_t_n1d[:, :, angle_dims:angle_dims + 1].numpy()), error_t_n1d[:, :, angle_dims + 1:] ], axis=2) fdback_nf1 = tf.matmul( K_array_nTfd[:, t], tf.transpose(error_t_n1d, perm=[0, 2, 1])) u_n1f = u_ref_n1f + tf.transpose( k_array_nTf1[:, t] + fdback_nf1, perm=[0, 2, 1]) x_next_n1d = self.fwdSim(x_next_n1d, u_n1f, mode=sim_mode) actions.append(u_n1f) states.append(x_next_n1d) u_nkf = tf.concat(actions, axis=1) x_nkd = tf.concat(states, axis=1) trajectory = self.plant_dyn.assemble_trajectory(x_nkd, u_nkf, pad_mode='repeat') return trajectory
def compute_closest_waypt_idx(self, desired_waypt_config, waypt_configs): """" Given desired_waypoint_config and a list of precomputed waypoints in waypt_configs returns the index of the closest (in wrapped l2 distance) precomputed waypoint.""" # TODO: Potentially add linear and angular velocity here broadcasted_goal = np.ones(shape=waypt_configs.position_nk2().shape, dtype=np.float32) * desired_waypt_config.position_nk2()[0] diff_pos_nk2 = broadcasted_goal - waypt_configs.position_nk2() broadcasted_goal_angle = np.ones(shape=waypt_configs.heading_nk1().shape, dtype=np.float32) * desired_waypt_config.heading_nk1()[0] diff_heading_nk1 = angle_normalize(broadcasted_goal_angle - waypt_configs.heading_nk1().numpy()) # if(obstacle_map is not None): # dist_nearest_obst = tf.map_fn(lambda x: obstacle_map.dist_to_nearest_obs([x]), waypt_configs.position_nk2()) # else: # dist_nearest_obst = np.zeros(shape=waypt_configs.position_nk2().shape, dtype=np.float32) diff = tf.concat([diff_pos_nk2, diff_heading_nk1], axis=2) idx = tf.argmin(tf.norm(diff, axis=2)) return idx.numpy()[0]
def convert_position_and_heading_to_world_coordinates(ref_position_and_heading_n13, ego_position_and_heading_nk3): """ Converts a sequence of position and headings to the world frame. the ref_position_and_heading_n13 is the base parameters for the world frame [0,0,0] """ position_nk2 = rotate_pos_nk2(ego_position_and_heading_nk3[:, :, :2], ref_position_and_heading_n13[:, :, 2:3]) ref_position_n12 = ref_position_and_heading_n13[:, :, :2] # x and y coordinates ref_position_0 = ref_position_n12[0] ref_position_nk2 = np.broadcast_to(ref_position_0, ego_position_and_heading_nk3[:, :, :2].shape) ref_heading_n11 = ref_position_and_heading_n13[:, :, 2:3] ref_heading_0 = ref_heading_n11[0] ref_heading_nk1 = np.broadcast_to(ref_heading_0, ego_position_and_heading_nk3[:, :, 2:3].shape) position_nk2 = position_nk2 + ref_position_nk2 heading_nk1 = angle_normalize(ego_position_and_heading_nk3[:, :, 2:3] + ref_heading_nk1) return tf.concat([position_nk2, heading_nk1], axis=2)
def test_lqr_feedback_coordinate_transform(): p = create_params() n, k = p.n, p.k dubins = p.system_dynamics_params.system(p.dt, params=p.simulation_params) # # Robot starts from (0, 0, 0) # # and does a small spiral start_pos_n13 = tf.constant(np.array([[[0.0, 0.0, 0.0]]], dtype=np.float32)) speed_nk1 = np.ones((n, k - 1, 1), dtype=np.float32) * 2.0 angular_speed_nk1 = np.linspace(1.5, 1.3, k - 1, dtype=np.float32)[None, :, None] u_nk2 = tf.constant(np.concatenate([speed_nk1, angular_speed_nk1], axis=2)) traj_ref_egocentric = dubins.simulate_T(start_pos_n13, u_nk2, k) cost_fn = QuadraticRegulatorRef(traj_ref_egocentric, dubins, p) lqr_solver = LQRSolver(T=k - 1, dynamics=dubins, cost=cost_fn) start_config0 = SystemConfig(dt=p.dt, n=p.n, k=1, position_nk2=start_pos_n13[:, :, :2], heading_nk1=start_pos_n13[:, :, 2:3]) lqr_res = lqr_solver.lqr(start_config0, traj_ref_egocentric, verbose=False) traj_lqr_egocentric = lqr_res['trajectory_opt'] K_array_egocentric = lqr_res['K_opt_nkfd'] k_array = lqr_res['k_opt_nkf1'] # The origin of the egocentric frame in world coordinates start_pos_n13 = tf.constant(np.array([[[1.0, 1.0, np.pi / 2.]]], dtype=np.float32)) ref_config = SystemConfig(dt=p.dt, n=p.n, k=1, position_nk2=start_pos_n13[:, :, :2], heading_nk1=start_pos_n13[:, :, 2:3]) # Convert to world coordinates traj_ref_world = dubins.to_world_coordinates(ref_config, traj_ref_egocentric, mode='new') traj_lqr_world = dubins.to_world_coordinates(ref_config, traj_lqr_egocentric, mode='new') K_array_world = dubins.convert_K_to_world_coordinates(ref_config, K_array_egocentric, mode='new') # Apply K_array_world to the system from ref_config traj_test_world = lqr_solver.apply_control(ref_config, traj_ref_world, k_array, K_array_world) # Check that the LQR Trajectory computed using K_array_egocentric # then transformed to world (traj_lqr_world) matches the # LQR Trajectory computed directly using K_array_world assert((tf.norm(traj_lqr_world.position_nk2() - traj_test_world.position_nk2(), axis=2).numpy() < 1e-4).all()) assert((tf.norm(angle_normalize(traj_lqr_world.heading_nk1() - traj_test_world.heading_nk1()), axis=2).numpy() < 1e-4).all()) assert((tf.norm(traj_lqr_world.speed_nk1() - traj_test_world.speed_nk1(), axis=2).numpy() < 1e-4).all()) assert((tf.norm(traj_lqr_world.angular_speed_nk1() - traj_test_world.angular_speed_nk1(), axis=2).numpy() < 1e-4).all())
def construct_z(self, trajectory): """ Input: A trajectory with x_dim =d and u_dim=f Output: z_nkg - a tensor of size n,k,g where g=d+f """ # with tf.name_scope('construct_z'): x_nkd, u_nkf = self.system.parse_trajectory(trajectory) x_ref_nkd, u_ref_nkf = self.system.parse_trajectory( self.trajectory_ref) delx_nkd = x_nkd - x_ref_nkd delu_nkf = u_nkf - u_ref_nkf z_nkg = np.concatenate([ delx_nkd[:, :, :self.angle_dims], angle_normalize(delx_nkd[:, :, self.angle_dims:self.angle_dims + 1]), delx_nkd[:, :, self.angle_dims + 1:], delu_nkf ], axis=2) return z_nkg
def back_propagation(self, trajectory): """ Back propagation along the given trajectory (state and control), solving the Riccati equations for the error system (delta_x, delta_u, t). Need to approximate the dynamics/costs along the given trajectory. Dynamics needs a time-varying first-order approximation. Costs need time-varying second-order approximation. """ # with tf.name_scope('back_prop'): angle_dims = self.plant_dyn._angle_dims lqr_sys = self.build_lqr_system(trajectory) x_nkd, u_nkf = self.plant_dyn.parse_trajectory(trajectory) # k (feedforward) and K (feedback) are lists of length self.T # where each element is a tensor of dimension (n, f, 1) # and (n, f, d) respectively. fdfwd_Tnf1 = [None] * self.T fdbck_gain_Tnfd = [None] * self.T # initialize with the terminal cost parameters # to prepare the backpropagation Vxx_ndd = lqr_sys['dldxx_nkdd'][:, -1] Vx_nd1 = lqr_sys['dldx_nkd'][:, -1, :, None] # TODO: Currently calling numpy() here as tfe.DEVICE_PLACEMENT_SILENT # is not working to place non-gpu ops (i.e. mod) on the cpu # turning tensors into numpy arrays is a hack around this. for t in reversed(range(self.T)): error_t_nd = lqr_sys['f_nkd'][:, t] - x_nkd[:, t + 1] error_t_nd = np.concatenate([ error_t_nd[:, :angle_dims], angle_normalize(error_t_nd[:, angle_dims:angle_dims + 1]), error_t_nd[:, angle_dims + 1:] ], axis=1) error_t_nd1 = error_t_nd[:, :, None] dfdx_ndd = lqr_sys['dfdx_nkdd'][:, t] dfdu_ndf = lqr_sys['dfdu_nkdf'][:, t] dfdx_T_ndd = np.transpose(dfdx_ndd, axes=[0, 2, 1]) dfdu_T_ndf = np.transpose(dfdu_ndf, axes=[0, 2, 1]) dfdx_T_dot_Vxx_ndd = np.matmul(dfdx_T_ndd, Vxx_ndd) dfdu_T_dot_Vxx_nfd = np.matmul(dfdu_T_ndf, Vxx_ndd) Qx_nd1 = (lqr_sys['dldx_nkd'][:, t][:, :, None] + np.matmul(dfdx_T_ndd, Vx_nd1) + np.matmul(dfdx_T_dot_Vxx_ndd, error_t_nd1)) Qu_nf1 = (lqr_sys['dldu_nkf'][:, t][:, :, None] + np.matmul(dfdu_T_ndf, Vx_nd1) + np.matmul(dfdu_T_dot_Vxx_nfd, error_t_nd1)) Qxx_ndd = lqr_sys['dldxx_nkdd'][:, t] + \ np.matmul(dfdx_T_dot_Vxx_ndd, dfdx_ndd) Qux_nfd = lqr_sys['dldux_nkfd'][:, t] + \ np.matmul(dfdu_T_dot_Vxx_nfd, dfdx_ndd) Quu_nff = lqr_sys['dlduu_nkff'][:, t] + \ np.matmul(dfdu_T_dot_Vxx_nfd, dfdu_ndf) # use regularized inverse for numerical stability inv_Quu_nff = self.regularized_pseudo_inverse_(Quu_nff, reg=self.reg) # get k and K fdfwd_Tnf1[t] = np.matmul(-inv_Quu_nff, Qu_nf1) fdbck_gain_Tnfd[t] = np.matmul(-inv_Quu_nff, Qux_nfd) fdbck_gain_nfd = np.transpose(fdbck_gain_Tnfd[t], axes=[0, 2, 1]) # update value function for the previous time step Vxx_ndd = Qxx_ndd - np.matmul(np.matmul(fdbck_gain_nfd, Quu_nff), fdbck_gain_Tnfd[t]) Vx_nd1 = Qx_nd1 - \ np.matmul(np.matmul(fdbck_gain_nfd, Quu_nff), fdfwd_Tnf1[t]) # Stack the outer time dimension as dimension 1 # in the tensors (tf.stack is equivalent to np.asarray) fdfwd_nTf1 = np.stack(fdfwd_Tnf1, axis=1) fdbck_gain_nTfd = np.stack(fdbck_gain_Tnfd, axis=1) return fdfwd_nTf1, fdbck_gain_nTfd