def compute_next_pose(self): # init_pose (3,) [x, y, theta] # init_input (8,): # 0 1 2 3 4 5 6 7 # xdot, ydot, thetadot, sin(theta), cos(theta), vel, delta, dt torch.normal(0, self.sigma, out=self.noise) # Create nn_input self.nn_input[:, 0:3] = self.pose_dot self.nn_input[:, 5:7] = self.ctrl # TODO: Add noise? self.nn_input[:, 5:7].add_(self.noise) # (K,2) + (K,2) # Add noise self.nn_input[:, 5].clamp_(self.MIN_VEL, self.MAX_VEL) self.nn_input[:, 6].clamp_(self.MIN_DEL, self.MAX_DEL) torch.sin(self.pose[:, 2], out=self.nn_input[:, 3]) # sin(theta) torch.cos(self.pose[:, 2], out=self.nn_input[:, 4]) # cos(theta) # pose_dot = model(nn_input) # Call model to learn new pose_dot pose_dot = self.model(Variable(self.nn_input, requires_grad=False)) # (K, 3) self.pose_dot = pose_dot.data # pose += pose_dot # update pose self.pose.add_(self.pose_dot) # Update pose #Utils.clamp_angle_tensor_(pose_dot[:,2]) Utils.clamp_angle_tensor_(self.pose[:, 2])
def cost(self, pose, goal, ctrl, noise, t): # (K,3), (3,), (2,), (K,2), 1 # input shapes # This cost function drives the behavior of the car. You want to specify a # cost function that penalizes behavior that is bad with high cost, and # encourages good behavior with low cost. # We have split up the cost function for you to a) get the car to the goal # b) avoid driving into walls and c) the MPPI control penalty to stay # smooth # You should feel free to explore other terms to get better or unique # behavior self.cur_pose.copy_(pose) # TODO: threshold the cost to 0 we are close enough ## FIRST - get the pose cost self.dist_to_goal.copy_(self.cur_pose).sub_(goal) # (K,3) xy_to_goal = self.dist_to_goal[:, :2] # (K,2) xy_to_goal.norm(p=2, dim=1, out=self.euclidian_distance) self.euclidian_distance.pow(2, out=self.euclidian_distance).mul_(1.2) theta_distance = self.dist_to_goal[:, 2] Utils.clamp_angle_tensor_(theta_distance) theta_distance.abs_().mul_(0.8 * (1.2**(t * 0.5))) self.pose_cost.copy_(self.euclidian_distance).add_(theta_distance) ## SECOND - get the control cost, taking the abs self.ctrl_cost.copy_(ctrl).mul_(self._lambda).mul_( self.inv_sigma).mul_(noise).mul_(1.9) self.ctrl_cost.abs_().sum( dim=1, out=self.running_cost) # (K,) copied into running_cost ## THIRD - do a bounds check Utils.world_to_map_torch(self.cur_pose, self.map_info, self.map_angle, self.map_c, self.map_s) # cur pose is now in pixels xs = self.cur_pose[:, 0] ys = self.cur_pose[:, 1] # scary bounds # bad_ks = (xs < 0) # bad_ks |= (xs >= self.map_width) # byte tensors, NOT indices # bad_ks |= (ys < 0) # bad_ks |= (ys >= self.map_height) # byte tensors, NOT indices # xs *= (~bad_ks).float() # (K,) modified bad coords to be in range, so we can index the map # ys *= (~bad_ks).float() # (K,) modified bad coords to be in range, so we can index the map self.bounds_check.copy_( self.permissible_region[ys.long(), xs.long()]) # (K,) with map value 0 or 1 BOUNDS_COST = 10000 self.bounds_check.mul_(BOUNDS_COST * t) self.pose_cost.mul_(2.5) # running cost already contains ctrl_cost self.running_cost.add_(self.pose_cost).add_(self.bounds_check)
def cost(self, pose, goal, ctrl, noise, t): # input shapes: (K,3), (3,), (2,), (K,2) # This cost function drives the behavior of the car. You want to specify a # cost function that penalizes behavior that is bad with high cost, and # encourages good behavior with low cost. # We have split up the cost function for you to a) get the car to the goal # b) avoid driving into walls and c) the MPPI control penalty to stay # smooth # You should feel free to explore other terms to get better or unique # behavior cur_pose = pose.clone() # TODO: threshold the cost to 0 we are close enough ## FIRST - get the pose cost dist_to_goal = cur_pose - goal # (K,3) euclidian_distance = dist_to_goal[:, :2].norm(p=2, dim=1).pow(2) # (K,) theta_distance = dist_to_goal[:, 2] Utils.clamp_angle_tensor_(theta_distance) theta_distance.abs_() #self.pose_cost = euclidian_distance * 1.7 + theta_distance * 1.1 * (1.2 ** (t * 0.5)) self.pose_cost = euclidian_distance * 1.2 + theta_distance * 0.8 * ( 1.2**(t * 0.5)) ## SECOND - get the control cost, taking the abs ctrl_cost = self._lambda * ctrl * self.inv_sigma * noise # (K,2) ctrl_cost = ctrl_cost.abs().sum(dim=1) # (K,) ## THIRD - do a bounds check Utils.world_to_map_torch(cur_pose, self.map_info, self.map_angle, self.map_c, self.map_s) # cur pose is now in pixels xs = cur_pose[:, 0] ys = cur_pose[:, 1] # bad_ks = (xs < 0) # bad_ks |= (xs >= self.map_width) # byte tensors, NOT indices # bad_ks |= (ys < 0) # bad_ks |= (ys >= self.map_height) # byte tensors, NOT indices # xs *= (~bad_ks).float() # (K,) modified bad coords to be in range, so we can index the map # ys *= (~bad_ks).float() # (K,) modified bad coords to be in range, so we can index the map self.bad_ks[:] = self.permissible_region[ ys.long(), xs.long()] # (K,) with map value 0 or 1 BOUNDS_COST = 10000 bounds_check = self.bad_ks * BOUNDS_COST * t self.pose_cost *= 2.5 ctrl_cost *= 1.9 self.running_cost += self.pose_cost self.running_cost += ctrl_cost self.running_cost += bounds_check
def mppi(self, init_pose, init_input): # init_pose (3,) [x, y, theta] # init_input (8,): # 0 1 2 3 4 5 6 7 # xdot, ydot, thetadot, sin(theta), cos(theta), vel, delta, dt t0 = time.time() dt = 0.1 self.running_cost.zero_() # convert pose into torch and one for each trajectory pose = init_pose.repeat(self.K, 1) # pose (K, 3) # create one input for each trajectory init_input[2] = Utils.clamp_angle(init_input[2]) nn_input = init_input.repeat(self.K, 1) # nn_input (K, 8) pose_dot = nn_input[:, :3] # pose_dot (K, 3) # MPPI should generate noise according to sigma torch.normal(0, self.sigma, out=self.noise) # Perform rollouts with those controls from your current pose for t in xrange(self.T): # Update nn_inputs with new pose information nn_input[:, 0:3] = pose_dot # xdot, ydot, thetadot nn_input[:, 7] = dt # dt torch.sin(pose[:, 2], out=nn_input[:, 3]) # sin(theta) torch.cos(pose[:, 2], out=nn_input[:, 4]) # cos(theta) # combine noise with central control sequence nn_input[:, 5:7] = self.ctrl[t] + self.noise[t] # (2,) + (K,2) nn_input[:, 5].clamp_(self.MIN_VEL, self.MAX_VEL) nn_input[:, 6].clamp_(self.MIN_DEL, self.MAX_DEL) # Call model to learn new pose_dot pose_dot = self.model(Variable(nn_input, requires_grad=False)) # (K, 3) pose_dot = pose_dot.data Utils.clamp_angle_tensor_(pose_dot[:, 2]) pose += pose_dot # Update pose Utils.clamp_angle_tensor_(pose[:, 2]) # add new pose into poses self.poses[:, t, :] = pose.clone() # poses[:,t,:] (K,3) = pose (K,3) self.cost(pose, self.goal_tensor, self.ctrl[t], self.noise[t], t) # (K,3), (3,), (2,), (K,2) => (K,) # Perform the MPPI weighting on your calculatd costs # Scale the added noise by the weighting and add to your control sequence beta = torch.min(self.running_cost) self.running_cost -= beta self.running_cost /= -self._lambda torch.exp(self.running_cost, out=self.running_cost) weights = self.running_cost weights /= torch.sum(weights) # weights (K,) # apply weights to noise through time for each trajectory weights = weights.expand(self.T, self.K) # weights (T,K) weighted_vel_noise = weights * self.noise[:, :, 0] # (T,K) * (T,K) = (T,K) weighted_delta_noise = weights * self.noise[:, :, 1] # (T,K) * (T,K) = (T,K) # sum the weighted noise over all trajectories for each time step vel_noise_sum = torch.sum(weighted_vel_noise, dim=1) # (T,) delta_noise_sum = torch.sum(weighted_delta_noise, dim=1) # (T,) # update central control through time for vel and delta separately # self.ctrl # (T,2) self.ctrl[:, 0] += vel_noise_sum # (T,) += (T,) self.ctrl[:, 1] += delta_noise_sum # (T,) += (T,) self.ctrl[:, 0].clamp_(self.MIN_VEL, self.MAX_VEL) self.ctrl[:, 1].clamp_(self.MIN_DEL, self.MAX_DEL) # Notes: TODO # MPPI can be assisted by carefully choosing lambda, and sigma # It is advisable to clamp the control values to be within the feasible range # of controls sent to the Vesc # Your code should account for theta being between -pi and pi. This is # important. # The more code that uses pytorch's cuda abilities, the better; every line in # python will slow down the control calculations. You should be able to keep a # reasonable amount of calculations done (T = 40, K = 2000) within the 100ms # between inferred-poses from the particle filter. print("MPPI: %4.5f ms" % ((time.time() - t0) * 1000.0)) return self.poses
def cost(self, pose, goal, ctrl, noise, t): # input shapes: (K,3), (3,), (2,), (K,2) # This cost function drives the behavior of the car. You want to specify a # cost function that penalizes behavior that is bad with high cost, and # encourages good behavior with low cost. # We have split up the cost function for you to a) get the car to the goal # b) avoid driving into walls and c) the MPPI control penalty to stay # smooth # You should feel free to explore other terms to get better or unique # behavior cur_pose = pose.clone() # TODO: threshold the cost to 0 we are close enough ## FIRST - get the pose cost dist_to_goal = cur_pose - goal # (K,3) euclidian_distance = dist_to_goal[:, :2].norm(p=2, dim=1).pow(2) # (K,) theta_distance = dist_to_goal[:, 2] Utils.clamp_angle_tensor_(theta_distance) theta_distance.abs_() mean_dist = torch.mean(euclidian_distance) if mean_dist < 0.4 and mean_dist > 0.2: theta_weight = 5.0 pose_cost = 1.0 / ( 5 * euclidian_distance) + theta_distance * theta_weight * ( 1.2**t) # (K,) elif mean_dist < 0.2: theta_weight = 4.8 eucl_weight = 1.0 pose_cost = euclidian_distance * eucl_weight + theta_distance * theta_weight / ( t + 1) # (K,) else: theta_weight = 2.8 eucl_weight = 1.0 pose_cost = euclidian_distance * eucl_weight + theta_distance * theta_weight / ( t + 1) # (K,) ## SECOND - get the control cost, taking the abs ctrl_cost = self._lambda * ctrl * self.inv_sigma * noise # (K,2) ctrl_cost = ctrl_cost.abs().sum(dim=1) # (K,) ## THIRD - do a bounds check Utils.world_to_map_torch(cur_pose, self.map_info, self.map_angle, self.map_c, self.map_s) # cur pose is now in pixels xs = cur_pose[:, 0] ys = cur_pose[:, 1] bad_ks = (xs < 0) bad_ks |= (xs >= self.map_width) # byte tensors, NOT indices bad_ks |= (ys < 0) bad_ks |= (ys >= self.map_height) # byte tensors, NOT indices xs *= (~bad_ks).float( ) # (K,) modified bad coords to be in range, so we can index the map ys *= (~bad_ks).float( ) # (K,) modified bad coords to be in range, so we can index the map map_values = self.permissible_region[ ys.long(), xs.long()] # (K,) with map value 0 or 1 bad_ks |= map_values BOUNDS_COST = 10000 bounds_check = bad_ks.type(self.dtype) * BOUNDS_COST return pose_cost * 2.5 + ctrl_cost * 2.3 + bounds_check