def generate_agent_sample_tl_history( state_index: int, frames: np.ndarray, agents: np.ndarray, tl_faces: np.ndarray, selected_track_id: Optional[int], render_context: RenderContext, history_num_frames: int, history_step_size: int, future_num_frames: int, future_step_size: int, filter_agents_threshold: float, rasterizer: Optional[Rasterizer] = None, perturbation: Optional[Perturbation] = None, ) -> dict: """Sampling function with rich traffic light history information. Generates the inputs and targets to train a deep prediction model. A deep prediction model takes as input the state of the world (here: an image we will call the "raster"), and outputs where that agent will be some seconds into the future. This function has a lot of arguments and is intended for internal use, you should try to use higher level classes and partials that use this function. Args: state_index (int): The anchor frame index, i.e. the "current" timestep in the scene frames (np.ndarray): The scene frames array, can be numpy array or a zarr array agents (np.ndarray): The full agents array, can be numpy array or a zarr array tl_faces (np.ndarray): The full traffic light faces array, can be numpy array or a zarr array selected_track_id (Optional[int]): Either None for AV, or the ID of an agent that you want to predict the future of. This agent is centered in the raster and the returned targets are derived from their future states. raster_size (Tuple[int, int]): Desired output raster dimensions pixel_size (np.ndarray): Size of one pixel in the real world ego_center (np.ndarray): Where in the raster to draw the ego, [0.5,0.5] would be the center history_num_frames (int): Amount of history frames to draw into the rasters history_step_size (int): Steps to take between frames, can be used to subsample history frames future_num_frames (int): Amount of history frames to draw into the rasters future_step_size (int): Steps to take between targets into the future filter_agents_threshold (float): Value between 0 and 1 to use as cutoff value for agent filtering based on their probability of being a relevant agent rasterizer (Optional[Rasterizer]): Rasterizer of some sort that draws a map image perturbation (Optional[Perturbation]): Object that perturbs the input and targets, used to train models that can recover from slight divergence from training set data Raises: ValueError: A ValueError is returned if the specified ``selected_track_id`` is not present in the scene or was filtered by applying the ``filter_agent_threshold`` probability filtering. Returns: dict: a dict object with the raster array, the future offset coordinates (meters), the future yaw angular offset, the future_availability as a binary mask """ # the history slice is ordered starting from the latest frame and goes backward in time., ex. slice(100, 91, -2) history_slice = get_history_slice(state_index, history_num_frames, history_step_size, include_current_state=True) future_slice = get_future_slice(state_index, future_num_frames, future_step_size) all_history_frames = frames[state_index::-1].copy() # current to past history_frames = frames[history_slice].copy( ) # copy() required if the object is a np.ndarray future_frames = frames[future_slice].copy() sorted_frames = np.concatenate( (history_frames[::-1], future_frames)) # from past to future # get agents (past and future) agent_slice = get_agents_slice_from_frames(sorted_frames[0], sorted_frames[-1]) agents = agents[agent_slice].copy( ) # this is the minimum slice of agents we need history_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array future_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array history_agents = filter_agents_by_frames(history_frames, agents) future_agents = filter_agents_by_frames(future_frames, agents) # tl_slice = get_tl_faces_slice_from_frames(history_frames[-1], history_frames[0]) # -1 is the farthest # # sync interval with the traffic light faces array # history_frames["traffic_light_faces_index_interval"] -= tl_slice.start # history_tl_faces = filter_tl_faces_by_frames(history_frames, tl_faces[tl_slice].copy()) all_tl_slice = get_tl_faces_slice_from_frames( all_history_frames[-1], all_history_frames[0]) # -1 is the farthest # sync interval with the traffic light faces array all_history_frames[ "traffic_light_faces_index_interval"] -= all_tl_slice.start all_history_tl_faces = filter_tl_faces_by_frames( all_history_frames, tl_faces[all_tl_slice].copy()) if perturbation is not None: history_frames, future_frames = perturbation.perturb( history_frames=history_frames, future_frames=future_frames) # State you want to predict the future of. cur_frame = history_frames[0] cur_agents = history_agents[0] if selected_track_id is None: agent_centroid_m = cur_frame["ego_translation"][:2] agent_yaw_rad = rotation33_as_yaw(cur_frame["ego_rotation"]) agent_extent_m = np.asarray( (EGO_EXTENT_LENGTH, EGO_EXTENT_WIDTH, EGO_EXTENT_HEIGHT)) selected_agent = None label_probabilities = torch.tensor([-1]) # Set -1 for Host car. else: # this will raise IndexError if the agent is not in the frame or under agent-threshold # this is a strict error, we cannot recover from this situation try: agent = filter_agents_by_track_id( filter_agents_by_labels(cur_agents, filter_agents_threshold), selected_track_id)[0] except IndexError: raise ValueError( f" track_id {selected_track_id} not in frame or below threshold" ) agent_centroid_m = agent["centroid"] agent_yaw_rad = float(agent["yaw"]) agent_extent_m = agent["extent"] selected_agent = agent label_probabilities = agent["label_probabilities"] input_im = (None if not rasterizer else rasterizer.rasterize( history_frames, history_agents, all_history_tl_faces, selected_agent)) world_from_agent = compute_agent_pose(agent_centroid_m, agent_yaw_rad) agent_from_world = np.linalg.inv(world_from_agent) raster_from_world = render_context.raster_from_world( agent_centroid_m, agent_yaw_rad) future_coords_offset, future_yaws_offset, future_availability = _create_targets_for_deep_prediction( future_num_frames, future_frames, selected_track_id, future_agents, agent_from_world, agent_yaw_rad) # history_num_frames + 1 because it also includes the current frame history_coords_offset, history_yaws_offset, history_availability = _create_targets_for_deep_prediction( history_num_frames + 1, history_frames, selected_track_id, history_agents, agent_from_world, agent_yaw_rad) return { "image": input_im, "target_positions": future_coords_offset, "target_yaws": future_yaws_offset, "target_availabilities": future_availability, "history_positions": history_coords_offset, "history_yaws": history_yaws_offset, "history_availabilities": history_availability, "world_to_image": raster_from_world, # TODO deprecate "raster_from_agent": raster_from_world @ world_from_agent, "raster_from_world": raster_from_world, "agent_from_world": agent_from_world, "world_from_agent": world_from_agent, "centroid": agent_centroid_m, "yaw": agent_yaw_rad, "extent": agent_extent_m, "label_probabilities": label_probabilities, }
def test_unroll(zarr_cat_dataset: ChunkedDataset, dmg: LocalDataManager, cfg: dict) -> None: rasterizer = build_rasterizer(cfg, dmg) # change the first yaw of scene 1 # this will break if some broadcasting happens zarr_cat_dataset.frames = np.asarray(zarr_cat_dataset.frames) slice_frames = get_frames_slice_from_scenes(zarr_cat_dataset.scenes[1]) rot = zarr_cat_dataset.frames[slice_frames.start]["ego_rotation"].copy() zarr_cat_dataset.frames[ slice_frames.start]["ego_rotation"] = yaw_as_rotation33( rotation33_as_yaw(rot + 0.75)) scene_indices = list(range(len(zarr_cat_dataset.scenes))) ego_dataset = EgoDataset(cfg, zarr_cat_dataset, rasterizer) # control only agents at T0, control them forever sim_cfg = SimulationConfig(use_ego_gt=False, use_agents_gt=False, disable_new_agents=True, distance_th_close=1000, distance_th_far=1000, num_simulation_steps=10) # ego will move by 1 each time ego_model = MockModel(advance_x=1.0) # agents will move by 0.5 each time agents_model = MockModel(advance_x=0.5) sim = ClosedLoopSimulator(sim_cfg, ego_dataset, torch.device("cpu"), ego_model, agents_model) sim_outputs = sim.unroll(scene_indices) # check ego movement for sim_output in sim_outputs: ego_tr = sim_output.simulated_ego[ "ego_translation"][:sim_cfg.num_simulation_steps, :2] ego_dist = np.linalg.norm(np.diff(ego_tr, axis=0), axis=-1) assert np.allclose(ego_dist, 1.0) ego_tr = sim_output.simulated_ego_states[:sim_cfg.num_simulation_steps, TrajectoryStateIndices. X:TrajectoryStateIndices.Y + 1] ego_dist = np.linalg.norm(np.diff(ego_tr.numpy(), axis=0), axis=-1) assert np.allclose(ego_dist, 1.0, atol=1e-3) # all rotations should be the same as the first one as the MockModel outputs 0 for that rots_sim = sim_output.simulated_ego[ "ego_rotation"][:sim_cfg.num_simulation_steps] r_rep = sim_output.recorded_ego["ego_rotation"][0] for r_sim in rots_sim: assert np.allclose(rotation33_as_yaw(r_sim), rotation33_as_yaw(r_rep), atol=1e-2) # all rotations should be the same as the first one as the MockModel outputs 0 for that rots_sim = sim_output.simulated_ego_states[:sim_cfg. num_simulation_steps, TrajectoryStateIndices. THETA] r_rep = sim_output.recorded_ego_states[0, TrajectoryStateIndices.THETA] for r_sim in rots_sim: assert np.allclose(r_sim, r_rep, atol=1e-2) # check agents movements for sim_output in sim_outputs: # we need to know which agents were controlled during simulation # TODO: this is not ideal, we should keep track of them through the simulation sim_dataset = SimulationDataset.from_dataset_indices( ego_dataset, [sim_output.scene_id], sim_cfg) sim_dataset.rasterise_agents_frame_batch( 0) # this will fill agents_tracked agents_tracks = [el[1] for el in sim_dataset._agents_tracked] for track_id in agents_tracks: states = sim_output.simulated_agents agents = filter_agents_by_track_id( states, track_id)[:sim_cfg.num_simulation_steps] agent_dist = np.linalg.norm(np.diff(agents["centroid"], axis=0), axis=-1) assert np.allclose(agent_dist, 0.5)
def _create_targets_for_deep_prediction( num_frames: int, frames: np.ndarray, selected_track_id: Optional[int], agents: List[np.ndarray], agent_current_centroid: np.ndarray, agent_current_velocity: np.ndarray, agent_current_yaw: float, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ Internal function that creates the targets and availability masks for deep prediction-type models. The futures/history offset (in meters) are computed. When no info is available (e.g. agent not in frame) a 0 is set in the availability array (1 otherwise). Args: num_frames (int): number of offset we want in the future/history frames (np.ndarray): available frames. This may be less than num_frames selected_track_id (Optional[int]): agent track_id or AV (None) agents (List[np.ndarray]): list of agents arrays (same len of frames) agent_current_centroid (np.ndarray): centroid of the agent at timestep 0 agent_current_yaw (float): angle of the agent at timestep 0 Returns: Tuple[np.ndarray, np.ndarray, np.ndarray]: position offsets, angle offsets, availabilities """ #TODO: JERK # Inclusively count the number of available and unavailable frames of this target num_avail = 0 num_unavail = 0 last_avail_index = -1 last_unavail_index = -1 # How much the coordinates differ from the current state in meters. coords_offset = np.zeros((num_frames, 2), dtype=np.float32) vel_offset = np.zeros((num_frames, 2), dtype=np.float32) accel_offset = np.zeros((num_frames, 2), dtype=np.float32) jerk_offset = np.zeros((num_frames, 2), dtype=np.float32) yaws_offset = np.zeros((num_frames, 1), dtype=np.float32) availability = np.zeros((num_frames, ), dtype=np.float32) for i, (frame, agents) in enumerate(zip(frames, agents)): if selected_track_id is None: agent_centroid = frame["ego_translation"][:2] agent_yaw = rotation33_as_yaw(frame["ego_rotation"]) agent_velocity = np.array([0, 0], dtype=np.float32) num_avail = num_avail + 1 else: # it's not guaranteed the target will be in every frame try: agent = filter_agents_by_track_id(agents, selected_track_id)[0] num_avail = num_avail + 1 except IndexError: availability[i] = 0.0 # keep track of invalid futures/history last_unavail_index = i num_unavail = num_unavail + 1 continue agent_centroid = agent["centroid"] agent_velocity = agent["velocity"] agent_yaw = agent["yaw"] #TODO: change my kinetics to the format of # offset for time[i] - time[0] # So tidy up the velocity calculation process, and the acceleration stuff # time 0 state, and time > 0 state. and make the calculations with respect to time passed and the time 0 val # Also provide a means of getting change in time # Note: B is an initial case # State A: the boy scout case. All clean, no trouble # Two or more positions, previous frame is available # State B: One or none positions # State C: Two or more positions, previous frame is unavail coords_offset[i] = agent_centroid - agent_current_centroid # STATE B if num_avail == 1: # There is only the current position, no velocity is possible vel_offset[i] = agent_current_velocity accel_offset[i] = np.array([0, 0], dtype=np.float32) jerk_offset[i] = np.array([0, 0], dtype=np.float32) # there are two or more positions # There has been no unavailabilities yet elif last_unavail_index == -1: # STATE A # Only 1 frame difference in time dx = coords_offset[i] - coords_offset[i - 1] dt = 0.1 vel_offset[ i] = dx / dt # change in position / frame width (0.1 seconds) : [m/s] # accel_offset[i] = (agent_velocity ** 2 - vel_offset[i - 1] ** 2) / (2 * dx) accel_offset[i] = (vel_offset[i] - vel_offset[i - 1]) / dt jerk_offset[i] = (accel_offset[i] - accel_offset[i - 1]) / dt # Unavailability was previous frame elif last_unavail_index == i - 1: # Check number of positions # STATE C # There are positions before the unavailability, calculate the velocity # Get previous existing velocity prev_vel = vel_offset[last_avail_index] # Get previous existing position prev_pos = coords_offset[last_avail_index] # Current Position pos = coords_offset[i] # Delta X dx = pos - prev_pos # Delta Time...0.1 being the frame width dt = (i - last_avail_index) * 0.1 # Calculate velocity vel = (dx / dt) * -prev_vel vel_offset[i] = vel # Now calculate acceleration accel_offset[i] = (((dx - prev_vel * dt) * 2)**(1 / 2)) / dt # Now Jerk jerk_offset[i] = (accel_offset[i] - accel_offset[last_avail_index]) / dt else: # STATE A # Only 1 frame difference in time dx = coords_offset[i] - coords_offset[i - 1] dt = 0.1 vel_offset[ i] = dx / dt # change in position / frame width (0.1 seconds) : [m/s] accel_offset[i] = (agent_velocity**2 - vel_offset[i - 1]**2) / (2 * dx) jerk_offset[i] = (accel_offset[i] - accel_offset[i - 1]) / dt # else: # # # Attempt to get previous velocity # if i == 0: # # No previous velocity # prev_vel = np.array([0, 0], dtype=np.float32) # else: # # There must be a previous velocity # prev_vel = vel_offset[i - 1] # # # Calculate the acceleration # accel_offset[i] = (vel_offset[i] ** 2 - prev_vel ** 2) / (2 * coords_offset[i]) # else: # accel_offset[i] = (agent_velocity ** 2 - agent_current_velocity ** 2) / (2 * coords_offset[i]) yaws_offset[i] = agent_yaw - agent_current_yaw availability[i] = 1.0 # save this index as the last available index last_avail_index = i return coords_offset, vel_offset, accel_offset, jerk_offset, yaws_offset, availability
def rasterize( self, history_frames: np.ndarray, history_agents: List[np.ndarray], history_tl_faces: List[np.ndarray], agent: Optional[np.ndarray] = None, ) -> np.ndarray: # all frames are drawn relative to this one" frame = history_frames[0] if agent is None: ego_translation_m = history_frames[0]["ego_translation"] ego_yaw_rad = rotation33_as_yaw(frame["ego_rotation"]) else: ego_translation_m = np.append(agent["centroid"], history_frames[0]["ego_translation"][-1]) ego_yaw_rad = agent["yaw"] raster_from_world = self.render_context.raster_from_world(ego_translation_m, ego_yaw_rad) # this ensures we always end up with fixed size arrays, +1 is because current time is also in the history out_shape = (self.raster_size[1], self.raster_size[0], self.history_num_frames + 1) agents_unknown_images = np.zeros(out_shape, dtype=np.uint8) agents_car_images = np.zeros(out_shape, dtype=np.uint8) agents_cyclist_images = np.zeros(out_shape, dtype=np.uint8) agents_pedestrian_images = np.zeros(out_shape, dtype=np.uint8) ego_images = np.zeros(out_shape, dtype=np.uint8) if self.enable_selected_agent_channels: assert agent is not None selected_agent_images = np.zeros(out_shape, dtype=np.uint8) for i, (frame, agents) in enumerate(zip(history_frames, history_agents)): # TODO: filter_agents_threshold is not used. # Ignore filter_agents_threshold now threshold = 0.5 agents_unknown = agents[agents["label_probabilities"][:, UNKNOWN_LABEL_INDEX] > threshold] agents_car = agents[agents["label_probabilities"][:, CAR_LABEL_INDEX] > threshold] agents_cyclist = agents[agents["label_probabilities"][:, CYCLIST_LABEL_INDEX] > threshold] agents_pedestrian = agents[agents["label_probabilities"][:, PEDESTRIAN_LABEL_INDEX] > threshold] # agents = filter_agents_by_labels(agents, self.filter_agents_threshold) # note the cast is for legacy support of dataset before April 2020 agents_ego = get_ego_as_agent(frame).astype(agents.dtype) agents_unknown_image = draw_boxes(self.raster_size, raster_from_world, agents_unknown, 255) agents_car_image = draw_boxes(self.raster_size, raster_from_world, agents_car, 255) agents_cyclist_image = draw_boxes(self.raster_size, raster_from_world, agents_cyclist, 255) agents_pedestrian_image = draw_boxes(self.raster_size, raster_from_world, agents_pedestrian, 255) ego_image = draw_boxes(self.raster_size, raster_from_world, agents_ego, 255) agents_unknown_images[..., i] = agents_unknown_image agents_car_images[..., i] = agents_car_image agents_cyclist_images[..., i] = agents_cyclist_image agents_pedestrian_images[..., i] = agents_pedestrian_image ego_images[..., i] = ego_image if self.enable_selected_agent_channels: assert agent is not None selected_agent = filter_agents_by_track_id(agents, agent["track_id"]) if len(selected_agent) == 0: # agent not in this history frame selected_agent_image = np.zeros_like(ego_image) else: # add av to agents and remove the agent from agents selected_agent_image = draw_boxes(self.raster_size, raster_from_world, selected_agent, 255) selected_agent_images[..., i] = selected_agent_image images = [ agents_unknown_images, agents_car_images, agents_cyclist_images, agents_pedestrian_images, ego_images ] if self.enable_selected_agent_channels: images.append(selected_agent_images) out_im = np.concatenate(images, axis=-1) assert out_im.shape[-1] == self.raster_channels return out_im.astype(np.float32) / 255
def generate_kinetic_agent_sample( state_index: int, frames: np.ndarray, agents: np.ndarray, tl_faces: np.ndarray, selected_track_id: Optional[int], raster_size: Tuple[int, int], pixel_size: np.ndarray, ego_center: np.ndarray, history_num_frames: int, history_step_size: int, future_num_frames: int, future_step_size: int, filter_agents_threshold: float, rasterizer: Optional[Rasterizer] = None, perturbation: Optional[Perturbation] = None, ) -> dict: """Generates the inputs and targets to train a deep prediction model. A deep prediction model takes as input the state of the world (here: an image we will call the "raster"), and outputs where that agent will be some seconds into the future. This function has a lot of arguments and is intended for internal use, you should try to use higher level classes and partials that use this function. Args: state_index (int): The anchor frame index, i.e. the "current" timestep in the scene frames (np.ndarray): The scene frames array, can be numpy array or a zarr array agents (np.ndarray): The full agents array, can be numpy array or a zarr array tl_faces (np.ndarray): The full traffic light faces array, can be numpy array or a zarr array selected_track_id (Optional[int]): Either None for AV, or the ID of an agent that you want to predict the future of. This agent is centered in the raster and the returned targets are derived from their future states. raster_size (Tuple[int, int]): Desired output raster dimensions pixel_size (np.ndarray): Size of one pixel in the real world ego_center (np.ndarray): Where in the raster to draw the ego, [0.5,0.5] would be the center history_num_frames (int): Amount of history frames to draw into the rasters history_step_size (int): Steps to take between frames, can be used to subsample history frames future_num_frames (int): Amount of history frames to draw into the rasters future_step_size (int): Steps to take between targets into the future filter_agents_threshold (float): Value between 0 and 1 to use as cutoff value for agent filtering based on their probability of being a relevant agent rasterizer (Optional[Rasterizer]): Rasterizer of some sort that draws a map image perturbation (Optional[Perturbation]): Object that perturbs the input and targets, used to train models that can recover from slight divergence from training set data Raises: ValueError: A ValueError is returned if the specified ``selected_track_id`` is not present in the scene or was filtered by applying the ``filter_agent_threshold`` probability filtering. Returns: dict: a dict object with the raster array, the future offset coordinates (meters), the future yaw angular offset, the future_availability as a binary mask """ agent_velocity = None # the history slice is ordered starting from the latest frame and goes backward in time., ex. slice(100, 91, -2) history_slice = get_history_slice(state_index, history_num_frames, history_step_size, include_current_state=True) future_slice = get_future_slice(state_index, future_num_frames, future_step_size) history_frames = frames[history_slice].copy( ) # copy() required if the object is a np.ndarray future_frames = frames[future_slice].copy() sorted_frames = np.concatenate( (history_frames[::-1], future_frames)) # from past to future # get agents (past and future) agent_slice = get_agents_slice_from_frames(sorted_frames[0], sorted_frames[-1]) agents = agents[agent_slice].copy( ) # this is the minimum slice of agents we need history_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array future_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array history_agents = filter_agents_by_frames(history_frames, agents) future_agents = filter_agents_by_frames(future_frames, agents) try: tl_slice = get_tl_faces_slice_from_frames( history_frames[-1], history_frames[0]) # -1 is the farthest # sync interval with the traffic light faces array history_frames["traffic_light_faces_index_interval"] -= tl_slice.start history_tl_faces = filter_tl_faces_by_frames(history_frames, tl_faces[tl_slice].copy()) except ValueError: history_tl_faces = [ np.empty(0, dtype=TL_FACE_DTYPE) for _ in history_frames ] if perturbation is not None: history_frames, future_frames = perturbation.perturb( history_frames=history_frames, future_frames=future_frames) # State you want to predict the future of. cur_frame = history_frames[0] cur_agents = history_agents[0] if selected_track_id is None: agent_centroid = cur_frame["ego_translation"][:2] # FIXME # Assume velocity was 0, but it probably shouldnt be agent_velocity = np.array([0, 0], dtype=np.float32) agent_yaw = rotation33_as_yaw(cur_frame["ego_rotation"]) agent_extent = np.asarray( (EGO_EXTENT_LENGTH, EGO_EXTENT_WIDTH, EGO_EXTENT_HEIGHT)) selected_agent = None else: # this will raise IndexError if the agent is not in the frame or under agent-threshold # this is a strict error, we cannot recover from this situation try: agent = filter_agents_by_track_id( filter_agents_by_labels(cur_agents, filter_agents_threshold), selected_track_id)[0] except IndexError: raise ValueError( f" track_id {selected_track_id} not in frame or below threshold" ) agent_centroid = agent["centroid"] agent_velocity = agent["velocity"] agent_yaw = float(agent["yaw"]) agent_extent = agent["extent"] selected_agent = agent input_im = (None if not rasterizer else rasterizer.rasterize( history_frames, history_agents, history_tl_faces, selected_agent)) world_to_image_space = world_to_image_pixels_matrix( raster_size, pixel_size, ego_translation_m=agent_centroid, ego_yaw_rad=agent_yaw, ego_center_in_image_ratio=ego_center, ) future_coords_offset, future_vel_offset, future_accel_offset, _, future_yaws_offset, future_availability = _create_targets_for_deep_prediction( future_num_frames, future_frames, selected_track_id, future_agents, agent_centroid[:2], agent_velocity[:2], agent_yaw, ) # history_num_frames + 1 because it also includes the current frame history_coords_offset, history_vel_offset, history_accel_offset, history_jerk_offset, history_yaws_offset, \ history_availability = _create_targets_for_deep_prediction(history_num_frames + 1, history_frames, selected_track_id, history_agents, agent_centroid[:2], agent_velocity[:2], agent_yaw, ) # estimated_future_positions = history_jerk_offset estimated_future_positions = _extrapolate_targets( future_num_frames, history_num_frames, history_coords_offset, history_vel_offset, history_accel_offset, history_jerk_offset) error_arr = np.abs(estimated_future_positions - future_coords_offset[10:]) error = np.average(error_arr) return { "image": input_im, "target_positions": future_coords_offset, "target_velocities": future_vel_offset, "target_accelerations": future_accel_offset, "target_yaws": future_yaws_offset, "target_availabilities": future_availability, "history_positions": history_coords_offset, "history_velocities": history_vel_offset, "history_accelerations": history_accel_offset, "history_yaws": history_yaws_offset, "history_availabilities": history_availability, "estimated_future_positions": estimated_future_positions, "world_to_image": world_to_image_space, "centroid": agent_centroid, "yaw": agent_yaw, "extent": agent_extent, }
def generate_frame_sample_without_hist( state_index: int, frames: zarr.core.Array, tl_faces: zarr.core.Array, agents: zarr.core.Array, agents_from_standard_mask_only: bool = False, mask_agent_indices: zarr.core.Array = None, ) -> dict: frame = frames[state_index] if not agents_from_standard_mask_only: agent_slice = get_agents_slice_from_frames(frame) agents = agents[agent_slice].copy() else: masked_indices_slice = slice(*frame["mask_agent_index_interval"]) masked_agent_indices = [ el[0] for el in mask_agent_indices[masked_indices_slice] ] if masked_agent_indices: agents = agents.get_coordinate_selection( masked_agent_indices).copy() else: agents = [] ego_centroid = frame["ego_translation"][:2] # try to estimate ego velocity if state_index > 0: prev_frame_candidate = frames[state_index - 1] prev_ego_centroid = prev_frame_candidate["ego_translation"][:2] translation_m = np.hypot( prev_ego_centroid[0] - ego_centroid[0], prev_ego_centroid[1] - ego_centroid[1], ) if translation_m < 10: timestamp = datetime.fromtimestamp( frame["timestamp"] / 10**9).astimezone(timezone("US/Pacific")) timestamp_prev = datetime.fromtimestamp( prev_frame_candidate["timestamp"] / 10**9).astimezone( timezone("US/Pacific")) timediff_sec = (timestamp - timestamp_prev).total_seconds() if timestamp > timestamp_prev and timediff_sec < 0.2: ego_speed = (ego_centroid - prev_ego_centroid) / timediff_sec else: ego_speed = None else: ego_speed = None else: ego_speed = None try: tl_slice = get_tl_faces_slice_from_frames(frame) # -1 is the farthest frame["traffic_light_faces_index_interval"] -= tl_slice.start tl_faces_this = filter_tl_faces_by_frames([frame], tl_faces[tl_slice].copy())[0] tl_faces_this = filter_tl_faces_by_status(tl_faces_this, "ACTIVE") except ValueError: tl_faces_this = [] return { "ego_centroid": ego_centroid, "ego_speed": ego_speed, "ego_yaw": rotation33_as_yaw(frame["ego_rotation"]), "tl_faces": tl_faces_this, "agents": agents, }
def rasterize( self, history_frames: np.ndarray, history_agents: List[np.ndarray], history_tl_faces: List[np.ndarray], agent: Optional[np.ndarray] = None, ) -> np.ndarray: # all frames are drawn relative to this one" frame = history_frames[0] if agent is None: ego_translation_m = history_frames[0]["ego_translation"] ego_yaw_rad = rotation33_as_yaw(frame["ego_rotation"]) else: ego_translation_m = np.append( agent["centroid"], history_frames[0]["ego_translation"][-1]) ego_yaw_rad = agent["yaw"] raster_from_world = self.render_context.raster_from_world( ego_translation_m, ego_yaw_rad) # this ensures we always end up with fixed size arrays, +1 is because current time is also in the history out_shape = (self.raster_size[1], self.raster_size[0], self.history_num_frames + 1) agents_images = np.zeros(out_shape, dtype=np.uint8) ego_images = np.zeros(out_shape, dtype=np.uint8) # --- 1. prepare agent keep indices for random agent drop augmentation --- track_ids = np.concatenate([a["track_id"] for a in history_agents]) unique_track_ids = np.unique(track_ids).astype(np.int64) n_max_agents = int(np.max(unique_track_ids) + 1) # +1 for host car. unique_track_ids = np.concatenate([[0], unique_track_ids ]) # Add Host car, with id=0. n_unique_agents = len(unique_track_ids) # if not np.all(unique_track_ids == np.arange(np.max(unique_track_ids) + 1)): # # It occured!! --> unique_track_ids is not continuous. Some numbers are filtered out. # print("unique_track_ids", unique_track_ids, "is not continuous!!!") if not self.eval and np.random.uniform() < self.agent_drop_ratio: if self.agent_drop_prob < 0: # Randomly decide number of agents to drop. # 0 represents host car. n_keep_agents = np.random.randint(0, n_unique_agents) agent_keep_indices = np.random.choice(unique_track_ids, n_keep_agents, replace=False) else: # Decide agents to drop or not by agent_drop_prob. agent_keep_indices = unique_track_ids[np.random.uniform( 0.0, 1.0, (n_unique_agents, )) > self.agent_drop_prob] n_keep_agents = len(agent_keep_indices) # Must keep ego agent! if agent["track_id"] not in agent_keep_indices: agent_keep_indices = np.append(agent_keep_indices, agent["track_id"]) else: n_keep_agents = n_unique_agents # keep all agents agent_keep_indices = None # --- 2. prepare extent scale augmentation ratio ---- # TODO: create enough number of extent_ratio array. Actually n_keep_agents suffice but create n_max_agents # for simplicity.. if self.eval: # No augmentation. agents_extent_ratio = np.ones((n_max_agents, 3)) elif self.min_extent_ratio == self.max_extent_ratio: agents_extent_ratio = np.ones( (n_max_agents, 3)) * self.min_extent_ratio else: agents_extent_ratio = np.random.uniform(self.min_extent_ratio, self.max_extent_ratio, (n_max_agents, 3)) ego_extent_ratio = agents_extent_ratio[0] for i, (frame, agents_) in enumerate(zip(history_frames, history_agents)): agents = filter_agents_by_labels(agents_, self.filter_agents_threshold) if agent_keep_indices is not None: # --- 1. apply agent drop augmentation --- agents = agents[np.isin(agents["track_id"], agent_keep_indices)] # note the cast is for legacy support of dataset before April 2020 av_agent = get_ego_as_agent(frame).astype(agents.dtype) # 2. --- apply extent scale augmentation --- # TODO: Need to convert agents["track_id"] --> index based on `agent_keep_indices`, # if we only create `agents_extent_ratio` of size `n_keep_agents`. agents["extent"] *= agents_extent_ratio[agents["track_id"]] av_agent[0]["extent"] *= ego_extent_ratio if agent is None: agents_image = draw_boxes(self.raster_size, raster_from_world, agents, 255) ego_image = draw_boxes(self.raster_size, raster_from_world, av_agent, 255) else: agent_ego = filter_agents_by_track_id(agents, agent["track_id"]) if agent_keep_indices is None or 0 in agent_keep_indices: agents = np.append(agents, av_agent) if len(agent_ego) == 0: # agent not in this history frame agents_image = draw_boxes(self.raster_size, raster_from_world, agents, 255) ego_image = np.zeros_like(agents_image) else: # add av to agents and remove the agent from agents agents = agents[agents != agent_ego[0]] agents_image = draw_boxes(self.raster_size, raster_from_world, agents, 255) ego_image = draw_boxes(self.raster_size, raster_from_world, agent_ego, 255) agents_images[..., i] = agents_image ego_images[..., i] = ego_image # combine such that the image consists of [agent_t, agent_t-1, agent_t-2, ego_t, ego_t-1, ego_t-2] out_im = np.concatenate((agents_images, ego_images), -1) return out_im.astype(np.float32) / 255
def generate_multi_agent_sample( state_index: int, frames: np.ndarray, agents: np.ndarray, tl_faces: np.ndarray, selected_track_id: Optional[int], render_context: RenderContext, history_num_frames: int, history_step_size: int, future_num_frames: int, future_step_size: int, filter_agents_threshold: float, rasterizer: Optional[Rasterizer] = None, perturbation: Optional[Perturbation] = None, min_frame_history: int = MIN_FRAME_HISTORY, min_frame_future: int = MIN_FRAME_FUTURE, ) -> dict: """Generates the inputs and targets to train a deep prediction model. A deep prediction model takes as input the state of the world (here: an image we will call the "raster"), and outputs where that agent will be some seconds into the future. This function has a lot of arguments and is intended for internal use, you should try to use higher level classes and partials that use this function. Args: state_index (int): The anchor frame index, i.e. the "current" timestep in the scene frames (np.ndarray): The scene frames array, can be numpy array or a zarr array agents (np.ndarray): The full agents array, can be numpy array or a zarr array tl_faces (np.ndarray): The full traffic light faces array, can be numpy array or a zarr array selected_track_id (Optional[int]): Either None for AV, or the ID of an agent that you want to predict the future of. This agent is centered in the raster and the returned targets are derived from their future states. render_context (RenderContext): raster_size (Tuple[int, int]): Desired output raster dimensions pixel_size (np.ndarray): Size of one pixel in the real world ego_center (np.ndarray): Where in the raster to draw the ego, [0.5,0.5] would be the center history_num_frames (int): Amount of history frames to draw into the rasters history_step_size (int): Steps to take between frames, can be used to subsample history frames future_num_frames (int): Amount of history frames to draw into the rasters future_step_size (int): Steps to take between targets into the future filter_agents_threshold (float): Value between 0 and 1 to use as cutoff value for agent filtering based on their probability of being a relevant agent rasterizer (Optional[Rasterizer]): Rasterizer of some sort that draws a map image perturbation (Optional[Perturbation]): Object that perturbs the input and targets, used to train models that can recover from slight divergence from training set data Raises: ValueError: A ValueError is returned if the specified ``selected_track_id`` is not present in the scene or was filtered by applying the ``filter_agent_threshold`` probability filtering. Returns: dict: a dict object with the raster array, the future offset coordinates (meters), the future yaw angular offset, the future_availability as a binary mask """ # the history slice is ordered starting from the latest frame and goes backward in time., ex. slice(100, 91, -2) history_slice = get_history_slice(state_index, history_num_frames, history_step_size, include_current_state=True) future_slice = get_future_slice(state_index, future_num_frames, future_step_size) history_frames = frames[history_slice].copy( ) # copy() required if the object is a np.ndarray future_frames = frames[future_slice].copy() sorted_frames = np.concatenate( (history_frames[::-1], future_frames)) # from past to future # get agents (past and future) agent_slice = get_agents_slice_from_frames(sorted_frames[0], sorted_frames[-1]) agents = agents[agent_slice].copy( ) # this is the minimum slice of agents we need history_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array future_frames[ "agent_index_interval"] -= agent_slice.start # sync interval with the agents array history_agents = filter_agents_by_frames(history_frames, agents) future_agents = filter_agents_by_frames(future_frames, agents) try: tl_slice = get_tl_faces_slice_from_frames( history_frames[-1], history_frames[0]) # -1 is the farthest # sync interval with the traffic light faces array history_frames["traffic_light_faces_index_interval"] -= tl_slice.start history_tl_faces = filter_tl_faces_by_frames(history_frames, tl_faces[tl_slice].copy()) except ValueError: history_tl_faces = [ np.empty(0, dtype=TL_FACE_DTYPE) for _ in history_frames ] if perturbation is not None: history_frames, future_frames = perturbation.perturb( history_frames=history_frames, future_frames=future_frames) # State you want to predict the future of. cur_frame = history_frames[0] cur_agents = history_agents[0] cur_agents = filter_agents_by_labels(cur_agents, filter_agents_threshold) agent_track_ids_u64 = cur_agents["track_id"] # uint64 --> int64 agent_track_ids = agent_track_ids_u64.astype(np.int64) assert np.alltrue(agent_track_ids == agent_track_ids_u64) agent_track_ids = np.concatenate( [np.array([-1], dtype=np.int64), agent_track_ids]) # Draw image with Ego car in center selected_agent = None input_im = (None if not rasterizer else rasterizer.rasterize( history_frames, history_agents, history_tl_faces, selected_agent)) future_coords_offset_list = [] future_yaws_offset_list = [] future_availability_list = [] history_coords_offset_list = [] history_yaws_offset_list = [] history_availability_list = [] agent_centroid_list = [] agent_yaw_list = [] agent_extent_list = [] filtered_track_ids_list = [] for selected_track_id in agent_track_ids: if selected_track_id == -1: agent_centroid = cur_frame["ego_translation"][:2] agent_yaw_rad = rotation33_as_yaw(cur_frame["ego_rotation"]) agent_extent = np.asarray( (EGO_EXTENT_LENGTH, EGO_EXTENT_WIDTH, EGO_EXTENT_HEIGHT)) world_from_agent = compute_agent_pose(agent_centroid, agent_yaw_rad) agent_from_world = np.linalg.inv(world_from_agent) raster_from_world = render_context.raster_from_world( agent_centroid, agent_yaw_rad) agent_origin = np.zeros((2, ), dtype=np.float32) else: # this will raise IndexError if the agent is not in the frame or under agent-threshold # this is a strict error, we cannot recover from this situation try: agent = filter_agents_by_track_id(cur_agents, selected_track_id)[0] except IndexError: raise ValueError( f" track_id {selected_track_id} not in frame or below threshold" ) agent_centroid = agent["centroid"] agent_yaw_rad = agent["yaw"] agent_extent = agent["extent"] agent_origin = transform_point(agent_centroid, agent_from_world) future_coords_offset, future_yaws_offset, future_availability = _create_targets_for_deep_prediction( future_num_frames, future_frames, selected_track_id, future_agents, agent_from_world, agent_yaw_rad, agent_origin) if selected_track_id != -1 and np.sum( future_availability) < min_frame_future: # Not enough future to predict, skip this agent. continue # history_num_frames + 1 because it also includes the current frame history_coords_offset, history_yaws_offset, history_availability = _create_targets_for_deep_prediction( history_num_frames + 1, history_frames, selected_track_id, history_agents, agent_from_world, agent_yaw_rad, agent_origin) if selected_track_id != -1 and np.sum( history_availability) < min_frame_history: # Not enough history to predict, skip this agent. continue future_coords_offset_list.append(future_coords_offset) future_yaws_offset_list.append(future_yaws_offset) future_availability_list.append(future_availability) history_coords_offset_list.append(history_coords_offset) history_yaws_offset_list.append(history_yaws_offset) history_availability_list.append(history_availability) agent_centroid_list.append(agent_centroid) agent_yaw_list.append(agent_yaw_rad) agent_extent_list.append(agent_extent) filtered_track_ids_list.append(selected_track_id) # Get pixel coordinate agent_centroid_array = np.array(agent_centroid_list) agent_centroid_in_pixel = transform_points(agent_centroid_array, raster_from_world) return { "image": input_im, # (h, w, ch) # --- All below is in world coordinate --- "target_positions": np.array(future_coords_offset_list), # (n_agents, num_frames, 2) "target_yaws": np.array(future_yaws_offset_list), # (n_agents, num_frames, 1) "target_availabilities": np.array(future_availability_list), # (n_agents, num_frames) "history_positions": np.array(history_coords_offset_list), # (n_agents, num_frames, 2) "history_yaws": np.array(history_yaws_offset_list), # (n_agents, num_frames, 1) "history_availabilities": np.array(history_availability_list), # (n_agents, num_frames) # "world_to_image": raster_from_world, # (3, 3) "raster_from_world": raster_from_world, # (3, 3) "centroid": agent_centroid_array, # (n_agents, 2) "yaw": np.array(agent_yaw_list), # (n_agents, 1) "extent": np.array(agent_extent_list), # (n_agents, 3) "track_ids": np.array(filtered_track_ids_list), # (n_agents) "centroid_pixel": agent_centroid_in_pixel, # (n_agents, 2) }
def rasterize( self, history_frames: np.ndarray, history_agents: List[np.ndarray], history_tl_faces: List[np.ndarray], agent: Optional[np.ndarray] = None, ) -> Dict[str, np.ndarray]: # all frames are drawn relative to this one" frame = history_frames[0] if agent is None: ego_translation_m = history_frames[0]["ego_translation"] ego_yaw_rad = rotation33_as_yaw(frame["ego_rotation"]) else: ego_translation_m = np.append(agent["centroid"], history_frames[0]["ego_translation"][-1]) ego_yaw_rad = agent["yaw"] raster_from_world = self.render_context.raster_from_world(ego_translation_m, ego_yaw_rad) # this ensures we always end up with fixed size arrays, +1 is because current time is also in the history out_shape = ( self.raster_size[1], self.raster_size[0], len(self.box_history_frames), ) agents_images = np.zeros(out_shape, dtype=np.uint8) ego_images = np.zeros(out_shape, dtype=np.uint8) # for i, (frame, agents) in enumerate(zip(history_frames, history_agents)): for dst_idx, i in enumerate(self.box_history_frames): if i >= history_frames.shape[0]: break frame = history_frames[i] agents = history_agents[i] agents = filter_agents_by_labels(agents, self.filter_agents_threshold) # note the cast is for legacy support of dataset before April 2020 av_agent = get_ego_as_agent(frame).astype(agents.dtype) if agent is None: agents_image = draw_boxes(self.raster_size, raster_from_world, agents, 255) ego_image = draw_boxes(self.raster_size, raster_from_world, av_agent, 255) else: agent_ego = filter_agents_by_track_id(agents, agent["track_id"]) if len(agent_ego) == 0: # agent not in this history frame agents_image = draw_boxes( self.raster_size, raster_from_world, np.append(agents, av_agent), 255, ) ego_image = np.zeros_like(agents_image) else: # add av to agents and remove the agent from agents agents = agents[agents != agent_ego[0]] agents_image = draw_boxes( self.raster_size, raster_from_world, np.append(agents, av_agent), 255, ) ego_image = draw_boxes(self.raster_size, raster_from_world, agent_ego, 255) agents_images[..., dst_idx] = agents_image ego_images[..., dst_idx] = ego_image # combine such that the image consists of [agent_t, agent_t-1, agent_t-2, ego_t, ego_t-1, ego_t-2] out_im = np.concatenate((agents_images, ego_images), -1) return {'image_box': out_im}