def _kinematics_from_tokens(helper: PredictHelper, instance: str, sample: str) -> KinematicsData:
    """
    Returns the 2D position, velocity and acceleration vectors from the given track records,
    along with the speed, yaw rate, (scalar) acceleration (magnitude), and heading.
    :param helper: Instance of PredictHelper.
    :param instance: Token of instance.
    :param sample: Token of sample.
    :return: KinematicsData.
    """
    annotation = helper.get_sample_annotation(instance, sample)
    x, y, _ = annotation['translation']
    yaw = quaternion_yaw(Quaternion(annotation['rotation']))

    velocity = helper.get_velocity_for_agent(instance, sample)
    acceleration = helper.get_acceleration_for_agent(instance, sample)
    yaw_rate = helper.get_heading_change_rate_for_agent(instance, sample)

    # The helper returns NaN when there is not enough history to estimate a value.
    if np.isnan(velocity):
        velocity = 0.0
    if np.isnan(acceleration):
        acceleration = 0.0
    if np.isnan(yaw_rate):
        yaw_rate = 0.0

    hx, hy = np.cos(yaw), np.sin(yaw)
    vx, vy = velocity * hx, velocity * hy
    ax, ay = acceleration * hx, acceleration * hy

    return x, y, vx, vy, ax, ay, velocity, yaw_rate, acceleration, yaw
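# A minimal usage sketch (the tokens below are placeholders, not real nuScenes tokens);
# it unpacks the returned KinematicsData tuple in the order documented above:
#
#   nusc = NuScenes('v1.0-mini', dataroot='./data/sets/nuscenes')
#   helper = PredictHelper(nusc)
#   x, y, vx, vy, ax, ay, speed, yaw_rate, accel, yaw = _kinematics_from_tokens(
#       helper, '<instance_token>', '<sample_token>')
#   print("speed = %.2f m/s, heading = %.2f rad" % (speed, yaw))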
def populate_agent_states(center_agent_annotation: Dict[str, Any],
                          center_agent_pixels: Tuple[float, float],
                          annotations: List[Dict[str, Any]],
                          base_image: np.ndarray,
                          helper: PredictHelper,
                          resolution: float = 0.1) -> None:
    """
    Adds agent states to a 4-channel base_image.
    :param center_agent_annotation: Annotation record for the agent that is in the center of the image.
    :param center_agent_pixels: Pixel location of the agent in the center of the image.
    :param annotations: Annotation records for the other agents.
    :param base_image: 4-channel image to populate with agent states.
    :param helper: Instance of PredictHelper.
    :param resolution: Resolution of the image in meters / pixel.
    :return: None.
    """
    agent_x, agent_y = center_agent_annotation['translation'][:2]

    for annotation in annotations:
        if annotation['instance_token'] != center_agent_annotation['instance_token']:
            location = annotation['translation'][:2]
            row_pixel, column_pixel = convert_to_pixel_coords(
                location, (agent_x, agent_y), center_agent_pixels, resolution)

            if 0 <= row_pixel < base_image.shape[0] and 0 <= column_pixel < base_image.shape[1]:
                v = helper.get_velocity_for_agent(annotation['instance_token'],
                                                  annotation['sample_token'])
                a = helper.get_acceleration_for_agent(annotation['instance_token'],
                                                      annotation['sample_token'])
                omega = helper.get_heading_change_rate_for_agent(
                    annotation['instance_token'], annotation['sample_token'])

                if base_image[row_pixel, column_pixel, 0] == 0:
                    # First agent in this cell: set the count and store its raw states.
                    base_image[row_pixel, column_pixel, 0] = 1
                    if not np.isnan(v):
                        base_image[row_pixel, column_pixel, 1] = v
                    if not np.isnan(a):
                        base_image[row_pixel, column_pixel, 2] = a
                    if not np.isnan(omega):
                        base_image[row_pixel, column_pixel, 3] = omega
                else:
                    # Another agent already maps to this cell: bump the count and
                    # keep the minimum of each state value.
                    base_image[row_pixel, column_pixel, 0] += 1
                    if not np.isnan(v):
                        base_image[row_pixel, column_pixel, 1] = min(
                            v, base_image[row_pixel, column_pixel, 1])
                    if not np.isnan(a):
                        base_image[row_pixel, column_pixel, 2] = min(
                            a, base_image[row_pixel, column_pixel, 2])
                    if not np.isnan(omega):
                        base_image[row_pixel, column_pixel, 3] = min(
                            omega, base_image[row_pixel, column_pixel, 3])
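# A minimal driving sketch (image shape and center pixel are assumptions); the channel
# layout, per the code above, is [agent count, speed, acceleration, yaw rate]:
#
#   base_image = np.zeros((500, 500, 4))
#   center_ann = helper.get_sample_annotation('<instance_token>', '<sample_token>')
#   anns = helper.get_annotations_for_sample('<sample_token>')
#   populate_agent_states(center_ann, (250, 400), anns, base_image, helper, resolution=0.1)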
def test_acceleration_nan_not_enough_data(self):
    mock_samples = [{'token': '1', 'timestamp': 0},
                    {'token': '2', 'timestamp': 0.5e6},
                    {'token': '3', 'timestamp': 1e6}]

    nusc = MockNuScenes(self.mock_annotations, mock_samples)
    helper = PredictHelper(nusc)
    # Acceleration needs two velocity estimates, i.e. at least three samples;
    # at sample '2' there is only one, so the helper returns NaN.
    self.assertTrue(np.isnan(helper.get_acceleration_for_agent('1', '2')))
def test_acceleration_zero(self):
    mock_samples = [{'token': '1', 'timestamp': 0},
                    {'token': '2', 'timestamp': 0.5e6},
                    {'token': '3', 'timestamp': 1e6}]

    nusc = MockNuScenes(self.mock_annotations, mock_samples)
    helper = PredictHelper(nusc)
    self.assertEqual(helper.get_acceleration_for_agent('1', '3'), 0)
def test_acceleration_nonzero(self):
    mock_samples = [{'token': '1', 'timestamp': 0},
                    {'token': '2', 'timestamp': 0.5e6},
                    {'token': '3', 'timestamp': 1e6}]

    mock_annotations = copy.copy(self.mock_annotations)
    mock_annotations[2]['translation'] = [3, 3, 3]

    nusc = MockNuScenes(mock_annotations, mock_samples)
    helper = PredictHelper(nusc)
    self.assertAlmostEqual(helper.get_acceleration_for_agent('1', '3'),
                           2 * (np.sqrt(32) - np.sqrt(8)))
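# Worked check for the expected value above (this assumes the fixture places the
# agent at (0, 0), (1, 1), (2, 2), so after the edit the positions are (0, 0),
# (1, 1), (3, 3), sampled 0.5 s apart):
#   v_2 = |(1,1) - (0,0)| / 0.5 = sqrt(2) / 0.5 = sqrt(8)   m/s
#   v_3 = |(3,3) - (1,1)| / 0.5 = sqrt(8) / 0.5 = sqrt(32)  m/s
#   a_3 = (v_3 - v_2) / 0.5 = 2 * (sqrt(32) - sqrt(8))      m/s^2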
class NuScenesFormatTransformer:
    def __init__(self, DATAROOT='./data/sets/nuscenes', dataset_version='v1.0-mini'):
        self.DATAROOT = DATAROOT
        self.dataset_version = dataset_version
        self.nuscenes = NuScenes(dataset_version, dataroot=self.DATAROOT)
        self.helper = PredictHelper(self.nuscenes)

    def get_format_mha_jam(self, samples_agents, out_file="./transformer_format.txt"):
        instance_token_to_id_dict = {}
        sample_token_to_id_dict = {}
        scene_token_dict = {}
        sample_id = 0
        instance_id = 0

        for current_sample in tqdm(samples_agents):
            instance_token, sample_token = current_sample.split("_")
            scene_token = self.nuscenes.get('sample', sample_token)["scene_token"]
            if scene_token in scene_token_dict:
                continue
            scene_token_dict[scene_token] = True

            # Walk the scene from its first sample, assigning ids to samples and instances.
            first_sample_token = self.nuscenes.get("scene", scene_token)["first_sample_token"]
            current_sample = self.nuscenes.get('sample', first_sample_token)

            while True:
                if current_sample['token'] not in sample_token_to_id_dict:
                    sample_token_to_id_dict[current_sample['token']] = sample_id
                    sample_token_to_id_dict[sample_id] = current_sample['token']
                    sample_id += 1
                else:
                    print("should not happen?")

                instances_in_sample = self.helper.get_annotations_for_sample(current_sample['token'])
                for sample_instance in instances_in_sample:
                    if sample_instance['instance_token'] not in instance_token_to_id_dict:
                        instance_token_to_id_dict[sample_instance['instance_token']] = instance_id
                        instance_token_to_id_dict[instance_id] = sample_instance['instance_token']
                        instance_id += 1

                if current_sample['next'] == "":
                    break
                current_sample = self.nuscenes.get('sample', current_sample['next'])

        mode = "train" if out_file.find("_train") != -1 else "val"
        mini = "mini" if out_file.find("mini") != -1 else "main"
        with open("dicts_sample_and_instances_id2token_" + mode + "_" + mini + ".json", 'w') as fw:
            json.dump([instance_token_to_id_dict, sample_token_to_id_dict], fw)

        # Converting to the transformer network format:
        # frame_id, agent_id, pos_x, pos_y
        # Loop over all the agents; if an agent has not been taken yet:
        # 1- add it to the taken agents (do not retake the agent)
        # 2- get the number of appearances of this agent
        # 3- skip this agent if it appears for less than 10 s (4 + 6)
        # 4- get the middle agent's token
        # 5- get the past and future agent locations relative to its location
        samples_new_format = []
        taken_instances = {}
        ds_size = 0

        for current_sample in samples_agents:
            instance_token, sample_token = current_sample.split("_")
            instance_id = instance_token_to_id_dict[instance_token]
            if instance_id in taken_instances:
                continue
            taken_instances[instance_id] = True

            future_samples = self.helper.get_future_for_agent(instance_token, sample_token,
                                                              6, True, False)
            past_samples = self.helper.get_past_for_agent(
                instance_token, sample_token, 1000, True, False)[:MAX_TRAJ_LEN - 1][::-1]
            current_sample = self.helper.get_sample_annotation(instance_token, sample_token)

            assert len(past_samples) >= 1
            assert len(future_samples) == 12

            ds_size += 1

            # Get the trajectory around this position.
            center_pos = len(past_samples)
            future_samples_local = self.helper.get_future_for_agent(instance_token, sample_token,
                                                                    6, True, True)
            past_samples_local = self.helper.get_past_for_agent(
                instance_token, sample_token, 1000, True, True)[:MAX_TRAJ_LEN - 1][::-1]

            assert len(future_samples_local) == 12

            trajectory = np.append(past_samples_local,
                                   np.append([[0, 0]], future_samples_local, axis=0),
                                   axis=0)

            past_samples = [sample_token_to_id_dict[p['sample_token']] for p in past_samples]
            future_samples = [sample_token_to_id_dict[p['sample_token']] for p in future_samples]
            trajectory_tokens = np.append(
                past_samples,
                np.append([sample_token_to_id_dict[sample_token]], future_samples, axis=0),
                axis=0)

            trajectory_ = np.zeros((trajectory.shape[0], 6))
            trajectory_[:, 0] = trajectory_tokens[:]
            trajectory_[:, 1:3] = trajectory
            trajectory = trajectory_
            len_future_samples = len(future_samples)
            del trajectory_, trajectory_tokens, past_samples, future_samples, \
                past_samples_local, future_samples_local

            curr_sample = self.helper.get_past_for_agent(
                instance_token, sample_token, 1000, False, False)[:MAX_TRAJ_LEN][-1]

            for i in range(trajectory.shape[0]):
                # instance_id, sample_id, x, y, velocity, acc, yaw
                velocity = self.helper.get_velocity_for_agent(instance_token,
                                                              curr_sample["sample_token"])
                acceleration = self.helper.get_acceleration_for_agent(instance_token,
                                                                      curr_sample["sample_token"])
                heading_change_rate = self.helper.get_heading_change_rate_for_agent(
                    instance_token, curr_sample["sample_token"])

                if math.isnan(velocity):
                    velocity = 0
                if math.isnan(acceleration):
                    acceleration = 0
                if math.isnan(heading_change_rate):
                    heading_change_rate = 0

                # Need to check the paper for relative velocity? Same for acc and yaw.
                trajectory[i][3:] = [velocity, acceleration, heading_change_rate]

                # No need to get the next sample token if this is the last element
                # in the series; prevents a bug.
                if i < trajectory.shape[0] - 1:
                    next_sample_token = self.nuscenes.get('sample_annotation',
                                                          curr_sample['next'])['sample_token']
                    curr_sample = self.helper.get_sample_annotation(instance_token,
                                                                    next_sample_token)

            s = str(instance_id) + ","

            # Invariant: (MAX_TRAJ_LEN + len_future_samples) >= trajectory.shape[0]
            repeat = (MAX_TRAJ_LEN + len_future_samples) - trajectory.shape[0]
            leading_arr = np.array(repeat * [-1, -64, -64, -64, -64, -64]).reshape((repeat, 6))
            trajectory = np.append(leading_arr, trajectory, axis=0)

            for i in range(trajectory.shape[0]):
                sample_id, x, y, velocity, acceleration, heading_change_rate = trajectory[i]
                s += str(sample_id) + "," + str(x) + "," + str(y) + "," + str(velocity) + "," \
                     + str(acceleration) + "," + str(heading_change_rate)
                if i != trajectory.shape[0] - 1:
                    s += ","
                else:
                    s += "\n"

            samples_new_format.append(s)

        with open(out_file, 'w') as fw:
            fw.writelines(samples_new_format)

        print(out_file + " size " + str(ds_size))

    def get_format_mha_jam_context(self, states_filepath, out_file):
        with open(states_filepath) as fr:
            agents_states = fr.readlines()

        # Format of each row: agent_id, 20 x (frame_id, x, y, v, a, yaw_rate)
        agents_states = [[float(x.rstrip()) for x in s.split(',')] for s in agents_states]
        mode = "train" if out_file.find("_train") != -1 else "val"
        mini = "mini" if out_file.find("mini") != -1 else "main"
        with open("dicts_sample_and_instances_id2token_" + mode + "_" + mini + ".json") as fr:
            instance_dict_id_token, sample_dict_id_token = json.load(fr)

        # Get the context grid for each sample in the states file.
        agent_ind = 0
        for agent in tqdm(agents_states):
            instance_token = instance_dict_id_token[str(int(agent[0]))]
            mid_frame_id = int(agent[1 + 6 * (MAX_TRAJ_LEN - 1)])
            sample_token = sample_dict_id_token[str(mid_frame_id)]

            frame_annotations = self.helper.get_annotations_for_sample(sample_token)
            surroundings_agents_coords = []
            surroundings_agents_instance_token = []

            for ann in frame_annotations:
                if ann['category_name'].find("vehicle") == -1:
                    continue
                if ann['instance_token'] == instance_token:
                    agent_ann = ann
                else:
                    surroundings_agents_coords.append(ann["translation"][:2])
                    surroundings_agents_instance_token.append(ann["instance_token"])

            if len(surroundings_agents_coords) != 0:
                surroundings_agents_coords = convert_global_coords_to_local(
                    surroundings_agents_coords, agent_ann["translation"], agent_ann["rotation"])

            # The grid covers x in [-25, 25] and y in [-10, 40] around the target agent.
            total_area_side = 50
            cell_size = 1.5625
            map_side_size = int(total_area_side // cell_size)
            grid_map = [[[-64, -64, -64, -64, -64] for _ in range(MAX_TRAJ_LEN)]
                        for _ in range(map_side_size * map_side_size)]

            for n in range(len(surroundings_agents_coords)):
                # Get the agent's cell in the grid directly from its local coordinates.
                alpha_y = (surroundings_agents_coords[n][1] - (-10)) / (40 - (-10))
                i = (map_side_size - 1) - int(alpha_y * map_side_size)
                alpha_x = (surroundings_agents_coords[n][0] - (-25)) / (25 - (-25))
                j = int(alpha_x * map_side_size)

                # Skip out-of-bound agents.
                if not (0 <= i < map_side_size and 0 <= j < map_side_size):
                    continue

                pos = i * map_side_size + j
                past_trajectory = self.get_current_past_trajectory(
                    surroundings_agents_instance_token[n], sample_token,
                    num_seconds=1000)[:MAX_TRAJ_LEN]
                assert len(past_trajectory) <= MAX_TRAJ_LEN
                retrieved_trajectory_len = len(past_trajectory)

                if grid_map[pos][-1][0] != -64:
                    # Cell already occupied: keep the trajectory with the greater length.
                    skip_traj = False
                    for ind, map_pos in enumerate(grid_map[pos]):
                        # -64 is the padding sentinel (bug fix: this was compared against 64).
                        if map_pos[0] != -64:
                            if MAX_TRAJ_LEN - ind > retrieved_trajectory_len:
                                skip_traj = True
                    if skip_traj:
                        # Skip only this neighbour (was a `break`, which would have
                        # dropped all remaining neighbours).
                        continue

                past_trajectory = convert_global_coords_to_local(
                    past_trajectory, agent_ann["translation"], agent_ann["rotation"])

                if retrieved_trajectory_len != MAX_TRAJ_LEN:
                    past_trajectory = np.concatenate([
                        np.array([[-64, -64]
                                  for _ in range(MAX_TRAJ_LEN - past_trajectory.shape[0])]),
                        past_trajectory
                    ], axis=0)

                neighbour_agent_features = []
                skip_traj = False

                for k in range(0, MAX_TRAJ_LEN):
                    if retrieved_trajectory_len > k:
                        if k == 0:
                            sample_token_i = sample_dict_id_token[str(mid_frame_id)]
                        else:
                            sample_token_i = self.helper.get_sample_annotation(
                                surroundings_agents_instance_token[n], sample_token_i)["prev"]
                            sample_token_i = self.nuscenes.get(
                                'sample_annotation', sample_token_i)['sample_token']

                        try:
                            velocity = self.helper.get_velocity_for_agent(
                                surroundings_agents_instance_token[n], sample_token_i)
                        except Exception:
                            skip_traj = True
                            break

                        acceleration = self.helper.get_acceleration_for_agent(
                            surroundings_agents_instance_token[n], sample_token_i)
                        heading_change_rate = self.helper.get_heading_change_rate_for_agent(
                            surroundings_agents_instance_token[n], sample_token_i)

                        if math.isnan(velocity):
                            velocity = 0
                        if math.isnan(acceleration):
                            acceleration = 0
                        if math.isnan(heading_change_rate):
                            heading_change_rate = 0

                        neighbour_agent_features.append(
                            [velocity, acceleration, heading_change_rate])
                    else:
                        neighbour_agent_features.append([-64, -64, -64])

                if skip_traj:
                    print("skipping agent because it has missing data")
                    continue

                past_trajectory = np.concatenate([past_trajectory, neighbour_agent_features],
                                                 axis=1)
                grid_map[pos] = past_trajectory.tolist()

            grid_map = np.array(grid_map).astype(np.float16)

            if VISUALIZE_DATA:
                visualize_traffic_neighbours(grid_map, map_side_size * map_side_size)

            if not os.path.exists(os.path.dirname(out_file)):
                os.makedirs(os.path.dirname(out_file))

            np.save(out_file.replace("_.txt", "__" + str(agent_ind) + ".txt"), grid_map)
            agent_ind += 1

    def get_current_past_trajectory(self, instance_token, sample_token, num_seconds,
                                    just_xy=True, in_agent_frame=False):
        past_samples = self.helper.get_past_for_agent(
            instance_token, sample_token, num_seconds, in_agent_frame, just_xy)[::-1]
        current_sample = self.helper.get_sample_annotation(instance_token, sample_token)

        if just_xy:
            current_sample = current_sample["translation"][:2]
            if past_samples.shape[0] == 0:
                trajectory = np.array([current_sample])
            else:
                trajectory = np.append(past_samples, [current_sample], axis=0)
        else:
            trajectory = np.append(past_samples, [current_sample], axis=0)

        return trajectory

    def get_format_mha_jam_maps(self, states_filepath, out_file):
        with open(states_filepath) as fr:
            agents_states = fr.readlines()

        # Format of each row: agent_id, 20 x (frame_id, x, y, v, a, yaw_rate)
        agents_states = [[float(x.rstrip()) for x in s.split(',')] for s in agents_states]

        mode = "train" if out_file.find("_train") != -1 else "val"
        mini = "mini" if out_file.find("mini") != -1 else "main"
        with open("dicts_sample_and_instances_id2token_" + mode + "_" + mini + ".json") as fr:
            instance_dict_id_token, sample_dict_id_token = json.load(fr)

        # Render the raster map for each sample in the states file.
        agent_ind = 0
        static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=1)
        mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                                       agent_rasterizer, Rasterizer())

        if not os.path.exists(os.path.dirname(out_file)):
            os.makedirs(os.path.dirname(out_file))

        for agent in tqdm(agents_states):
            instance_token = instance_dict_id_token[str(int(agent[0]))]
            mid_frame_id = int(agent[1 + 6 * MAX_TRAJ_LEN])
            sample_token = sample_dict_id_token[str(mid_frame_id)]

            img = mtp_input_representation.make_input_representation(instance_token,
                                                                     sample_token)
            cv2.imwrite(out_file.replace("_.jpg", "__" + str(agent_ind) + ".jpg"), img)
            agent_ind += 1

    def run(self, out_dir):
        if self.dataset_version.find("mini") != -1:
            train_agents = get_prediction_challenge_split("mini_train", dataroot=self.DATAROOT)
            val_agents = get_prediction_challenge_split("mini_val", dataroot=self.DATAROOT)
        else:
            train_agents = get_prediction_challenge_split("train", dataroot=self.DATAROOT)
            train_agents.extend(get_prediction_challenge_split("train_val",
                                                               dataroot=self.DATAROOT))
            val_agents = get_prediction_challenge_split("val", dataroot=self.DATAROOT)

        self.get_format_mha_jam(
            train_agents,
            os.path.join(out_dir, "states_train_" + self.dataset_version + ".txt"))
        self.get_format_mha_jam_context(
            os.path.join(out_dir, "states_train_" + self.dataset_version + ".txt"),
            os.path.join(out_dir, "context_train_" + self.dataset_version, "context_train_.txt"))
        self.get_format_mha_jam_maps(
            os.path.join(out_dir, "states_train_" + self.dataset_version + ".txt"),
            os.path.join(out_dir, "maps_train_" + self.dataset_version, "maps_train_.jpg"))

        self.get_format_mha_jam(
            val_agents,
            os.path.join(out_dir, "states_val_" + self.dataset_version + ".txt"))
        self.get_format_mha_jam_context(
            os.path.join(out_dir, "states_val_" + self.dataset_version + ".txt"),
            os.path.join(out_dir, "context_val_" + self.dataset_version, "context_val_.txt"))
        self.get_format_mha_jam_maps(
            os.path.join(out_dir, "states_val_" + self.dataset_version + ".txt"),
            os.path.join(out_dir, "maps_val_" + self.dataset_version, "maps_val_.jpg"))
class NS(Dataset):
    def __init__(self,
                 dataroot: str,
                 split: str,
                 t_h: float = 2,
                 t_f: float = 6,
                 grid_dim: int = 25,
                 img_size: int = 200,
                 horizon: int = 40,
                 grid_extent: Tuple[int, int, int, int] = (-25, 25, -10, 40),
                 num_actions: int = 4,
                 image_extraction_mode: bool = False):
        """
        Initializes dataset class for nuScenes prediction
        :param dataroot: Path to tables and data
        :param split: Dataset split for prediction benchmark ('train'/'train_val'/'val')
        :param t_h: Track history in seconds
        :param t_f: Prediction horizon in seconds
        :param grid_dim: Size of grid, default: 25x25
        :param img_size: Size of raster map image in pixels, default: 200x200
        :param horizon: MDP horizon
        :param grid_extent: Map extents in meters, (-left, right, -behind, front)
        :param num_actions: Number of actions for each state (4: [D,R,U,L] or 8: [D, R, U, L, DR, UR, DL, UL])
        :param image_extraction_mode: Whether dataset class is being used for image extraction
        """
        # nuScenes dataset and predict helper
        self.dataroot = dataroot
        self.ns = NuScenes('v1.0-trainval', dataroot=dataroot)
        self.helper = PredictHelper(self.ns)
        self.token_list = get_prediction_challenge_split(split, dataroot=dataroot)

        # Useful parameters
        self.grid_dim = grid_dim
        self.grid_extent = grid_extent
        self.img_size = img_size
        self.t_f = t_f
        self.t_h = t_h
        self.horizon = horizon
        self.num_actions = num_actions

        # Map row, column and velocity states to actual values
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        self.row_centers = np.linspace(
            self.grid_extent[3] - grid_size_m / (self.grid_dim * 2),
            self.grid_extent[2] + grid_size_m / (self.grid_dim * 2),
            self.grid_dim)
        self.col_centers = np.linspace(
            self.grid_extent[0] + grid_size_m / (self.grid_dim * 2),
            self.grid_extent[1] - grid_size_m / (self.grid_dim * 2),
            self.grid_dim)

        # Surrounding agent input representation: populate grid with velocity, acc, yaw-rate
        self.agent_ip = AgentMotionStatesOnGrid(self.helper,
                                                resolution=grid_size_m / img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

        # Image extraction mode is used for extracting map images offline prior to training
        self.image_extraction_mode = image_extraction_mode
        if self.image_extraction_mode:
            # Raster map representation
            self.map_ip = StaticLayerRasterizer(self.helper,
                                                resolution=grid_size_m / img_size,
                                                meters_ahead=grid_extent[3],
                                                meters_behind=-grid_extent[2],
                                                meters_left=-grid_extent[0],
                                                meters_right=grid_extent[1])

            # Raster map with agent boxes. Only used for visualization
            static_layer_rasterizer = StaticLayerRasterizer(
                self.helper,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])
            agent_rasterizer = AgentBoxesWithFadedHistory(
                self.helper,
                seconds_of_history=1,
                resolution=grid_size_m / img_size,
                meters_ahead=grid_extent[3],
                meters_behind=-grid_extent[2],
                meters_left=-grid_extent[0],
                meters_right=grid_extent[1])
            self.map_ip_agents = InputRepresentation(static_layer_rasterizer,
                                                     agent_rasterizer, Rasterizer())

    def __len__(self):
        return len(self.token_list)

    def __getitem__(self, idx):
        """
        Returns inputs, ground truth values and other utilities for data point at given index
        :return hist: snippet of track history, default 2s at 0.5 Hz sampling frequency
        :return fut: ground truth future trajectory, default 6s at 0.5 Hz sampling frequency
        :return img: Imagenet normalized bird's eye view map around the target vehicle
        :return svf_e: Goal and path state visitation frequencies for expert demonstration,
            i.e. path from train set
        :return motion_feats: motion and position features used for reward model
        :return waypts_e: (x,y) BEV co-ordinates corresponding to grid cells of svf_e
        :return agents: tensor of surrounding agent states populated in grid around target agent
        :return grid_idcs: grid co-ordinates of svf_e
        :return bc_targets: ground truth actions for training behavior cloning model
        :return img_agents: image with agent boxes for visualization / debugging
        :return instance_token: nuScenes instance token for prediction instance
        :return sample_token: nuScenes sample token for prediction instance
        :return idx: instance id (mainly for debugging)
        """
        # nuScenes instance and sample token for prediction data point
        instance_token, sample_token = self.token_list[idx].split("_")

        # If dataset is being used for image extraction
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        if self.image_extraction_mode:
            # Make directory to store raster map images
            img_dir = os.path.join(self.dataroot, 'prediction_raster_maps',
                                   'images' + str(self.img_size) + "_" + str(int(grid_size_m)) + 'm')
            if not os.path.isdir(img_dir):
                os.mkdir(img_dir)

            # Generate and save raster map image with just static elements
            img = self.map_ip.make_representation(instance_token, sample_token)
            img_save = Image.fromarray(img)
            img_save.save(os.path.join(img_dir, instance_token + "_" + sample_token + '.png'))

            # Generate and save raster map image with static elements and agent boxes
            # (for visualization only)
            img_agents = self.map_ip_agents.make_input_representation(instance_token, sample_token)
            img_agents_save = Image.fromarray(img_agents)
            img_agents_save.save(os.path.join(img_dir,
                                              instance_token + "_" + sample_token + 'agents.png'))

            # Return dummy values
            return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

        # If dataset is being used for training/validation/testing
        else:
            # Get track history for agent:
            hist = self.get_hist(instance_token, sample_token)
            hist = torch.from_numpy(hist)

            # Get ground truth future for agent:
            fut = self.helper.get_future_for_agent(instance_token, sample_token,
                                                   seconds=self.t_f, in_agent_frame=True)
            fut = torch.from_numpy(fut)

            # Get indefinite future for computing expert state visitation frequencies (SVF):
            fut_indefinite = self.helper.get_future_for_agent(instance_token, sample_token,
                                                              seconds=300, in_agent_frame=True)

            # Up-sample indefinite future by a factor of 10
            fut_interpolated = np.zeros((fut_indefinite.shape[0] * 10 + 1, 2))
            param_query = np.linspace(0, fut_indefinite.shape[0],
                                      fut_indefinite.shape[0] * 10 + 1)
            param_given = np.linspace(0, fut_indefinite.shape[0], fut_indefinite.shape[0] + 1)
            val_given_x = np.concatenate(([0], fut_indefinite[:, 0]))
            val_given_y = np.concatenate(([0], fut_indefinite[:, 1]))
            fut_interpolated[:, 0] = np.interp(param_query, param_given, val_given_x)
            fut_interpolated[:, 1] = np.interp(param_query, param_given, val_given_y)

            # Read pre-extracted raster map image
            img_dir = os.path.join(self.dataroot, 'prediction_raster_maps',
                                   'images' + str(self.img_size) + "_" + str(int(grid_size_m)) + 'm')
            img = cv2.imread(os.path.join(img_dir, instance_token + "_" + sample_token + '.png'))

            # Pre-process image
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = torch.from_numpy(img)
            img = img.permute((2, 0, 1)).float() / 255

            # Normalize using Imagenet stats
            img = normalize_imagenet(img)

            # Read pre-extracted raster map with agent boxes (for visualization + debugging)
            img_agents = cv2.imread(os.path.join(img_dir,
                                                 instance_token + "_" + sample_token + 'agents.png'))

            # Pre-process image
            img_agents = cv2.cvtColor(img_agents, cv2.COLOR_BGR2RGB)
            img_agents = torch.from_numpy(img_agents)
            img_agents = img_agents.permute((2, 0, 1)).float() / 255

            # Get surrounding agent states
            agents = torch.from_numpy(self.agent_ip.make_representation(instance_token,
                                                                        sample_token))
            agents = agents.permute((2, 0, 1)).float()

            # Sum pool states to down-sample to grid dimensions
            agents = f.avg_pool2d(agents[None, :, :, :], self.img_size // self.grid_dim)
            agents = agents.squeeze(dim=0) * ((self.img_size // self.grid_dim) ** 2)

            # Get expert SVF:
            svf_e, waypts_e, grid_idcs = self.get_expert_waypoints(fut_interpolated)
            svf_e = torch.from_numpy(svf_e)
            waypts_e = torch.from_numpy(waypts_e)
            grid_idcs = torch.from_numpy(grid_idcs)

            # Get motion and position feats:
            motion_feats = self.get_motion_feats(instance_token, sample_token)
            motion_feats = torch.from_numpy(motion_feats)

            # Targets for behavior cloning model:
            bc_targets = self.get_bc_targets(fut_interpolated)
            bc_targets = torch.from_numpy(bc_targets)

            return hist, fut, img, svf_e, motion_feats, waypts_e, agents, grid_idcs, bc_targets, \
                img_agents, instance_token, sample_token, idx

    def get_hist(self, instance_token: str, sample_token: str):
        """
        Function to get track history of agent
        :param instance_token: nuScenes instance token for datapoint
        :param sample_token: nuScenes sample token for datapoint
        """
        # x, y co-ordinates in agent's frame of reference
        xy = self.helper.get_past_for_agent(instance_token, sample_token,
                                            seconds=self.t_h, in_agent_frame=True)

        # Get all history records for obtaining velocity, acceleration and turn rate values
        hist_records = self.helper.get_past_for_agent(instance_token, sample_token,
                                                      seconds=self.t_h, in_agent_frame=True,
                                                      just_xy=False)
        if xy.shape[0] > self.t_h * 2:
            xy = xy[0:int(self.t_h) * 2]
        if len(hist_records) > self.t_h * 2:
            hist_records = hist_records[0:int(self.t_h) * 2]

        # Initialize hist tensor and set x and y co-ordinates returned by prediction helper
        hist = np.zeros((xy.shape[0], 5))
        hist[:, 0:2] = xy

        # Instance and sample tokens from history records
        i_tokens = [hist_records[i]['instance_token'] for i in range(len(hist_records))]
        i_tokens.insert(0, instance_token)
        s_tokens = [hist_records[i]['sample_token'] for i in range(len(hist_records))]
        s_tokens.insert(0, sample_token)

        # Set velocity, acc and turn rate values for hist
        for k in range(hist.shape[0]):
            i_t = i_tokens[k]
            s_t = s_tokens[k]
            v = self.helper.get_velocity_for_agent(i_t, s_t)
            a = self.helper.get_acceleration_for_agent(i_t, s_t)
            theta = self.helper.get_heading_change_rate_for_agent(i_t, s_t)

            # If function returns nan values due to short tracks, set corresponding value to 0
            if np.isnan(v):
                v = 0
            if np.isnan(a):
                a = 0
            if np.isnan(theta):
                theta = 0
            hist[k, 2] = v
            hist[k, 3] = a
            hist[k, 4] = theta

        # Zero pad for track histories shorter than t_h
        hist_zeropadded = np.zeros((int(self.t_h) * 2, 5))

        # Flip to have correct order of timestamps
        hist = np.flip(hist, 0)
        hist_zeropadded[-hist.shape[0]:] = hist

        return hist_zeropadded

    def get_expert_waypoints(self, fut: np.ndarray):
        """
        Function to get the expert's state visitation frequencies based on their trajectory
        :param fut: numpy array with future trajectory for all available future timestamps,
            up-sampled by 10
        """
        # Expert state visitation frequencies for training reward model,
        # waypoints in meters and grid indices
        svf_e = np.zeros((2, self.grid_dim, self.grid_dim))
        waypts_e = np.zeros((self.horizon, 2))
        grid_idcs = np.zeros((self.horizon, 2))

        count = 0
        row_prev = np.nan
        column_prev = np.nan
        for k in range(fut.shape[0]):
            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = np.argmin(np.absolute(fut[k, 0] - self.col_centers))
            row = np.argmin(np.absolute(fut[k, 1] - self.row_centers))

            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            if self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1] and \
                    self.grid_extent[2] <= fut[k, 1] <= self.grid_extent[3]:

                # Check if cell location has changed
                if row != row_prev or column != column_prev:
                    # Add cell location to path states of expert
                    svf_e[0, row.astype(int), column.astype(int)] = 1

                    if count < self.horizon:
                        # Get BEV coordinates corresponding to cell locations
                        waypts_e[count, 0] = self.row_centers[row]
                        waypts_e[count, 1] = self.col_centers[column]
                        grid_idcs[count, 0] = row
                        grid_idcs[count, 1] = column
                        count += 1
            else:
                break

            column_prev = column
            row_prev = row

        # Last cell location where demonstration terminates is the goal state:
        svf_e[1, row_prev.astype(int), column_prev.astype(int)] = 1

        return svf_e, waypts_e, grid_idcs

    def get_motion_feats(self, instance_token: str, sample_token: str):
        """
        Function to get motion and position features over grid for reward model
        :param instance_token: nuScenes instance token for datapoint
        :param sample_token: nuScenes sample token for datapoint
        """
        feats = np.zeros((3, self.grid_dim, self.grid_dim))

        # X and Y co-ordinates over grid
        grid_size_m = self.grid_extent[1] - self.grid_extent[0]
        y = (np.linspace(self.grid_extent[3] - grid_size_m / (self.grid_dim * 2),
                         self.grid_extent[2] + grid_size_m / (self.grid_dim * 2),
                         self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1)
        x = (np.linspace(self.grid_extent[0] + grid_size_m / (self.grid_dim * 2),
                         self.grid_extent[1] - grid_size_m / (self.grid_dim * 2),
                         self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1).transpose()

        # Velocity of agent
        v = self.helper.get_velocity_for_agent(instance_token, sample_token)
        if np.isnan(v):
            v = 0

        # Normalize X and Y co-ordinates over grid
        feats[0] = v
        feats[1] = x / grid_size_m
        feats[2] = y / grid_size_m

        return feats

    def get_bc_targets(self, fut: np.ndarray):
        """
        Function to get targets for behavior cloning model
        :param fut: numpy array with future trajectory for all available future timestamps,
            up-sampled by 10
        """
        bc_targets = np.zeros((self.num_actions + 1, self.grid_dim, self.grid_dim))
        column_prev = np.argmin(np.absolute(fut[0, 0] - self.col_centers))
        row_prev = np.argmin(np.absolute(fut[0, 1] - self.row_centers))
        for k in range(fut.shape[0]):
            # Convert trajectory (x,y) co-ordinates to grid locations:
            column = np.argmin(np.absolute(fut[k, 0] - self.col_centers))
            row = np.argmin(np.absolute(fut[k, 1] - self.row_centers))

            # Demonstration ends when expert leaves the image crop corresponding to the grid:
            if self.grid_extent[0] <= fut[k, 0] <= self.grid_extent[1] and \
                    self.grid_extent[2] <= fut[k, 1] <= self.grid_extent[3]:

                # Check if cell location has changed
                if row != row_prev or column != column_prev:
                    bc_targets[:, int(row_prev), int(column_prev)] = 0
                    d_x = column - column_prev
                    d_y = row - row_prev
                    theta = np.arctan2(d_y, d_x)

                    # Assign ground truth actions for expert demonstration
                    if self.num_actions == 4:  # [D, R, U, L, end]
                        if np.pi / 4 <= theta < 3 * np.pi / 4:
                            bc_targets[0, int(row_prev), int(column_prev)] = 1
                        elif -np.pi / 4 <= theta < np.pi / 4:
                            bc_targets[1, int(row_prev), int(column_prev)] = 1
                        elif -3 * np.pi / 4 <= theta < -np.pi / 4:
                            bc_targets[2, int(row_prev), int(column_prev)] = 1
                        else:
                            bc_targets[3, int(row_prev), int(column_prev)] = 1
                    else:  # [D, R, U, L, DR, UR, DL, UL, end]
                        if 3 * np.pi / 8 <= theta < 5 * np.pi / 8:
                            bc_targets[0, int(row_prev), int(column_prev)] = 1
                        elif -np.pi / 8 <= theta < np.pi / 8:
                            bc_targets[1, int(row_prev), int(column_prev)] = 1
                        elif -5 * np.pi / 8 <= theta < -3 * np.pi / 8:
                            bc_targets[2, int(row_prev), int(column_prev)] = 1
                        elif np.pi / 8 <= theta < 3 * np.pi / 8:
                            bc_targets[4, int(row_prev), int(column_prev)] = 1
                        elif -3 * np.pi / 8 <= theta < -np.pi / 8:
                            bc_targets[5, int(row_prev), int(column_prev)] = 1
                        elif 5 * np.pi / 8 <= theta < 7 * np.pi / 8:
                            bc_targets[6, int(row_prev), int(column_prev)] = 1
                        elif -7 * np.pi / 8 <= theta < -5 * np.pi / 8:
                            bc_targets[7, int(row_prev), int(column_prev)] = 1
                        else:
                            bc_targets[3, int(row_prev), int(column_prev)] = 1
            else:
                break

            column_prev = column
            row_prev = row

        # Final action is the end action to transition to the goal state:
        bc_targets[self.num_actions, int(row_prev), int(column_prev)] = 1

        return bc_targets
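# A minimal usage sketch (split name and loader settings are assumptions), showing
# how the 13 values returned by __getitem__ come out of a DataLoader:
#
#   from torch.utils.data import DataLoader
#   dataset = NS(dataroot='./data/sets/nuscenes', split='train')
#   loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
#   hist, fut, img, svf_e, motion_feats, waypts_e, agents, grid_idcs, bc_targets, \
#       img_agents, instance_token, sample_token, idx = next(iter(loader))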
mtp_input_representation = InputRepresentation(static_layer_rasterizer,
                                               agent_rasterizer, Rasterizer())

# Rewrite the list of filenames from scratch on each run.
if os.path.isfile(os.path.join(DATAPATH, 'fn_list.txt')):
    os.remove(os.path.join(DATAPATH, 'fn_list.txt'))
fn_list = open(os.path.join(DATAPATH, 'fn_list.txt'), 'w')

for inst_samp_pair in tqdm(inst_samp_pair_list):
    fn_list.write(inst_samp_pair + '\n')
    instance_token, sample_token = inst_samp_pair.split("_")

    img = mtp_input_representation.make_input_representation(instance_token, sample_token)
    agent_state_vector = torch.Tensor([
        helper.get_velocity_for_agent(instance_token, sample_token),
        helper.get_acceleration_for_agent(instance_token, sample_token),
        helper.get_heading_change_rate_for_agent(instance_token, sample_token)
    ])
    future_xy_local = torch.Tensor(
        helper.get_future_for_agent(instance_token, sample_token,
                                    seconds=FUTURE_SEC, in_agent_frame=True))

    Image.fromarray(img).save(os.path.join(dpathlist_[0], inst_samp_pair + '.jpg'))
    torch.save(agent_state_vector, os.path.join(dpathlist_[1], inst_samp_pair + '.state'))
    torch.save(future_xy_local, os.path.join(dpathlist_[2], inst_samp_pair + '.traj'))

fn_list.close()
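# A minimal sketch of reading one saved datapoint back (the paths mirror the save
# calls above):
#
#   img = Image.open(os.path.join(dpathlist_[0], inst_samp_pair + '.jpg'))
#   state = torch.load(os.path.join(dpathlist_[1], inst_samp_pair + '.state'))
#   traj = torch.load(os.path.join(dpathlist_[2], inst_samp_pair + '.traj'))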
def load_a_scene(self, scene_num=0):
    """
    For mini_train/mini_val: train and val splits of the mini subset, used for
    visualization and debugging (8/2 scenes).
    {instance_token}_{sample_token}:
    ['bc38961ca0ac4b14ab90e547ba79fbb6_39586f9d59004284a7114a68825e8eec',
     'bc38961ca0ac4b14ab90e547ba79fbb6_f1e3d9d08f044c439ce86a2d6fcca57b']
    :return: a list of agent variables at each frame. Say there are 30 frames; the list
        then has length 30. Each element holds one entry per agent in that frame, and each
        agent entry has 7 values: (x, y, yaw, width, length (the larger dimension), v, a).
    """
    try:
        nuscenes = NuScenes('v1.0-mini', dataroot=str(self.data_root))
    except Exception as e:
        # Loading fails when the dataset tables are missing; re-raise so the caller
        # does not continue with `nuscenes` undefined.
        print("NuScenes not found:", e)
        raise

    my_scene = nuscenes.scene[scene_num]

    # Get all keyframes in the scene
    helper = PredictHelper(nuscenes)
    frame_list = []
    minimal_x = 999999999
    minimal_y = 999999999
    max_x = 0
    max_y = 0
    sample_tokens = nuscenes.field2token('sample', 'scene_token', my_scene["token"])

    for sample_token in sample_tokens:
        # Each frame
        agent_list = []
        my_sample = nuscenes.get('sample', sample_token)
        annotation_tokens = my_sample['anns']

        for annotation_token in annotation_tokens:
            # Each agent
            agent_parameter_list = []
            annotation_metadata = nuscenes.get('sample_annotation', annotation_token)
            category_name = annotation_metadata['category_name']
            if category_name.split('.')[0] == 'vehicle':
                # The current version does not cover pedestrians yet
                agent_parameter_list += annotation_metadata['translation'][:2]

                # Convert the quaternion rotation to a yaw angle about the world z axis
                yaw = Quaternion(annotation_metadata['rotation']).yaw_pitch_roll[0]
                agent_parameter_list.append(normalize_angle(yaw + math.pi / 2))

                agent_parameter_list += annotation_metadata['size'][:2]

                # Note: velocity and acceleration can be NaN at frames 1 and 1+2
                agent_parameter_list.append(
                    helper.get_velocity_for_agent(annotation_metadata['instance_token'],
                                                  annotation_metadata['sample_token']))
                agent_parameter_list.append(
                    helper.get_acceleration_for_agent(annotation_metadata['instance_token'],
                                                      annotation_metadata['sample_token']))
                agent_list.append(agent_parameter_list)

            # Track scene extents for the offset
            x, y, _ = annotation_metadata['translation']
            if x < minimal_x:
                minimal_x = x
            if x > max_x:
                max_x = x
            if y < minimal_y:
                minimal_y = y
            if y > max_y:
                max_y = y

        frame_list.append(agent_list)

    self.offsets = (-abs(max_x - minimal_x) / 2 - minimal_x,
                    -abs(max_y - minimal_y) / 2 - minimal_y)
    return frame_list
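# A minimal usage sketch (`scene_loader` is a hypothetical instance of the class that
# owns load_a_scene and exposes `data_root`):
#
#   frames = scene_loader.load_a_scene(scene_num=0)
#   print(len(frames), "frames;", len(frames[0]), "vehicles in the first frame")
#   x, y, yaw, width, length, v, a = frames[0][0]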