def get_future_egomotion(self, rec, index):
    rec_t0 = rec

    # Identity: returned when rec is the last sample, or when the scene changes.
    future_egomotion = np.eye(4, dtype=np.float32)

    if index < len(self.ixes) - 1:
        rec_t1 = self.ixes[index + 1]

        # Only compute egomotion between consecutive samples of the same scene.
        if rec_t0['scene_token'] == rec_t1['scene_token']:
            egopose_t0 = self.nusc.get(
                'ego_pose',
                self.nusc.get('sample_data', rec_t0['data']['LIDAR_TOP'])['ego_pose_token']
            )
            egopose_t1 = self.nusc.get(
                'ego_pose',
                self.nusc.get('sample_data', rec_t1['data']['LIDAR_TOP'])['ego_pose_token']
            )

            egopose_t0 = convert_egopose_to_matrix_numpy(egopose_t0)
            egopose_t1 = convert_egopose_to_matrix_numpy(egopose_t1)

            # Relative pose t -> t+1, expressed in the ego frame at t+1.
            future_egomotion = invert_matrix_egopose_numpy(egopose_t1).dot(egopose_t0)
            # Clean up the homogeneous bottom row against numerical noise.
            future_egomotion[3, :3] = 0.0
            future_egomotion[3, 3] = 1.0

    future_egomotion = torch.Tensor(future_egomotion).float()

    # Convert to a 6-DoF vector.
    future_egomotion = mat2pose_vec(future_egomotion)
    return future_egomotion.unsqueeze(0)
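# `convert_egopose_to_matrix_numpy` and `invert_matrix_egopose_numpy` are used
# above but not defined in this section. A minimal sketch of what they are
# assumed to do: build a 4x4 rigid transform from a nuScenes ego_pose record,
# and invert it in closed form (R -> R^T, t -> -R^T t) rather than with a
# general matrix inverse.
import numpy as np
from pyquaternion import Quaternion


def convert_egopose_to_matrix_numpy(egopose):
    """Homogeneous 4x4 world-from-ego transform from an ego_pose record."""
    transformation_matrix = np.zeros((4, 4), dtype=np.float32)
    transformation_matrix[:3, :3] = Quaternion(egopose['rotation']).rotation_matrix
    transformation_matrix[:3, 3] = np.array(egopose['translation'])
    transformation_matrix[3, 3] = 1.0
    return transformation_matrix


def invert_matrix_egopose_numpy(egopose):
    """Invert a 4x4 rigid transform using the rotation's orthogonality."""
    rotation = egopose[:3, :3]
    translation = egopose[:3, 3]
    inverse_matrix = np.zeros((4, 4), dtype=np.float32)
    inverse_matrix[:3, :3] = rotation.T
    inverse_matrix[:3, 3] = -rotation.T @ translation
    inverse_matrix[3, 3] = 1.0
    return inverse_matrix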
def __getitem__(self, index):
    """
    Returns
    -------
        data: dict with the following keys:
            image: torch.Tensor<float> (T, N, 3, H, W)
                normalised camera images, with T the sequence length and N the number of cameras.
            intrinsics: torch.Tensor<float> (T, N, 3, 3)
                intrinsics containing resizing and cropping parameters.
            extrinsics: torch.Tensor<float> (T, N, 4, 4)
                6 DoF pose from world coordinates to camera coordinates.
            segmentation: torch.Tensor<int64> (T, 1, H_bev, W_bev)
                (H_bev, W_bev) are the pixel dimensions in bird's-eye view.
            instance: torch.Tensor<int64> (T, 1, H_bev, W_bev)
            centerness: torch.Tensor<float> (T, 1, H_bev, W_bev)
            offset: torch.Tensor<float> (T, 2, H_bev, W_bev)
            flow: torch.Tensor<float> (T, 2, H_bev, W_bev)
            future_egomotion: torch.Tensor<float> (T, 6)
                6 DoF egomotion t -> t+1
            sample_token: List<str> (T,)
            z_position: bird's-eye-view z-position (height) labels.
            attribute: attribute labels.
    """
    data = {}
    keys = [
        'image', 'intrinsics', 'extrinsics',
        'segmentation', 'instance', 'centerness', 'offset', 'flow',
        'future_egomotion', 'sample_token',
        'z_position', 'attribute',
    ]
    for key in keys:
        data[key] = []

    instance_map = {}
    # Loop over all the frames in the sequence.
    for index_t in self.indices[index]:
        rec = self.ixes[index_t]

        images, intrinsics, extrinsics = self.get_input_data(rec)
        segmentation, instance, z_position, instance_map, attribute_label = self.get_label(rec, instance_map)

        future_egomotion = self.get_future_egomotion(rec, index_t)

        data['image'].append(images)
        data['intrinsics'].append(intrinsics)
        data['extrinsics'].append(extrinsics)
        data['segmentation'].append(segmentation)
        data['instance'].append(instance)
        data['future_egomotion'].append(future_egomotion)
        data['sample_token'].append(rec['token'])
        data['z_position'].append(z_position)
        data['attribute'].append(attribute_label)

    # Stack the temporal dimension; centerness/offset/flow are computed below.
    for key, value in data.items():
        if key in ['sample_token', 'centerness', 'offset', 'flow']:
            continue
        data[key] = torch.cat(value, dim=0)

    # For Lyft, subsample every other frame and update the future egomotions
    # accordingly.
    if self.cfg.MODEL.SUBSAMPLE:
        for key, value in data.items():
            if key in ['future_egomotion', 'sample_token', 'centerness', 'offset', 'flow']:
                continue
            data[key] = data[key][::2].clone()
        data['sample_token'] = data['sample_token'][::2]

        # Compose consecutive egomotions so each kept step spans t -> t+2.
        future_egomotions_matrix = pose_vec2mat(data['future_egomotion'])
        future_egomotion_accum = torch.zeros_like(future_egomotions_matrix)
        future_egomotion_accum[:-1] = future_egomotions_matrix[:-1] @ future_egomotions_matrix[1:]
        future_egomotion_accum = mat2pose_vec(future_egomotion_accum)
        data['future_egomotion'] = future_egomotion_accum[::2].clone()

    instance_centerness, instance_offset, instance_flow = convert_instance_mask_to_center_and_offset_label(
        data['instance'], data['future_egomotion'],
        num_instances=len(instance_map),
        ignore_index=self.cfg.DATASET.IGNORE_INDEX,
        subtract_egomotion=True,
        spatial_extent=self.spatial_extent,
    )
    data['centerness'] = instance_centerness
    data['offset'] = instance_offset
    data['flow'] = instance_flow
    return data
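# `pose_vec2mat` and `mat2pose_vec` (used above for the egomotion subsampling,
# and in get_future_egomotion) are not defined in this section. A minimal
# sketch of the assumed conversions between a 6-DoF vector ordered
# (tx, ty, tz, rx, ry, rz) and a 4x4 matrix; the ordering and the X-Y-Z Euler
# convention are assumptions, not taken from this section.
import torch


def euler2mat(angle):
    """(..., 3) Euler angles (rx, ry, rz) -> (..., 3, 3) rotation R = Rx @ Ry @ Rz."""
    shape = angle.shape
    angle = angle.view(-1, 3)
    x, y, z = angle[:, 0], angle[:, 1], angle[:, 2]
    zeros, ones = torch.zeros_like(z), torch.ones_like(z)

    cosz, sinz = torch.cos(z), torch.sin(z)
    zmat = torch.stack([cosz, -sinz, zeros,
                        sinz, cosz, zeros,
                        zeros, zeros, ones], dim=1).view(-1, 3, 3)

    cosy, siny = torch.cos(y), torch.sin(y)
    ymat = torch.stack([cosy, zeros, siny,
                        zeros, ones, zeros,
                        -siny, zeros, cosy], dim=1).view(-1, 3, 3)

    cosx, sinx = torch.cos(x), torch.sin(x)
    xmat = torch.stack([ones, zeros, zeros,
                        zeros, cosx, -sinx,
                        zeros, sinx, cosx], dim=1).view(-1, 3, 3)

    return xmat.bmm(ymat).bmm(zmat).view(*shape[:-1], 3, 3)


def pose_vec2mat(vec):
    """(..., 6) pose vector -> (..., 4, 4) homogeneous transformation matrix."""
    translation = vec[..., :3].unsqueeze(-1)        # (..., 3, 1)
    rot_mat = euler2mat(vec[..., 3:].contiguous())  # (..., 3, 3)
    transform_mat = torch.cat([rot_mat, translation], dim=-1)                      # (..., 3, 4)
    transform_mat = torch.nn.functional.pad(transform_mat, [0, 0, 0, 1], value=0)  # (..., 4, 4)
    transform_mat[..., 3, 3] = 1.0
    return transform_mat


def mat2pose_vec(matrix):
    """(..., 4, 4) pose matrix -> (..., 6) pose vector, inverting pose_vec2mat."""
    # With R = Rx @ Ry @ Rz: R[1, 2] = -sin(rx)cos(ry), R[2, 2] = cos(rx)cos(ry),
    # R[0, 2] = sin(ry), R[0, 0] = cos(ry)cos(rz), R[0, 1] = -cos(ry)sin(rz).
    rotx = torch.atan2(-matrix[..., 1, 2], matrix[..., 2, 2])
    cosy = torch.sqrt(matrix[..., 1, 2] ** 2 + matrix[..., 2, 2] ** 2)
    roty = torch.atan2(matrix[..., 0, 2], cosy)
    rotz = torch.atan2(-matrix[..., 0, 1], matrix[..., 0, 0])
    rotation = torch.stack((rotx, roty, rotz), dim=-1)
    translation = matrix[..., :3, 3]
    return torch.cat((translation, rotation), dim=-1)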
def convert_instance_mask_to_center_and_offset_label(instance_img, future_egomotion, num_instances,
                                                     ignore_index=255, subtract_egomotion=True, sigma=3,
                                                     spatial_extent=None):
    seq_len, h, w = instance_img.shape
    center_label = torch.zeros(seq_len, 1, h, w)
    offset_label = ignore_index * torch.ones(seq_len, 2, h, w)
    future_displacement_label = ignore_index * torch.ones(seq_len, 2, h, w)
    # x is the vertical (row) coordinate, y the horizontal (column) coordinate.
    x, y = torch.meshgrid(torch.arange(h, dtype=torch.float),
                          torch.arange(w, dtype=torch.float))

    if subtract_egomotion:
        future_egomotion_inv = mat2pose_vec(pose_vec2mat(future_egomotion).inverse())

    # Warp each frame t back with the inverse egomotion so that instance
    # displacements are measured in the reference frame of t-1.
    warped_instance_seg = {}
    for t in range(1, seq_len):
        if subtract_egomotion:
            warped_inst_t = warp_features(instance_img[t].unsqueeze(0).unsqueeze(1).float(),
                                          future_egomotion_inv[t - 1].unsqueeze(0), mode='nearest',
                                          spatial_extent=spatial_extent)
            warped_instance_seg[t] = warped_inst_t[0, 0]
        else:
            warped_instance_seg[t] = instance_img[t]

    # Ignore id 0, which is the background.
    for instance_id in range(1, num_instances + 1):
        prev_xc = None
        prev_yc = None
        prev_mask = None
        for t in range(seq_len):
            instance_mask = (instance_img[t] == instance_id)
            if instance_mask.sum() == 0:
                # The instance is not visible in this frame.
                prev_xc = None
                prev_yc = None
                prev_mask = None
                continue

            # Instance centre, Gaussian centerness peak, and per-pixel offsets
            # pointing towards the centre.
            xc = x[instance_mask].mean().round().long()
            yc = y[instance_mask].mean().round().long()
            off_x = xc - x
            off_y = yc - y
            g = torch.exp(-(off_x ** 2 + off_y ** 2) / sigma ** 2)
            center_label[t, 0] = torch.maximum(center_label[t, 0], g)
            offset_label[t, 0, instance_mask] = off_x[instance_mask]
            offset_label[t, 1, instance_mask] = off_y[instance_mask]

            if prev_xc is not None:
                # Future flow: displacement of the (egomotion-compensated)
                # instance centre between frames t-1 and t, stored at t-1.
                warped_instance_mask = warped_instance_seg[t] == instance_id
                if warped_instance_mask.sum() > 0:
                    warped_xc = x[warped_instance_mask].mean().round()
                    warped_yc = y[warped_instance_mask].mean().round()

                    delta_x = warped_xc - prev_xc
                    delta_y = warped_yc - prev_yc
                    future_displacement_label[t - 1, 0, prev_mask] = delta_x
                    future_displacement_label[t - 1, 1, prev_mask] = delta_y

            prev_xc = xc
            prev_yc = yc
            prev_mask = instance_mask

    return center_label, offset_label, future_displacement_label
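# `warp_features` is also undefined here. A minimal sketch, assuming it rigidly
# warps a BEV feature map by the (x, y, yaw) components of a 6-DoF pose vector
# via an affine grid, with `spatial_extent` the metric half-extent of the BEV
# image in (x, y). Axis ordering and sign conventions are assumptions and would
# need to match the dataset's BEV frame.
import torch
import torch.nn.functional as F


def warp_features(x, pose_vec, mode='nearest', spatial_extent=None):
    """Rigidly warp feature map x (b, c, h, w) by pose_vec (b, 6)."""
    b = x.shape[0]
    angle = pose_vec[:, 5].clone()          # yaw (rotation about z)
    translation = pose_vec[:, :2].clone()   # (x, y) translation in metres

    # In normalised grid coordinates a translation of 1.0 spans half the
    # image, so divide by the metric half-extent of the BEV.
    translation[:, 0] /= spatial_extent[0]
    translation[:, 1] /= spatial_extent[1]
    translation[:, 0] *= -1  # forward axis flipped in image coords (assumed convention)

    cos_theta = torch.cos(angle)
    sin_theta = torch.sin(angle)
    # One 2x3 affine matrix per batch element: rotation then translation;
    # BEV x (forward) maps to the image's vertical axis, hence the swap.
    transformation = torch.stack([
        cos_theta, -sin_theta, translation[:, 1],
        sin_theta, cos_theta, translation[:, 0],
    ], dim=-1).view(b, 2, 3)

    grid = F.affine_grid(transformation, size=x.shape, align_corners=False)
    return F.grid_sample(x, grid, mode=mode, padding_mode='zeros', align_corners=False)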