def crop_sample_supervision(sample, borders):
    """
    Crops the output information of a sample (i.e. ground-truth supervision)

    Parameters
    ----------
    sample : dict
        Dictionary with sample values (output from a dataset's __getitem__ method)
    borders : tuple
        Borders used for cropping

    Returns
    -------
    sample : dict
        Cropped sample
    """
    # Supervision maps stored as single items
    single_keys = [
        'depth',
        'bbox2d_depth',
        'bbox3d_depth',
        'semantic',
        'bwd_optical_flow',
        'fwd_optical_flow',
        'valid_fwd_optical_flow',
        'bwd_scene_flow',
        'fwd_scene_flow',
    ]
    # Supervision maps stored as lists (one entry per context frame)
    context_keys = [
        'depth_context',
        'semantic_context',
        'bwd_optical_flow_context',
        'fwd_optical_flow_context',
        'bwd_scene_flow_context',
        'fwd_scene_flow_context',
    ]
    # Crop single maps
    for key in filter_dict(sample, single_keys):
        sample[key] = crop_depth(sample[key], borders)
    # Crop each entry of the context map lists
    for key in filter_dict(sample, context_keys):
        sample[key] = [crop_depth(item, borders) for item in sample[key]]
    return sample
def flip_batch_input(batch):
    """
    Flip batch input information (copies data first)

    Parameters
    ----------
    batch : dict
        Batch information

    Returns
    -------
    batch : dict
        Flipped batch
    """
    # Horizontally flip all image-like tensors that feed the network
    for key in filter_dict(batch, [
            'rgb', 'rgb_context',
            'input_depth', 'input_depth_context',
    ]):
        batch[key] = flip(batch[key], flip_lr)
    # Mirror the principal point of the intrinsics; clone first so the
    # dataloader's original tensor is not modified in-place
    for key in filter_dict(batch, ['intrinsics']):
        intrinsics = batch[key].clone()
        intrinsics[:, 0, 2] = batch['rgb'].shape[3] - intrinsics[:, 0, 2]
        batch[key] = intrinsics
    return batch
def resize_sample(sample, shape, image_interpolation=Image.ANTIALIAS):
    """
    Resizes a sample, including image, intrinsics and depth maps.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    # Resize images and rescale the matching intrinsics first
    sample = resize_sample_image_and_intrinsics_multifocal(
        sample, shape, image_interpolation)
    # Resize the single target depth map
    for key in filter_dict(sample, ['depth']):
        sample[key] = resize_depth(sample[key], shape)
    # Resize every list of context depth maps
    context_keys = [
        'depth_temporal_context',
        'depth_geometric_context',
        'depth_temporal_context_geometric_context',
    ]
    for key in filter_dict(sample, context_keys):
        sample[key] = [resize_depth(depth, shape) for depth in sample[key]]
    return sample
def duplicate_sample(sample):
    """
    Duplicates sample images and contexts to preserve their unaugmented versions.

    Parameters
    ----------
    sample : dict
        Input sample

    Returns
    -------
    sample : dict
        Sample including [+"_original"] keys with copies of images and contexts.
    """
    # Keep an untouched copy of the target image
    for key in filter_dict(sample, ['rgb']):
        sample[key + '_original'] = sample[key].copy()
    # Keep untouched copies of every context image list
    for key in filter_dict(sample, [
            'rgb_temporal_context',
            'rgb_geometric_context',
            'rgb_geometric_context_temporal_context',
    ]):
        sample[key + '_original'] = [image.copy() for image in sample[key]]
    return sample
def colorjitter_sample(sample, parameters, prob=1.0):
    """
    Jitters input images as data augmentation.

    Parameters
    ----------
    sample : dict
        Input sample
    parameters : tuple (brightness, contrast, saturation, hue)
        Color jittering parameters
    prob : float
        Jittering probability

    Returns
    -------
    sample : dict
        Jittered sample
    """
    # Apply jittering with probability `prob`
    if random.random() < prob:
        # Prepare transformation
        color_augmentation = transforms.ColorJitter()
        brightness, contrast, saturation, hue = parameters
        # A single transform is sampled once so the target and all context
        # images receive identical jittering.
        # NOTE(review): ColorJitter.get_params returns a callable transform
        # only in torchvision 0.7; from 0.8 on it returns a parameter tuple,
        # which would make `augment_image(...)` below fail — confirm the
        # pinned torchvision version for this code path.
        augment_image = color_augmentation.get_params(
            brightness=[max(0, 1 - brightness), 1 + brightness],
            contrast=[max(0, 1 - contrast), 1 + contrast],
            saturation=[max(0, 1 - saturation), 1 + saturation],
            hue=[-hue, hue])
        # Jitter single items
        for key in filter_dict(sample, ['rgb']):
            sample[key] = augment_image(sample[key])
        # Jitter lists
        for key in filter_dict(sample, ['rgb_context']):
            sample[key] = [augment_image(k) for k in sample[key]]
    # Return jittered (?) sample
    return sample
def upsample_output(output, mode='nearest', align_corners=None):
    """
    Upsample multi-scale outputs to full resolution.

    Parameters
    ----------
    output : dict
        Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty')
    mode : str
        Which interpolation mode is used
    align_corners: bool or None
        Whether corners will be aligned during interpolation

    Returns
    -------
    output : dict
        Upsampled output
    """
    # Upsample tensor-valued outputs directly
    for key in filter_dict(output, ['inv_depths', 'uncertainty']):
        output[key] = interpolate_scales(
            output[key], mode=mode, align_corners=align_corners)
    # Upsample each entry of list-valued context outputs
    for key in filter_dict(output, ['inv_depths_context']):
        upsampled = []
        for entry in output[key]:
            upsampled.append(interpolate_scales(
                entry, mode=mode, align_corners=align_corners))
        output[key] = upsampled
    return output
def to_tensor_sample(sample, tensor_type='torch.FloatTensor'):
    """
    Casts the keys of sample to tensors.

    Parameters
    ----------
    sample : dict
        Input sample
    tensor_type : str
        Type of tensor we are casting to

    Returns
    -------
    sample : dict
        Sample with keys cast as tensors
    """
    to_tensor = transforms.ToTensor()
    # Convert single images / maps
    for key in filter_dict(sample, ['rgb', 'rgb_original', 'depth']):
        sample[key] = to_tensor(sample[key]).type(tensor_type)
    # Convert lists of context images / maps
    for key in filter_dict(sample, [
            'rgb_context', 'rgb_context_original', 'depth_context',
    ]):
        sample[key] = [to_tensor(item).type(tensor_type)
                       for item in sample[key]]
    return sample
def resize_sample_image_and_intrinsics(sample, shape,
                                       image_interpolation=Image.ANTIALIAS):
    """
    Resizes the image and intrinsics of a sample

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(shape,
                                        interpolation=image_interpolation)
    orig_w, orig_h = sample['rgb'].size
    out_h, out_w = shape
    x_scale = out_w / orig_w
    y_scale = out_h / orig_h
    # Rescale every intrinsics matrix present (single 3x3, or a stack of
    # per-camera matrices when the first dimension indexes cameras)
    for key in filter_dict(sample, [
            'intrinsics', 'intrinsics_context',
            'intrinsics_left', 'intrinsics_context_left',
            'intrinsics_right', 'intrinsics_context_right',
    ]):
        intrinsics = np.copy(sample[key])
        if intrinsics.ndim == 2:
            intrinsics[0] *= x_scale
            intrinsics[1] *= y_scale
        else:
            intrinsics[:, 0] *= x_scale
            intrinsics[:, 1] *= y_scale
        sample[key] = intrinsics
    # Resize single images
    for key in filter_dict(sample, [
            'rgb', 'rgb_original',
            'rgb_left', 'rgb_left_original',
            'rgb_right', 'rgb_right_original',
    ]):
        sample[key] = image_transform(sample[key])
    # Resize context image lists
    for key in filter_dict(sample, ['rgb_context', 'rgb_context_original']):
        sample[key] = [image_transform(image) for image in sample[key]]
    return sample
def depth_net_flipping(self, batch, flip):
    """
    Runs depth net with the option of flipping

    Parameters
    ----------
    batch : dict
        Input batch
    flip : bool
        True if the flip is happening

    Returns
    -------
    output : dict
        Dictionary with depth network output (e.g. 'inv_depths' and 'uncertainty')
    """
    # Restrict the batch to the keys the depth network actually consumes
    batch_input = {
        key: batch[key] for key in filter_dict(batch, self._input_keys)
    }
    if not flip:
        # Straight forward pass
        return self.depth_net(**batch_input)
    # Forward pass on horizontally flipped inputs, then un-flip the
    # predictions so they line up with the original (unflipped) batch
    return flip_output(self.depth_net(**flip_batch_input(batch_input)))
def flip_output(output):
    """
    Flip output information

    Parameters
    ----------
    output : dict
        Dictionary of model outputs (e.g. with keys like 'inv_depths' and 'uncertainty')

    Returns
    -------
    output : dict
        Flipped output
    """
    # Every flippable prediction the networks may produce
    flippable_keys = [
        'uncertainty', 'logits_semantic', 'ord_probability',
        'inv_depths', 'inv_depths_context', 'inv_depths1', 'inv_depths2',
        'pred_depth', 'pred_depth_context', 'pred_depth1', 'pred_depth2',
        'pred_inv_depth', 'pred_inv_depth_context',
        'pred_inv_depth1', 'pred_inv_depth2',
    ]
    for key in filter_dict(output, flippable_keys):
        output[key] = flip(output[key], flip_lr)
    return output
def crop_sample_input(sample, borders):
    """
    Crops the input information of a sample (i.e. that go to the networks)

    Parameters
    ----------
    sample : dict
        Dictionary with sample values (output from a dataset's __getitem__ method)
    borders : tuple
        Borders used for cropping (left, top, right, bottom)

    Returns
    -------
    sample : dict
        Cropped sample
    """
    # Crop intrinsics, keeping a copy of the uncropped ones under '_full'
    for key in filter_dict(sample, ['intrinsics']):
        full_key = key + '_full'
        if full_key not in sample:
            sample[full_key] = np.copy(sample[key])
        sample[key] = crop_intrinsics(sample[key], borders)
    # Crop single images
    for key in filter_dict(sample, ['rgb', 'rgb_original', 'warped_rgb']):
        sample[key] = crop_image(sample[key], borders)
    # Crop context image lists
    for key in filter_dict(sample, ['rgb_context', 'rgb_context_original']):
        sample[key] = [crop_image(image, borders) for image in sample[key]]
    # Crop single input depth maps
    for key in filter_dict(sample, [
            'input_depth', 'bbox2d_depth', 'bbox3d_depth',
    ]):
        sample[key] = crop_depth(sample[key], borders)
    # Crop context input depth map lists
    for key in filter_dict(sample, ['input_depth_context']):
        sample[key] = [crop_depth(depth, borders) for depth in sample[key]]
    return sample
def colorjitter_sample(sample, parameters, prob=1.0):
    """
    Jitters input images as data augmentation.

    Parameters
    ----------
    sample : dict
        Input sample
    parameters : tuple (brightness, contrast, saturation, hue, color)
        Color jittering parameters
    prob : float
        Jittering probability

    Returns
    -------
    sample : dict
        Jittered sample
    """
    if random.random() >= prob:
        return sample
    # Shared jitter transform so target and context get identical augmentation
    jitter = random_color_jitter_transform(parameters[:4])
    # Optional random per-channel gains applied via a PIL color matrix
    color = parameters[4] if len(parameters) > 4 else 0
    if color > 0:
        gains = [random.uniform(1. - color, 1 + color) for _ in range(3)]
        matrix = (gains[0], 0, 0, 0,
                  0, gains[1], 0, 0,
                  0, 0, gains[2], 0)
    else:
        matrix = None
    # Jitter the target image
    for key in filter_dict(sample, ['rgb']):
        sample[key] = jitter(sample[key])
        if matrix is not None:
            sample[key] = sample[key].convert('RGB', matrix)
    # Jitter the context image lists
    for key in filter_dict(sample, ['rgb_context']):
        sample[key] = [jitter(image) for image in sample[key]]
        if matrix is not None:
            sample[key] = [image.convert('RGB', matrix)
                           for image in sample[key]]
    return sample
def rotate_sample(sample, degrees=20):
    """
    Rotates input images as data augmentation.
    Assumes the intrinsics to be scaled w.r.t the image,
    i.e. this step should be followed by (image & intrinsics) resizing.

    Parameters
    ----------
    sample : dict
        Input sample
    degrees : float
        Sample a random rotation angle between [-degrees, degrees]

    Returns
    -------
    sample : dict
        Rotated sample

    Author: Zeeshan Khan Suri
    """
    if degrees == 0:
        return sample
    # One shared random angle so target and context frames stay consistent
    angle = ((torch.rand(1) - 0.5) * 2 * degrees).item()
    # Rotate around the principal point taken from the intrinsics
    center = sample["intrinsics"][:2, 2].tolist()

    def _rotate(image, nearest):
        # Depth maps use nearest-neighbor so no fake depths are interpolated
        # NOTE(review): 'resample' was renamed 'interpolation' in newer
        # torchvision releases — confirm against the pinned version
        return TF.rotate(image, angle,
                         resample=Image.NEAREST if nearest else Image.BILINEAR,
                         center=center)

    # Rotate single images / maps
    for key in filter_dict(sample, ['rgb', 'rgb_original', 'depth']):
        sample[key] = _rotate(sample[key], key == 'depth')
    # Rotate context lists
    for key in filter_dict(sample, [
            'rgb_context', 'rgb_context_original', 'depth_context',
    ]):
        sample[key] = [_rotate(image, key == 'depth_context')
                       for image in sample[key]]
    return sample
def resize_sample_image_and_intrinsics(sample, shape,
                                       image_interpolation=Image.ANTIALIAS):
    """
    Resizes the image and intrinsics of a sample

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(shape,
                                        interpolation=image_interpolation)
    orig_w, orig_h = sample['rgb'].size
    out_h, out_w = shape
    # Rescale intrinsics by the per-axis resize ratios (copy first so the
    # original sample data is untouched)
    for key in filter_dict(sample, ['intrinsics']):
        sample[key] = scale_intrinsics(
            np.copy(sample[key]), out_w / orig_w, out_h / orig_h)
    # Resize single images
    for key in filter_dict(sample, ['rgb', 'rgb_original']):
        sample[key] = image_transform(sample[key])
    # Resize context image lists
    for key in filter_dict(sample, ['rgb_context', 'rgb_context_original']):
        sample[key] = [image_transform(image) for image in sample[key]]
    return sample
def colorjitter_sample(sample, parameters, prob=1.0):
    """
    Jitters input images as data augmentation.

    Parameters
    ----------
    sample : dict
        Input sample
    parameters : tuple (brightness, contrast, saturation, hue)
        Color jittering parameters
    prob : float
        Jittering probability

    Returns
    -------
    sample : dict
        Jittered sample

    Raises
    ------
    NotImplementedError
        If the installed torchvision is neither >= 0.9 nor 0.7.
    """
    if random.random() < prob:
        # Parse the torchvision version as (major, minor). The previous
        # check compared a single character of the version string
        # (`__version__[2]`), which made the 0.7 branch unreachable
        # (int('7') >= 7 is True) and raised for two-digit minors such
        # as '0.10'.
        version = torchvision.__version__.split('.')
        major_minor = (int(version[0]), int(version[1]))
        if major_minor >= (0, 9):
            # ColorJitter modules are directly callable from 0.9 onwards
            color_augmentation = transforms.ColorJitter(*parameters)
            augment_image = color_augmentation.forward
        elif major_minor == (0, 7):
            # In 0.7, get_params returns a transform built from the ranges,
            # sampled once so all images get identical jittering
            color_augmentation = transforms.ColorJitter()
            brightness, contrast, saturation, hue = parameters
            augment_image = color_augmentation.get_params(
                brightness=[max(0, 1 - brightness), 1 + brightness],
                contrast=[max(0, 1 - contrast), 1 + contrast],
                saturation=[max(0, 1 - saturation), 1 + saturation],
                hue=[-hue, hue])
        else:
            raise NotImplementedError(
                "torchvision version error: must be 0.9 or 0.7")
        # Jitter single items
        for key in filter_dict(sample, ['rgb']):
            sample[key] = augment_image(sample[key])
        # Jitter lists
        for key in filter_dict(sample, ['rgb_context']):
            sample[key] = [augment_image(k) for k in sample[key]]
    return sample
def center_crop_sample(sample, size):
    """
    Center crops a sample as data augmentation.

    Parameters
    ----------
    sample : dict
        Input sample
    size : sequence of (h, w)
        Output size of the crop

    Returns
    -------
    sample : dict
        Cropped sample
    """
    (w, h) = sample['rgb'].size
    crop_h, crop_w = size
    center_crop = transforms.CenterCrop((crop_h, crop_w))
    # Center cropping keeps focal lengths but shifts the principal point by
    # the crop offset. The previous code scaled the principal point by the
    # crop ratio, which is only correct when it sits exactly at the image
    # center; subtracting the crop offsets (rounded the same way CenterCrop
    # rounds them) is correct for pixel-space intrinsics, the convention
    # used by the resize/rotate functions in this module.
    left = int(round((w - crop_w) / 2.))
    top = int(round((h - crop_h) / 2.))
    sample["intrinsics"][0, 2] = sample["intrinsics"][0, 2] - left
    sample["intrinsics"][1, 2] = sample["intrinsics"][1, 2] - top
    # Crop the target image
    for key in filter_dict(sample, ['rgb']):
        sample[key] = center_crop(sample[key])
    # Crop the context images
    for key in filter_dict(sample, ['rgb_context']):
        sample[key] = [center_crop(k) for k in sample[key]]
    return sample
def _rescale_theta_lut_path(path, factor):
    """
    Rewrites the trailing '<W>_<H>.<ext>' resolution suffix of a theta-LUT
    path so it points at the LUT matching the rescaled image resolution.

    Parameters
    ----------
    path : str
        LUT path whose basename ends in '..._<W>_<H>.<ext>'
    factor : float
        Isotropic rescale factor applied to both W and H

    Returns
    -------
    str
        Path with both resolution components rescaled
    """
    parts = path.split('_')
    # Second-to-last '_' chunk is the width
    parts[-2] = str(int(factor * int(parts[-2])))
    # Last chunk is '<height>.<ext>'
    height_and_ext = parts[-1].split('.')
    height_and_ext[0] = str(int(factor * int(height_and_ext[0])))
    parts[-1] = '.'.join(height_and_ext[:2])
    return '_'.join(parts)


def resize_sample_image_and_intrinsics_fisheye(sample, shape,
                                               image_interpolation=Image.ANTIALIAS):
    """
    Resizes the image and intrinsics of a fisheye sample.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(shape,
                                        interpolation=image_interpolation)
    (orig_w, orig_h) = sample['rgb'].size
    (out_h, out_w) = shape
    rescale_factor_h = out_h / orig_h
    # Fisheye intrinsics only support isotropic rescaling
    assert rescale_factor_h == out_w / orig_w
    # Scale single fisheye intrinsics: polynomial coefficients and the
    # principal point both scale linearly with image size
    for key in filter_dict(sample, [
            'intrinsics_poly_coeffs', 'intrinsics_principal_point',
    ]):
        scaled = np.copy(sample[key])
        scaled *= rescale_factor_h
        sample[key] = scaled
    # Re-point the LUT path at the rescaled resolution (previously this
    # string surgery was duplicated verbatim for the context case below)
    for key in filter_dict(sample, ['path_to_theta_lut']):
        sample[key] = _rescale_theta_lut_path(sample[key], rescale_factor_h)
    # Scale context fisheye intrinsics (lists, one entry per context frame)
    for key in filter_dict(sample, [
            'intrinsics_poly_coeffs_context',
            'intrinsics_principal_point_context',
    ]):
        scaled_list = []
        for item in sample[key]:
            scaled = np.copy(item)
            scaled *= rescale_factor_h
            scaled_list.append(scaled)
        sample[key] = scaled_list
    for key in filter_dict(sample, ['path_to_theta_lut_context']):
        sample[key] = [_rescale_theta_lut_path(path, rescale_factor_h)
                       for path in sample[key]]
    # Resize single images
    for key in filter_dict(sample, ['rgb', 'rgb_original']):
        sample[key] = image_transform(sample[key])
    # Resize context image lists
    for key in filter_dict(sample, ['rgb_context', 'rgb_context_original']):
        sample[key] = [image_transform(k) for k in sample[key]]
    return sample
def resize_sample_image_and_intrinsics_multifocal(sample, shape,
                                                  image_interpolation=Image.ANTIALIAS):
    """
    Resizes the image and intrinsics of a multifocal sample.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(shape,
                                        interpolation=image_interpolation)
    orig_w, orig_h = sample['rgb'].size
    out_h, out_w = shape
    rescale_factor_h = out_h / orig_h
    # Fisheye-style intrinsics only support isotropic rescaling
    assert rescale_factor_h == out_w / orig_w

    def _scale_array(array):
        # Copy before scaling so the original sample data is untouched
        scaled = np.copy(array)
        scaled *= rescale_factor_h
        return scaled

    def _scale_pinhole(matrix):
        # Pinhole K matrices scale per-axis
        scaled = np.copy(matrix)
        scaled[0] *= out_w / orig_w
        scaled[1] *= out_h / orig_h
        return scaled

    # Single fisheye intrinsics
    for key in filter_dict(sample, ['intrinsics_poly_coeffs']):
        sample[key] = _scale_array(sample[key])
    for key in filter_dict(sample, ['intrinsics_principal_point']):
        sample[key] = _scale_array(sample[key])
    # Geometric-context fisheye intrinsics (lists)
    for key in filter_dict(sample, ['intrinsics_poly_coeffs_geometric_context']):
        sample[key] = [_scale_array(item) for item in sample[key]]
    for key in filter_dict(sample, ['intrinsics_principal_point_geometric_context']):
        sample[key] = [_scale_array(item) for item in sample[key]]
    # Pinhole intrinsics, single and geometric-context
    for key in filter_dict(sample, ['intrinsics_K']):
        sample[key] = _scale_pinhole(sample[key])
    for key in filter_dict(sample, ['intrinsics_K_geometric_context']):
        sample[key] = [_scale_pinhole(item) for item in sample[key]]
    # Resize single images
    for key in filter_dict(sample, ['rgb', 'rgb_original']):
        sample[key] = image_transform(sample[key])
    # Resize context image lists
    for key in filter_dict(sample, [
            'rgb_temporal_context',
            'rgb_geometric_context',
            'rgb_geometric_context_temporal_context',
            'rgb_temporal_context_original',
            'rgb_geometric_context_original',
            'rgb_geometric_context_temporal_context_original',
    ]):
        sample[key] = [image_transform(image) for image in sample[key]]
    return sample
def resize_sample_image_and_intrinsics_fisheye(sample, shape,
                                               image_interpolation=Image.ANTIALIAS):
    """
    Resizes the image and intrinsics of a fisheye sample.

    Parameters
    ----------
    sample : dict
        Dictionary with sample values
    shape : tuple (H,W)
        Output shape
    image_interpolation : int
        Interpolation mode

    Returns
    -------
    sample : dict
        Resized sample
    """
    image_transform = transforms.Resize(shape,
                                        interpolation=image_interpolation)
    (orig_w, orig_h) = sample['rgb'].size
    (out_h, out_w) = shape
    rescale_factor_w = out_w / orig_w
    # NOTE(review): the 966/960 factor looks dataset-specific (cropped
    # sensor height?) — confirm where these constants come from
    assert rescale_factor_w == out_h / orig_h * 966. / 960.
    # Fisheye polynomial coefficients and principal point scale linearly
    for key in filter_dict(sample, ['intrinsics_poly_coeffs']):
        intrinsics_poly_coeffs = np.copy(sample[key])
        intrinsics_poly_coeffs *= rescale_factor_w
        sample[key] = intrinsics_poly_coeffs
    for key in filter_dict(sample, ['intrinsics_principal_point']):
        intrinsics_principal_point = np.copy(sample[key])
        intrinsics_principal_point *= rescale_factor_w
        sample[key] = intrinsics_principal_point
    # Re-point the theta-LUT path at the LUT matching the rescaled
    # resolution: the basename ends in '..._<W>_<H>', both rescaled here
    for key in filter_dict(sample, ['path_to_theta_lut']):
        lut_path = sample[key]
        # Renamed from 'dir', which shadowed the builtin
        lut_dir = os.path.dirname(lut_path)
        base, ext = os.path.splitext(os.path.basename(lut_path))
        base_parts = base.split('_')
        base_parts[-1] = str(int(rescale_factor_w * float(base_parts[-1])))
        base_parts[-2] = str(int(rescale_factor_w * float(base_parts[-2])))
        # Reuse the original extension instead of hard-coding '.npy'
        # (identical for the '.npy' LUT files this code currently handles)
        sample[key] = os.path.join(lut_dir, '_'.join(base_parts) + ext)
    # Resize single images
    for key in filter_dict(sample, ['rgb', 'rgb_original']):
        sample[key] = image_transform(sample[key])
    # Resize context image lists
    for key in filter_dict(sample, ['rgb_context', 'rgb_context_original']):
        sample[key] = [image_transform(k) for k in sample[key]]
    return sample