def random_rectangles_params_gen(
        batch_size: int,
        height: int,
        width: int,
        erase_scale_range: Tuple[float, float],
        aspect_ratio_range: Tuple[float, float],
        value: float = 0.,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random rectangle parameters for a batch of erasing transforms."""
    images_area = height * width
    target_areas = _adapted_uniform(
        (batch_size,), erase_scale_range[0], erase_scale_range[1], same_on_batch) * images_area
    if aspect_ratio_range[0] < 1. and aspect_ratio_range[1] > 1.:
        # sample aspect ratios from both sides of 1 with equal probability
        aspect_ratios1 = _adapted_uniform((batch_size,), aspect_ratio_range[0], 1, same_on_batch)
        aspect_ratios2 = _adapted_uniform((batch_size,), 1, aspect_ratio_range[1], same_on_batch)
        rand_idxs = torch.round(torch.rand((batch_size,))).bool()
        aspect_ratios = torch.where(rand_idxs, aspect_ratios1, aspect_ratios2)
    else:
        aspect_ratios = _adapted_uniform(
            (batch_size,), aspect_ratio_range[0], aspect_ratio_range[1], same_on_batch)

    # based on target areas and aspect ratios, rectangle params are computed
    heights = torch.min(
        torch.max(torch.round((target_areas * aspect_ratios) ** (1 / 2)), torch.tensor(1.)),
        torch.tensor(float(height))
    ).int()
    widths = torch.min(
        torch.max(torch.round((target_areas / aspect_ratios) ** (1 / 2)), torch.tensor(1.)),
        torch.tensor(float(width))
    ).int()

    # top-left corners sampled so that each rectangle fits inside the image
    xs = (torch.rand((batch_size,)) * (torch.tensor(width) - widths + 1).float()).int()
    ys = (torch.rand((batch_size,)) * (torch.tensor(height) - heights + 1).float()).int()

    params: Dict[str, torch.Tensor] = {}
    params["widths"] = widths
    params["heights"] = heights
    params["xs"] = xs
    params["ys"] = ys
    params["values"] = torch.tensor([value] * batch_size)
    return params

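# Usage sketch (illustrative; the `_example_*` helpers are not part of the module).
# Draws erase rectangles for a batch of two 8x8 images; assumes `_adapted_uniform`
# and the other private helpers are importable alongside this function.
def _example_random_rectangles_params_gen() -> None:
    torch.manual_seed(42)  # seeded only to make the sketch deterministic
    params = random_rectangles_params_gen(
        batch_size=2, height=8, width=8,
        erase_scale_range=(0.1, 0.5), aspect_ratio_range=(0.5, 2.0))
    # each key holds one value per batch element
    assert params["widths"].shape == (2,)
    assert params["heights"].shape == (2,)
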
def motion_blur_param_generator(
        batch_size: int,
        kernel_size: Union[int, Tuple[int, int]],
        angle: UnionFloat,
        direction: UnionFloat,
        border_type: Union[int, str, BorderType] = BorderType.CONSTANT.name,
        same_on_batch: bool = True) -> Dict[str, torch.Tensor]:
    angle_bound: torch.Tensor = _check_and_bound(angle, 'angle', center=0.)
    direction_bound: torch.Tensor = _check_and_bound(direction, 'direction', center=0., bounds=(-1, 1))

    if isinstance(kernel_size, int):
        ksize_factor = torch.tensor([kernel_size] * batch_size)
    elif isinstance(kernel_size, tuple):
        # sample odd kernel sizes within [ksize_x, ksize_y]
        ksize_x, ksize_y = kernel_size
        ksize_factor = _adapted_uniform(
            (batch_size,), ksize_x // 2, ksize_y // 2, same_on_batch).int() * 2 + 1
    else:
        raise TypeError(f"Unsupported type: {type(kernel_size)}")

    angle_factor = _adapted_uniform(
        (batch_size,), angle_bound[0], angle_bound[1], same_on_batch)
    direction_factor = _adapted_uniform(
        (batch_size,), direction_bound[0], direction_bound[1], same_on_batch)

    return dict(ksize_factor=ksize_factor,
                angle_factor=angle_factor,
                direction_factor=direction_factor,
                border_type=torch.tensor(BorderType.get(border_type).value))

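# Usage sketch (illustrative; the `_example_*` name is hypothetical). Samples
# motion-blur parameters with kernel sizes drawn from [3, 9]; kernels stay odd
# because sampled half-sizes are mapped through `k * 2 + 1`.
def _example_motion_blur_param_generator() -> None:
    torch.manual_seed(0)
    params = motion_blur_param_generator(
        batch_size=2, kernel_size=(3, 9), angle=30., direction=0.5)
    assert bool((params["ksize_factor"] % 2 == 1).all())
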
def random_crop_generator(
        batch_size: int,
        input_size: Tuple[int, int],
        size: Tuple[int, int],
        resize_to: Optional[Tuple[int, int]] = None,
        interpolation: Union[str, int, Resample] = Resample.BILINEAR.name,
        same_on_batch: bool = False,
        align_corners: bool = False) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``crop`` for a random crop transform.

    Args:
        batch_size (int): the tensor batch size.
        input_size (tuple): Input image shape, like (h, w).
        size (tuple): Desired size of the crop operation, like (h, w).
        resize_to (tuple): Desired output size of the crop, like (h, w). If None, no resize will be performed.
        interpolation (int, str or kornia.Resample): Default: Resample.BILINEAR.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        align_corners (bool): interpolation flag. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    x_diff = input_size[1] - size[1]
    y_diff = input_size[0] - size[0]

    if x_diff < 0 or y_diff < 0:
        raise ValueError("input_size %s cannot be smaller than crop size %s in any dimension."
                         % (str(input_size), str(size)))

    x_start = _adapted_uniform((batch_size,), 0, x_diff + 1, same_on_batch).long()
    y_start = _adapted_uniform((batch_size,), 0, y_diff + 1, same_on_batch).long()

    # crop corners in (x, y), ordered top-left, top-right, bottom-right, bottom-left
    crop = torch.tensor([[
        [0, 0],
        [size[1] - 1, 0],
        [size[1] - 1, size[0] - 1],
        [0, size[0] - 1],
    ]]).repeat(batch_size, 1, 1)

    crop_src = crop.clone()
    crop_src[:, :, 0] += x_start.unsqueeze(dim=0).reshape(batch_size, 1)
    crop_src[:, :, 1] += y_start.unsqueeze(dim=0).reshape(batch_size, 1)

    if resize_to is None:
        crop_dst = crop
    else:
        crop_dst = torch.tensor([[
            [0, 0],
            [resize_to[1] - 1, 0],
            [resize_to[1] - 1, resize_to[0] - 1],
            [0, resize_to[0] - 1],
        ]]).repeat(batch_size, 1, 1)

    return dict(src=crop_src,
                dst=crop_dst,
                interpolation=torch.tensor(Resample.get(interpolation).value),
                align_corners=torch.tensor(align_corners))

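# Usage sketch (illustrative): crop parameters for two 64x64 images cropped to
# 32x32. `src`/`dst` hold the four crop corners per batch element.
def _example_random_crop_generator() -> None:
    torch.manual_seed(0)
    params = random_crop_generator(2, input_size=(64, 64), size=(32, 32))
    assert params["src"].shape == (2, 4, 2)
    assert params["dst"].shape == (2, 4, 2)
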
def random_crop_gen(batch_size: int,
                    input_size: Tuple[int, int],
                    size: Tuple[int, int],
                    resize_to: Optional[Tuple[int, int]] = None,
                    same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    x_diff = input_size[1] - size[1]
    y_diff = input_size[0] - size[0]

    if x_diff < 0 or y_diff < 0:
        raise ValueError("input_size %s cannot be smaller than crop size %s in any dimension."
                         % (str(input_size), str(size)))

    x_start = _adapted_uniform((batch_size,), 0, x_diff + 1, same_on_batch).long()
    y_start = _adapted_uniform((batch_size,), 0, y_diff + 1, same_on_batch).long()

    # crop corners in (x, y), ordered top-left, top-right, bottom-right, bottom-left
    crop = torch.tensor([[
        [0, 0],
        [size[1] - 1, 0],
        [size[1] - 1, size[0] - 1],
        [0, size[0] - 1],
    ]]).repeat(batch_size, 1, 1)

    crop_src = crop.clone()
    crop_src[:, :, 0] += x_start.unsqueeze(dim=0).reshape(batch_size, 1)
    crop_src[:, :, 1] += y_start.unsqueeze(dim=0).reshape(batch_size, 1)

    if resize_to is None:
        crop_dst = crop
    else:
        crop_dst = torch.tensor([[
            [0, 0],
            [resize_to[1] - 1, 0],
            [resize_to[1] - 1, resize_to[0] - 1],
            [0, resize_to[0] - 1],
        ]]).repeat(batch_size, 1, 1)

    return {'src': crop_src, 'dst': crop_dst}

def random_crop_size_gen(size: Tuple[int, int],
                         scale: Tuple[float, float],
                         ratio: Tuple[float, float],
                         same_on_batch: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:
    area = _adapted_uniform(
        (10,), scale[0] * size[0] * size[1], scale[1] * size[0] * size[1], same_on_batch)
    log_ratio = _adapted_uniform(
        (10,), math.log(ratio[0]), math.log(ratio[1]), same_on_batch)
    aspect_ratio = torch.exp(log_ratio)

    w = torch.sqrt(area * aspect_ratio).int()
    h = torch.sqrt(area / aspect_ratio).int()
    # Element-wise w, h condition
    cond = ((0 < h) * (h < size[1]) * (0 < w) * (w < size[0])).int()
    if torch.sum(cond) > 0:
        return (h[torch.argmax(cond)], w[torch.argmax(cond)])

    # Fallback to center crop
    in_ratio = float(size[0]) / float(size[1])
    if in_ratio < min(ratio):
        w = torch.tensor(size[0])
        h = torch.round(w / min(ratio))
    elif in_ratio > max(ratio):
        h = torch.tensor(size[1])
        w = torch.round(h * max(ratio))
    else:  # whole image
        w = torch.tensor(size[0])
        h = torch.tensor(size[1])
    return (h, w)

def random_color_jitter_generator(
        batch_size: int,
        brightness: FloatUnionType = 0.,
        contrast: FloatUnionType = 0.,
        saturation: FloatUnionType = 0.,
        hue: FloatUnionType = 0.,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random color jitter parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        brightness (float or tuple): Default value is 0.
        contrast (float or tuple): Default value is 0.
        saturation (float or tuple): Default value is 0.
        hue (float or tuple): Default value is 0.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    brightness_bound: torch.Tensor = _check_and_bound(
        brightness, 'brightness', center=1., bounds=(0, float('inf')))
    contrast_bound: torch.Tensor = _check_and_bound(
        contrast, 'contrast', center=1., bounds=(0, float('inf')))
    saturation_bound: torch.Tensor = _check_and_bound(
        saturation, 'saturation', center=1., bounds=(0, float('inf')))
    hue_bound: torch.Tensor = _check_and_bound(hue, 'hue', bounds=(-0.5, 0.5))

    brightness_factor = _adapted_uniform(
        (batch_size,), brightness_bound[0], brightness_bound[1], same_on_batch)
    contrast_factor = _adapted_uniform(
        (batch_size,), contrast_bound[0], contrast_bound[1], same_on_batch)
    hue_factor = _adapted_uniform((batch_size,), hue_bound[0], hue_bound[1], same_on_batch)
    saturation_factor = _adapted_uniform(
        (batch_size,), saturation_bound[0], saturation_bound[1], same_on_batch)

    return dict(brightness_factor=brightness_factor,
                contrast_factor=contrast_factor,
                hue_factor=hue_factor,
                saturation_factor=saturation_factor,
                order=torch.randperm(4))

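# Usage sketch (illustrative): jitter factors for a batch of four images. The
# `order` entry is a random permutation telling the caller in which order to
# apply the four adjustments.
def _example_random_color_jitter_generator() -> None:
    torch.manual_seed(0)
    params = random_color_jitter_generator(
        4, brightness=0.2, contrast=0.3, saturation=0.4, hue=0.1)
    assert params["brightness_factor"].shape == (4,)
    assert sorted(params["order"].tolist()) == [0, 1, 2, 3]
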
def random_solarize_generator(
    batch_size: int,
    thresholds: torch.Tensor = torch.tensor([0.4, 0.6]),
    additions: torch.Tensor = torch.tensor([-0.1, 0.1]),
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Generate random solarize parameters for a batch of images.

    For each pixel in the image less than threshold, we add 'addition' amount to it and then clip the pixel
    value to be between 0 and 1.0.

    Args:
        batch_size (int): the number of images.
        thresholds (torch.Tensor): Pixels less than threshold will be selected. Otherwise, subtract 1.0 from
            the pixel. Takes in a range tensor of (0, 1). Default value will be sampled from [0.4, 0.6].
        additions (torch.Tensor): The value is between -0.5 and 0.5. Default value will be sampled
            from [-0.1, 0.1].
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - thresholds_factor (torch.Tensor): element-wise thresholds factors with a shape of (B,).
            - additions_factor (torch.Tensor): element-wise additions factors with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    _common_param_check(batch_size, same_on_batch)
    _joint_range_check(thresholds, 'thresholds', (0, 1))
    _joint_range_check(additions, 'additions', (-0.5, 0.5))
    _device, _dtype = _extract_device_dtype([thresholds, additions])

    thresholds_factor = _adapted_uniform(
        (batch_size,),
        thresholds[0].to(device=device, dtype=dtype),
        thresholds[1].to(device=device, dtype=dtype),
        same_on_batch,
    )
    additions_factor = _adapted_uniform(
        (batch_size,),
        additions[0].to(device=device, dtype=dtype),
        additions[1].to(device=device, dtype=dtype),
        same_on_batch,
    )

    return dict(
        thresholds_factor=thresholds_factor.to(device=_device, dtype=_dtype),
        additions_factor=additions_factor.to(device=_device, dtype=_dtype),
    )

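# Usage sketch (illustrative): per-image solarize thresholds and additions with
# the default sampling ranges [0.4, 0.6] and [-0.1, 0.1].
def _example_random_solarize_generator() -> None:
    torch.manual_seed(0)
    params = random_solarize_generator(3)
    assert params["thresholds_factor"].shape == (3,)
    assert params["additions_factor"].shape == (3,)
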
def random_color_jitter_generator(
        batch_size: int,
        brightness: FloatUnionType = 0.,
        contrast: FloatUnionType = 0.,
        saturation: FloatUnionType = 0.,
        hue: FloatUnionType = 0.,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random color jitter parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        brightness (float or tuple): Default value is 0.
        contrast (float or tuple): Default value is 0.
        saturation (float or tuple): Default value is 0.
        hue (float or tuple): Default value is 0.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        dict: generated parameter dictionary.

    See :class:`~kornia.augmentation.ColorJitter` for details.
    """
    brightness_bound: torch.Tensor = _check_and_bound(
        brightness, 'brightness', center=1., bounds=(0, 2))
    contrast_bound: torch.Tensor = _check_and_bound(contrast, 'contrast', center=1.)
    saturation_bound: torch.Tensor = _check_and_bound(saturation, 'saturation', center=1.)
    hue_bound: torch.Tensor = _check_and_bound(hue, 'hue', bounds=(-0.5, 0.5))

    brightness_factor = _adapted_uniform(
        (batch_size,), brightness_bound[0], brightness_bound[1], same_on_batch)
    contrast_factor = _adapted_uniform(
        (batch_size,), contrast_bound[0], contrast_bound[1], same_on_batch)
    hue_factor = _adapted_uniform((batch_size,), hue_bound[0], hue_bound[1], same_on_batch)
    saturation_factor = _adapted_uniform(
        (batch_size,), saturation_bound[0], saturation_bound[1], same_on_batch)

    return {
        "brightness_factor": brightness_factor,
        "contrast_factor": contrast_factor,
        "hue_factor": hue_factor,
        "saturation_factor": saturation_factor,
        "order": torch.randperm(4),
    }

def random_rotation_gen(batch_size: int,
                        degrees: FloatUnionType,
                        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    if not torch.is_tensor(degrees):
        if isinstance(degrees, float):
            if degrees < 0:
                raise ValueError(f"If degrees is a single number it must be positive. Got {degrees}")
            degrees = torch.tensor([-degrees, degrees])
        elif isinstance(degrees, (tuple, list)):
            degrees = torch.tensor(degrees)
        else:
            raise TypeError(f"Degrees should be a float number, a sequence or a tensor. Got {type(degrees)}")

    # https://mypy.readthedocs.io/en/latest/casts.html cast to please mypy gods
    degrees = cast(torch.Tensor, degrees)

    if degrees.numel() != 2:
        raise ValueError("If degrees is a sequence it must be of length 2")

    params: Dict[str, torch.Tensor] = {}
    params["degrees"] = _adapted_uniform((batch_size,), degrees[0], degrees[1], same_on_batch)
    return params

def random_sharpness_generator(
        batch_size: int,
        sharpness: Union[float, Tuple[float, float], torch.Tensor] = 1.,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random sharpness parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        sharpness (float or tuple): Default value is 1. Must be above 0.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    if not isinstance(sharpness, torch.Tensor):
        sharpness = torch.tensor(sharpness)

    if len(sharpness.size()) == 0:
        # a scalar means the sampling range [0, sharpness]
        lower = torch.tensor(0)
        upper = sharpness
    elif len(sharpness.size()) == 1 and sharpness.size(0) == 2:
        lower = sharpness[0]
        upper = sharpness[1]
    else:
        raise ValueError(f"Expect float or tuple. Got {sharpness}.")

    sharpness_factor = _adapted_uniform((batch_size,), lower.float(), upper.float(), same_on_batch)

    return dict(sharpness_factor=sharpness_factor)

def random_posterize_generator(
        batch_size: int,
        bits: Union[int, Tuple[int, int], torch.Tensor] = 3,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random posterize parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        bits (int or tuple): Default value is 3. An integer in the range 0..8.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    if not isinstance(bits, torch.Tensor):
        bits = torch.tensor(bits)

    if len(bits.size()) == 0:
        # a scalar means the sampling range [bits, 8]
        lower = bits
        upper = torch.tensor(8)
    elif len(bits.size()) == 1 and bits.size(0) == 2:
        lower = bits[0]
        upper = bits[1]
    else:
        raise ValueError(f"Expect int or tuple. Got {bits}.")

    bits_factor = _adapted_uniform((batch_size,), lower.float(), upper.float(), same_on_batch).int()

    return dict(bits_factor=bits_factor)

def random_rotation_generator(
    batch_size: int,
    degrees: torch.Tensor,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``rotate`` for a random rotate transform.

    Args:
        batch_size (int): the tensor batch size.
        degrees (torch.Tensor): range of degrees with shape (2,) to select from.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - degrees (torch.Tensor): element-wise rotation degrees with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    _common_param_check(batch_size, same_on_batch)
    _joint_range_check(degrees, "degrees")

    _degrees = _adapted_uniform(
        (batch_size,),
        degrees[0].to(device=device, dtype=dtype),
        degrees[1].to(device=device, dtype=dtype),
        same_on_batch,
    )
    _degrees = _degrees.to(device=degrees.device, dtype=degrees.dtype)

    return dict(degrees=_degrees)

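# Usage sketch (illustrative): rotation angles for four images drawn uniformly
# from [-45, 45] degrees.
def _example_random_rotation_generator() -> None:
    torch.manual_seed(0)
    params = random_rotation_generator(4, degrees=torch.tensor([-45.0, 45.0]))
    d = params["degrees"]
    assert d.shape == (4,) and bool(((d >= -45) & (d <= 45)).all())
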
def random_sharpness_generator(
    batch_size: int,
    sharpness: torch.Tensor = torch.tensor([0, 1.0]),
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Generate random sharpness parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        sharpness (torch.Tensor): Must be above 0. Default value is sampled from (0, 1).
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - sharpness_factor (torch.Tensor): element-wise sharpness factors with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    _common_param_check(batch_size, same_on_batch)
    _joint_range_check(sharpness, 'sharpness', bounds=(0, float('inf')))

    sharpness_factor = _adapted_uniform(
        (batch_size,),
        sharpness[0].to(device=device, dtype=dtype),
        sharpness[1].to(device=device, dtype=dtype),
        same_on_batch,
    )

    return dict(sharpness_factor=sharpness_factor.to(device=sharpness.device, dtype=sharpness.dtype))

def random_perspective_generator(
    batch_size: int,
    height: int,
    width: int,
    distortion_scale: torch.Tensor,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``perspective`` for a random perspective transform.

    Args:
        batch_size (int): the tensor batch size.
        height (int) : height of the image.
        width (int): width of the image.
        distortion_scale (torch.Tensor): it controls the degree of distortion and ranges from 0 to 1.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - start_points (torch.Tensor): element-wise perspective source areas with a shape of (B, 4, 2).
            - end_points (torch.Tensor): element-wise perspective target areas with a shape of (B, 4, 2).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    _common_param_check(batch_size, same_on_batch)
    if not (distortion_scale.dim() == 0 and 0 <= distortion_scale <= 1):
        raise AssertionError(f"'distortion_scale' must be a scalar within [0, 1]. Got {distortion_scale}.")
    if not (type(height) is int and height > 0 and type(width) is int and width > 0):
        raise AssertionError(f"'height' and 'width' must be integers. Got {height}, {width}.")

    start_points: torch.Tensor = torch.tensor(
        [[[0.0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]],
        device=distortion_scale.device,
        dtype=distortion_scale.dtype,
    ).expand(batch_size, -1, -1)

    # generate random offset not larger than half of the image
    fx = distortion_scale * width / 2
    fy = distortion_scale * height / 2

    factor = torch.stack([fx, fy], dim=0).view(-1, 1, 2)

    # TODO: This line somehow breaks the gradcheck
    rand_val: torch.Tensor = _adapted_uniform(
        start_points.shape,
        torch.tensor(0, device=device, dtype=dtype),
        torch.tensor(1, device=device, dtype=dtype),
        same_on_batch,
    ).to(device=distortion_scale.device, dtype=distortion_scale.dtype)

    pts_norm = torch.tensor(
        [[[1, 1], [-1, 1], [-1, -1], [1, -1]]], device=distortion_scale.device, dtype=distortion_scale.dtype
    )
    end_points = start_points + factor * rand_val * pts_norm

    return dict(start_points=start_points, end_points=end_points)

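# Usage sketch (illustrative): source and target quadrilaterals for a batch of
# two 32x32 images; each corner moves by at most distortion_scale * side / 2.
def _example_random_perspective_generator() -> None:
    torch.manual_seed(0)
    params = random_perspective_generator(
        2, height=32, width=32, distortion_scale=torch.tensor(0.5))
    assert params["start_points"].shape == (2, 4, 2)
    assert params["end_points"].shape == (2, 4, 2)
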
def _get_perspective_params(
        batch_size: int, width: int, height: int, distortion_scale: float,
        same_on_batch: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:
    start_points: torch.Tensor = torch.tensor([[
        [0., 0],
        [width - 1, 0],
        [width - 1, height - 1],
        [0, height - 1],
    ]]).expand(batch_size, -1, -1)

    # generate random offset not larger than half of the image
    fx: float = distortion_scale * width / 2
    fy: float = distortion_scale * height / 2
    factor = torch.tensor([fx, fy]).view(-1, 1, 2)

    rand_val: torch.Tensor = _adapted_uniform((batch_size, 4, 2), 0, 1, same_on_batch)
    pts_norm = torch.tensor([[[1, 1], [-1, 1], [-1, -1], [1, -1]]])
    end_points = start_points + factor * rand_val * pts_norm

    return start_points, end_points

def random_prob_generator(
        batch_size: int, p: float = 0.5, same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random probabilities for a batch of inputs.

    Args:
        batch_size (int): the number of images.
        p (float): probability of the image being flipped or grayscaled. Default value is 0.5.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        dict: generated parameter dictionary.

    See :class:`~kornia.augmentation.RandomGrayscale` for details.
    See :class:`~kornia.augmentation.RandomHorizontalFlip` for details.
    See :class:`~kornia.augmentation.RandomVerticalFlip` for details.
    """
    if not isinstance(p, float):
        raise TypeError(f"The probability should be a float number. Got {type(p)}")

    probs: torch.Tensor = _adapted_uniform((batch_size,), 0, 1, same_on_batch)
    batch_prob: torch.Tensor = probs < p

    return {'batch_prob': batch_prob}

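# Usage sketch (illustrative): a boolean mask selecting roughly p of the batch.
def _example_random_prob_generator() -> None:
    torch.manual_seed(0)
    params = random_prob_generator(8, p=0.5)
    assert params["batch_prob"].shape == (8,)
    assert params["batch_prob"].dtype == torch.bool
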
def random_posterize_generator(
    batch_size: int,
    bits: torch.Tensor = torch.tensor([3, 5]),
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Generate random posterize parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        bits (torch.Tensor): a tensor of two integers in the range 0..8 giving the minimum and maximum
            number of bits to keep. Default value is [3, 5].
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - bits_factor (torch.Tensor): element-wise bit factors with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    _common_param_check(batch_size, same_on_batch)
    _joint_range_check(bits, 'bits', (0, 8))

    bits_factor = _adapted_uniform(
        (batch_size,), bits[0].to(device=device, dtype=dtype), bits[1].to(device=device, dtype=dtype),
        same_on_batch
    ).int()

    return dict(bits_factor=bits_factor.to(device=bits.device, dtype=torch.int32))

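# Usage sketch (illustrative): per-image bit factors drawn from [3, 5].
def _example_random_posterize_generator() -> None:
    torch.manual_seed(0)
    params = random_posterize_generator(4, bits=torch.tensor([3, 5]))
    b = params["bits_factor"]
    assert bool(((b >= 3) & (b <= 5)).all())
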
def random_mixup_generator(
    batch_size: int,
    p: float = 0.5,
    lambda_val: Optional[torch.Tensor] = None,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Generate mixup indexes and lambdas for a batch of inputs.

    Args:
        batch_size (int): the number of images. If batch_size == 1, the output will be as same as the input.
        p (float): probability of applying mixup.
        lambda_val (torch.Tensor, optional): min-max strength for mixup images, ranged from [0., 1.].
            If None, it will be set to tensor([0., 1.]), which means no restrictions.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - mixup_pairs (torch.Tensor): element-wise probabilities with a shape of (B,).
            - mixup_lambdas (torch.Tensor): element-wise probabilities with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.

    Examples:
        >>> rng = torch.manual_seed(0)
        >>> random_mixup_generator(5, 0.7)
        {'mixup_pairs': tensor([4, 0, 3, 1, 2]), 'mixup_lambdas': tensor([0.6323, 0.0000, 0.4017, 0.0223, 0.1689])}
    """
    _common_param_check(batch_size, same_on_batch)
    _device, _dtype = _extract_device_dtype([lambda_val])
    lambda_val = torch.as_tensor(
        [0.0, 1.0] if lambda_val is None else lambda_val, device=device, dtype=dtype)
    _joint_range_check(lambda_val, 'lambda_val', bounds=(0, 1))

    batch_probs: torch.Tensor = random_prob_generator(
        batch_size, p, same_on_batch=same_on_batch, device=device, dtype=dtype)
    mixup_pairs: torch.Tensor = torch.randperm(batch_size, device=device, dtype=dtype).long()
    mixup_lambdas: torch.Tensor = _adapted_uniform(
        (batch_size,), lambda_val[0], lambda_val[1], same_on_batch=same_on_batch)
    # zero out lambdas for batch elements where mixup is not applied
    mixup_lambdas = mixup_lambdas * batch_probs

    return dict(
        mixup_pairs=mixup_pairs.to(device=_device, dtype=torch.long),
        mixup_lambdas=mixup_lambdas.to(device=_device, dtype=_dtype),
    )

def random_crop_size_generator(
        size: Tuple[int, int],
        scale: Tuple[float, float],
        ratio: Tuple[float, float],
        same_on_batch: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Get cropping heights and widths for ``crop`` for a resized crop transform.

    Args:
        size (Tuple[int, int]): expected output size of each edge.
        scale (Tuple[float, float]): range of size of the origin size cropped.
        ratio (Tuple[float, float]): range of aspect ratio of the origin aspect ratio cropped.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: the sampled cropping height and width.
    """
    area = _adapted_uniform(
        (10,), scale[0] * size[0] * size[1], scale[1] * size[0] * size[1], same_on_batch)
    log_ratio = _adapted_uniform(
        (10,), math.log(ratio[0]), math.log(ratio[1]), same_on_batch)
    aspect_ratio = torch.exp(log_ratio)

    w = torch.sqrt(area * aspect_ratio).int()
    h = torch.sqrt(area / aspect_ratio).int()
    # Element-wise w, h condition
    cond = ((0 < h) * (h < size[1]) * (0 < w) * (w < size[0])).int()
    if torch.sum(cond) > 0:
        return (h[torch.argmax(cond)], w[torch.argmax(cond)])

    # Fallback to center crop
    in_ratio = float(size[0]) / float(size[1])
    if in_ratio < min(ratio):
        w = torch.tensor(size[0])
        h = torch.round(w / min(ratio))
    elif in_ratio > max(ratio):
        h = torch.tensor(size[1])
        w = torch.round(h * max(ratio))
    else:  # whole image
        w = torch.tensor(size[0])
        h = torch.tensor(size[1])
    return (h, w)

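# Usage sketch (illustrative): a single (h, w) crop size for a 32x32 output,
# keeping between 80% and 100% of the area at a near-square aspect ratio.
def _example_random_crop_size_generator() -> None:
    torch.manual_seed(0)
    h, w = random_crop_size_generator((32, 32), scale=(0.8, 1.0), ratio=(0.75, 1.33))
    assert int(h) <= 32 and int(w) <= 32
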
def random_rectangles_params_generator(
        batch_size: int,
        height: int,
        width: int,
        p: float,
        scale: Tuple[float, float],
        ratio: Tuple[float, float],
        value: float = 0.,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``erasing`` for a random erasing transform.

    Args:
        batch_size (int): the tensor batch size.
        height (int) : height of the image.
        width (int): width of the image.
        p (float): probability of applying random erasing.
        scale ([float, float]): range of size of the origin size cropped.
        ratio ([float, float]): range of aspect ratio of the origin aspect ratio cropped.
        value (float): value to be filled in the erased area.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
    """
    batch_prob = random_prob_generator(batch_size, p, same_on_batch)['batch_prob']
    zeros = torch.zeros((batch_size,))
    images_area = height * width
    target_areas = _adapted_uniform(
        (batch_size,), scale[0], scale[1], same_on_batch) * images_area
    if ratio[0] < 1. and ratio[1] > 1.:
        # sample aspect ratios from both sides of 1 with equal probability
        aspect_ratios1 = _adapted_uniform((batch_size,), ratio[0], 1, same_on_batch)
        aspect_ratios2 = _adapted_uniform((batch_size,), 1, ratio[1], same_on_batch)
        rand_idxs = torch.round(torch.rand((batch_size,))).bool()
        aspect_ratios = torch.where(rand_idxs, aspect_ratios1, aspect_ratios2)
    else:
        aspect_ratios = _adapted_uniform((batch_size,), ratio[0], ratio[1], same_on_batch)

    # based on target areas and aspect ratios, rectangle params are computed
    heights = torch.min(
        torch.max(torch.round((target_areas * aspect_ratios) ** (1 / 2)), torch.tensor(1.)),
        torch.tensor(float(height))).int()
    widths = torch.min(
        torch.max(torch.round((target_areas / aspect_ratios) ** (1 / 2)), torch.tensor(1.)),
        torch.tensor(float(width))).int()

    xs = (_adapted_uniform((batch_size,), 0, 1, same_on_batch)
          * (torch.tensor(width) - widths + 1).float()).int()
    ys = (_adapted_uniform((batch_size,), 0, 1, same_on_batch)
          * (torch.tensor(height) - heights + 1).float()).int()

    params: Dict[str, torch.Tensor] = {}
    # rectangles for non-selected batch elements are zeroed out
    params["widths"] = torch.where(batch_prob, widths, zeros.to(widths.dtype))
    params["heights"] = torch.where(batch_prob, heights, zeros.to(widths.dtype))
    params["xs"] = xs
    params["ys"] = ys
    params["values"] = torch.tensor([value] * batch_size)
    return params

def random_rotation_generator3d(
    batch_size: int,
    degrees: torch.Tensor,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``rotate`` for a random rotate transform.

    Args:
        batch_size (int): the tensor batch size.
        degrees (torch.Tensor): Ranges of degrees (3, 2) for yaw, pitch and roll.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - yaw (torch.Tensor): element-wise rotation yaws with a shape of (B,).
            - pitch (torch.Tensor): element-wise rotation pitches with a shape of (B,).
            - roll (torch.Tensor): element-wise rotation rolls with a shape of (B,).
    """
    if degrees.shape != torch.Size([3, 2]):
        raise AssertionError(f"'degrees' must be the shape of (3, 2). Got {degrees.shape}.")
    _device, _dtype = _extract_device_dtype([degrees])
    degrees = degrees.to(device=device, dtype=dtype)
    yaw = _adapted_uniform((batch_size,), degrees[0][0], degrees[0][1], same_on_batch)
    pitch = _adapted_uniform((batch_size,), degrees[1][0], degrees[1][1], same_on_batch)
    roll = _adapted_uniform((batch_size,), degrees[2][0], degrees[2][1], same_on_batch)

    return dict(
        yaw=yaw.to(device=_device, dtype=_dtype),
        pitch=pitch.to(device=_device, dtype=_dtype),
        roll=roll.to(device=_device, dtype=_dtype),
    )

def _get_random_affine_params(
        batch_size: int,
        height: int,
        width: int,
        degrees: TupleFloat,
        translate: Optional[TupleFloat],
        scales: Optional[TupleFloat],
        shears: Optional[TupleFloat],
        resample: Union[str, int, Resample] = Resample.BILINEAR.name,
        same_on_batch: bool = False,
        align_corners: bool = False) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``affine`` for a random affine transform.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    angle = _adapted_uniform((batch_size,), degrees[0], degrees[1], same_on_batch)

    # compute tensor ranges
    if scales is not None:
        scale = _adapted_uniform((batch_size,), scales[0], scales[1], same_on_batch)
    else:
        scale = torch.ones(batch_size)

    if translate is not None:
        max_dx: float = translate[0] * width
        max_dy: float = translate[1] * height
        translations = torch.stack([
            _adapted_uniform((batch_size,), -max_dx, max_dx, same_on_batch),
            _adapted_uniform((batch_size,), -max_dy, max_dy, same_on_batch)
        ], dim=-1)
    else:
        translations = torch.zeros(batch_size, 2)

    center: torch.Tensor = torch.tensor(
        [width, height], dtype=torch.float32).view(1, 2) / 2. - 0.5
    center = center.expand(batch_size, -1)

    if shears is not None:
        sx = _adapted_uniform((batch_size,), shears[0], shears[1], same_on_batch)
        sy = _adapted_uniform((batch_size,), shears[0], shears[1], same_on_batch)
    else:
        sx = sy = torch.tensor([0] * batch_size)

    return dict(translations=translations,
                center=center,
                scale=scale,
                angle=angle,
                sx=sx,
                sy=sy,
                resample=torch.tensor(Resample.get(resample).value),
                align_corners=torch.tensor(align_corners))

def random_solarize_generator(
        batch_size: int,
        thresholds: FloatUnionType = 0.1,
        additions: FloatUnionType = 0.1,
        same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random solarize parameters for a batch of images.

    For each pixel in the image less than threshold, we add 'addition' amount to it and then clip the pixel
    value to be between 0 and 1.0.

    Args:
        batch_size (int): the number of images.
        thresholds (float or tuple): Pixels less than threshold will be selected. Otherwise, subtract 1.0
            from the pixel. Default value is 0.1.
        additions (float or tuple): The value is between -0.5 and 0.5. Default value is 0.1.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    thresholds_bound: torch.Tensor = _check_and_bound(
        thresholds, 'thresholds', center=0.5, bounds=(0., 1.))
    additions_bound: torch.Tensor = _check_and_bound(additions, 'additions', bounds=(-0.5, 0.5))

    thresholds_factor = _adapted_uniform(
        (batch_size,), thresholds_bound[0].float(), thresholds_bound[1].float(), same_on_batch)
    additions_factor = _adapted_uniform(
        (batch_size,), additions_bound[0].float(), additions_bound[1].float(), same_on_batch)

    return dict(thresholds_factor=thresholds_factor,
                additions_factor=additions_factor)

def random_rotation_generator(
        batch_size: int,
        degrees: FloatUnionType,
        interpolation: Union[str, int, Resample] = Resample.BILINEAR.name,
        same_on_batch: bool = False,
        align_corners: bool = False) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``rotate`` for a random rotate transform.

    Args:
        batch_size (int): the tensor batch size.
        degrees (sequence or float or tensor): range of degrees to select from. If degrees is a number the
            range of degrees to select from will be (-degrees, +degrees).
        interpolation (int, str or kornia.Resample): Default: Resample.BILINEAR.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        align_corners (bool): interpolation flag. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    if not torch.is_tensor(degrees):
        if isinstance(degrees, (float, int)):
            if degrees < 0:
                raise ValueError(f"If degrees is a single number it must be positive. Got {degrees}")
            degrees = torch.tensor([-degrees, degrees]).to(torch.float32)
        elif isinstance(degrees, (tuple, list)):
            degrees = torch.tensor(degrees).to(torch.float32)
        else:
            raise TypeError(f"Degrees should be a float number, a sequence or a tensor. Got {type(degrees)}")

    # https://mypy.readthedocs.io/en/latest/casts.html cast to please mypy gods
    degrees = cast(torch.Tensor, degrees)

    if degrees.numel() != 2:
        raise ValueError("If degrees is a sequence it must be of length 2")

    degrees = _adapted_uniform((batch_size,), degrees[0], degrees[1], same_on_batch)

    return dict(degrees=degrees,
                interpolation=torch.tensor(Resample.get(interpolation).value),
                align_corners=torch.tensor(align_corners))

def _get_random_affine_params(
        batch_size: int,
        height: int,
        width: int,
        degrees: TupleFloat,
        translate: Optional[TupleFloat],
        scales: Optional[TupleFloat],
        shears: Optional[TupleFloat],
        same_on_batch: bool = False) -> torch.Tensor:
    r"""Get parameters for affine transformation random generation.

    The returned matrix is Bx3x3.

    Returns:
        torch.Tensor: params to be passed to the affine transformation.
    """
    angle = _adapted_uniform((batch_size,), degrees[0], degrees[1], same_on_batch)

    # compute tensor ranges
    if scales is not None:
        scale = _adapted_uniform((batch_size,), scales[0], scales[1], same_on_batch)
    else:
        scale = torch.ones(batch_size)

    if translate is not None:
        max_dx: float = translate[0] * width
        max_dy: float = translate[1] * height
        translations = torch.stack([
            _adapted_uniform((batch_size,), -max_dx, max_dx, same_on_batch),
            _adapted_uniform((batch_size,), -max_dy, max_dy, same_on_batch)
        ], dim=-1)
    else:
        translations = torch.zeros(batch_size, 2)

    center: torch.Tensor = torch.tensor(
        [width, height], dtype=torch.float32).view(1, 2) / 2. - 0.5
    center = center.expand(batch_size, -1)

    if shears is not None:
        # shear bounds are given in degrees; convert to radians for the matrix
        shears = math.radians(shears[0]), math.radians(shears[1])
        sx = _adapted_uniform((batch_size,), shears[0], shears[1], same_on_batch)
        sy = _adapted_uniform((batch_size,), shears[0], shears[1], same_on_batch)
    else:
        sx = sy = None

    # concatenate transforms
    transform_h = _compose_affine_matrix_3x3(translations, center, scale, angle, sx, sy)
    return transform_h

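# Usage sketch (illustrative): a batch of 3x3 affine matrices for two 32x32
# images with modest rotation, translation, scale and shear ranges. The Bx3x3
# shape relies on `_compose_affine_matrix_3x3` behaving as its name suggests.
def _example_get_random_affine_params() -> None:
    torch.manual_seed(0)
    transform = _get_random_affine_params(
        2, height=32, width=32, degrees=(-15., 15.),
        translate=(0.1, 0.1), scales=(0.9, 1.1), shears=(-5., 5.))
    assert transform.shape == (2, 3, 3)
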
def _get_perspective_params(batch_size: int,
                            width: int,
                            height: int,
                            distortion_scale: float,
                            same_on_batch: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Get parameters for ``perspective`` for a random perspective transform.

    Args:
        batch_size (int): the tensor batch size.
        width (int): width of the image.
        height (int) : height of the image.
        distortion_scale (float): it controls the degree of distortion and ranges from 0 to 1.
            Default value is 0.5.

    Returns:
        List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
        List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
        The points are given in (x, y) order.
    """
    start_points: torch.Tensor = torch.tensor([[
        [0., 0],
        [width - 1, 0],
        [width - 1, height - 1],
        [0, height - 1],
    ]]).expand(batch_size, -1, -1)

    # generate random offset not larger than half of the image
    fx: float = distortion_scale * width / 2
    fy: float = distortion_scale * height / 2

    factor = torch.tensor([fx, fy]).view(-1, 1, 2)

    rand_val: torch.Tensor = _adapted_uniform((batch_size, 4, 2), 0, 1, same_on_batch)
    pts_norm = torch.tensor([[
        [1, 1],
        [-1, 1],
        [-1, -1],
        [1, -1]
    ]])
    end_points = start_points + factor * rand_val * pts_norm

    return start_points, end_points

def random_prob_generator(
        batch_size: int, p: float = 0.5, same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random probabilities for a batch of inputs.

    Args:
        batch_size (int): the number of images.
        p (float): probability of the image being flipped or grayscaled. Default value is 0.5.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
    """
    if not isinstance(p, float):
        raise TypeError(f"The probability should be a float number. Got {type(p)}")

    probs: torch.Tensor = _adapted_uniform((batch_size,), 0, 1, same_on_batch)
    batch_prob: torch.Tensor = (probs < p)

    return dict(batch_prob=batch_prob)

def random_color_jitter_gen(batch_size: int,
                            brightness: FloatUnionType = 0.,
                            contrast: FloatUnionType = 0.,
                            saturation: FloatUnionType = 0.,
                            hue: FloatUnionType = 0.,
                            same_on_batch: bool = False) -> Dict[str, torch.Tensor]:
    r"""Generate random color jitter parameters for a batch of images.

    Args:
        batch_size (int): the number of images.
        brightness (float or tuple): Default value is 0.
        contrast (float or tuple): Default value is 0.
        saturation (float or tuple): Default value is 0.
        hue (float or tuple): Default value is 0.

    Returns:
        dict: generated parameter dictionary.

    See :class:`~kornia.augmentation.ColorJitter` for details.
    """
    def _check_and_bound(factor: FloatUnionType,
                         name: str,
                         center: float = 0.,
                         bounds: Tuple[float, float] = (0, float('inf'))) -> torch.Tensor:
        r"""Check inputs and compute the corresponding factor bounds."""
        if isinstance(factor, float):
            if factor < 0:
                raise ValueError(f"If {name} is a single number, it must be non negative. Got {factor}")
            factor_bound = torch.tensor([center - factor, center + factor], dtype=torch.float32)
            factor_bound = torch.clamp(factor_bound, bounds[0], bounds[1])
        elif isinstance(factor, torch.Tensor) and factor.dim() == 0:
            if factor < 0:
                raise ValueError(f"If {name} is a single number, it must be non negative. Got {factor}")
            factor_bound = torch.tensor(
                [torch.tensor(center) - factor, torch.tensor(center) + factor], dtype=torch.float32)
            factor_bound = torch.clamp(factor_bound, bounds[0], bounds[1])
        elif isinstance(factor, (tuple, list)) and len(factor) == 2:
            if not bounds[0] <= factor[0] <= factor[1] <= bounds[1]:
                raise ValueError(f"{name}[0] should be smaller than {name}[1] got {factor}")
            factor_bound = torch.tensor(factor, dtype=torch.float32)
        elif isinstance(factor, torch.Tensor) and factor.shape[0] == 2 and factor.dim() == 1:
            if not bounds[0] <= factor[0] <= factor[1] <= bounds[1]:
                raise ValueError(f"{name}[0] should be smaller than {name}[1] got {factor}")
            factor_bound = factor
        else:
            raise TypeError(
                f"The {name} should be a float number or a tuple with length 2 whose values move "
                f"between {bounds}.")
        return factor_bound

    brightness_bound: torch.Tensor = _check_and_bound(
        brightness, 'brightness', center=1., bounds=(0, 2))
    contrast_bound: torch.Tensor = _check_and_bound(contrast, 'contrast', center=1.)
    saturation_bound: torch.Tensor = _check_and_bound(saturation, 'saturation', center=1.)
    hue_bound: torch.Tensor = _check_and_bound(hue, 'hue', bounds=(-0.5, 0.5))

    brightness_factor = _adapted_uniform(
        (batch_size,), brightness_bound[0], brightness_bound[1], same_on_batch)
    contrast_factor = _adapted_uniform(
        (batch_size,), contrast_bound[0], contrast_bound[1], same_on_batch)
    hue_factor = _adapted_uniform((batch_size,), hue_bound[0], hue_bound[1], same_on_batch)
    saturation_factor = _adapted_uniform(
        (batch_size,), saturation_bound[0], saturation_bound[1], same_on_batch)

    return {
        "brightness_factor": brightness_factor,
        "contrast_factor": contrast_factor,
        "hue_factor": hue_factor,
        "saturation_factor": saturation_factor,
        "order": torch.randperm(4),
    }

def random_affine_generator3d(
    batch_size: int,
    depth: int,
    height: int,
    width: int,
    degrees: torch.Tensor,
    translate: Optional[torch.Tensor] = None,
    scale: Optional[torch.Tensor] = None,
    shears: Optional[torch.Tensor] = None,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Get parameters for ``3d affine`` for a random 3d affine transform.

    Args:
        batch_size (int): the tensor batch size.
        depth (int) : depth of the image.
        height (int) : height of the image.
        width (int): width of the image.
        degrees (torch.Tensor): Ranges of degrees with shape (3, 2) for yaw, pitch and roll.
        translate (torch.Tensor, optional): maximum absolute fraction with shape (3,) for horizontal, vertical
            and depthical translations (dx, dy, dz). Will not translate by default.
        scale (torch.Tensor, optional): scaling factor interval, e.g (a, b), then scale is
            randomly sampled from the range a <= scale <= b. Will keep original scale by default.
        shears (sequence or float, optional): Range of degrees to select from.
            Shaped as (6, 2) for 6 facets (xy, xz, yx, yz, zx, zy).
            The shear to the i-th facet in the range (-shears[i, 0], shears[i, 1]) will be applied.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - translations (torch.Tensor): element-wise translations with a shape of (B, 3).
            - center (torch.Tensor): element-wise center with a shape of (B, 3).
            - scale (torch.Tensor): element-wise scales with a shape of (B, 3).
            - angles (torch.Tensor): element-wise rotation angles with a shape of (B, 3).
            - sxy (torch.Tensor): element-wise x-y-facet shears with a shape of (B,).
            - sxz (torch.Tensor): element-wise x-z-facet shears with a shape of (B,).
            - syx (torch.Tensor): element-wise y-x-facet shears with a shape of (B,).
            - syz (torch.Tensor): element-wise y-z-facet shears with a shape of (B,).
            - szx (torch.Tensor): element-wise z-x-facet shears with a shape of (B,).
            - szy (torch.Tensor): element-wise z-y-facet shears with a shape of (B,).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.
    """
    if not (type(depth) is int and depth > 0 and type(height) is int and height > 0
            and type(width) is int and width > 0):
        raise AssertionError(f"'depth', 'height' and 'width' must be integers. Got {depth}, {height}, {width}.")

    _device, _dtype = _extract_device_dtype([degrees, translate, scale, shears])
    if degrees.shape != torch.Size([3, 2]):
        raise AssertionError(f"'degrees' must be the shape of (3, 2). Got {degrees.shape}.")
    degrees = degrees.to(device=device, dtype=dtype)
    yaw = _adapted_uniform((batch_size,), degrees[0][0], degrees[0][1], same_on_batch)
    pitch = _adapted_uniform((batch_size,), degrees[1][0], degrees[1][1], same_on_batch)
    roll = _adapted_uniform((batch_size,), degrees[2][0], degrees[2][1], same_on_batch)
    angles = torch.stack([yaw, pitch, roll], dim=1)

    # compute tensor ranges
    if scale is not None:
        if scale.shape != torch.Size([3, 2]):
            raise AssertionError(f"'scale' must be the shape of (3, 2). Got {scale.shape}.")
        scale = scale.to(device=device, dtype=dtype)
        scale = torch.stack([
            _adapted_uniform((batch_size,), scale[0, 0], scale[0, 1], same_on_batch),
            _adapted_uniform((batch_size,), scale[1, 0], scale[1, 1], same_on_batch),
            _adapted_uniform((batch_size,), scale[2, 0], scale[2, 1], same_on_batch),
        ], dim=1)
    else:
        scale = torch.ones(batch_size, device=device, dtype=dtype).reshape(batch_size, 1).repeat(1, 3)

    if translate is not None:
        if translate.shape != torch.Size([3]):
            raise AssertionError(f"'translate' must be the shape of (3,). Got {translate.shape}.")
        translate = translate.to(device=device, dtype=dtype)
        max_dx: torch.Tensor = translate[0] * width
        max_dy: torch.Tensor = translate[1] * height
        max_dz: torch.Tensor = translate[2] * depth
        # translations should be in x,y,z
        translations = torch.stack([
            _adapted_uniform((batch_size,), -max_dx, max_dx, same_on_batch),
            _adapted_uniform((batch_size,), -max_dy, max_dy, same_on_batch),
            _adapted_uniform((batch_size,), -max_dz, max_dz, same_on_batch),
        ], dim=1)
    else:
        translations = torch.zeros((batch_size, 3), device=device, dtype=dtype)

    # center should be in x,y,z
    center: torch.Tensor = torch.tensor(
        [width, height, depth], device=device, dtype=dtype).view(1, 3) / 2.0 - 0.5
    center = center.expand(batch_size, -1)

    if shears is not None:
        if shears.shape != torch.Size([6, 2]):
            raise AssertionError(f"'shears' must be the shape of (6, 2). Got {shears.shape}.")
        shears = shears.to(device=device, dtype=dtype)
        sxy = _adapted_uniform((batch_size,), shears[0, 0], shears[0, 1], same_on_batch)
        sxz = _adapted_uniform((batch_size,), shears[1, 0], shears[1, 1], same_on_batch)
        syx = _adapted_uniform((batch_size,), shears[2, 0], shears[2, 1], same_on_batch)
        syz = _adapted_uniform((batch_size,), shears[3, 0], shears[3, 1], same_on_batch)
        szx = _adapted_uniform((batch_size,), shears[4, 0], shears[4, 1], same_on_batch)
        szy = _adapted_uniform((batch_size,), shears[5, 0], shears[5, 1], same_on_batch)
    else:
        sxy = sxz = syx = syz = szx = szy = torch.tensor([0] * batch_size, device=device, dtype=dtype)

    return dict(
        translations=translations.to(device=_device, dtype=_dtype),
        center=center.to(device=_device, dtype=_dtype),
        scale=scale.to(device=_device, dtype=_dtype),
        angles=angles.to(device=_device, dtype=_dtype),
        sxy=sxy.to(device=_device, dtype=_dtype),
        sxz=sxz.to(device=_device, dtype=_dtype),
        syx=syx.to(device=_device, dtype=_dtype),
        syz=syz.to(device=_device, dtype=_dtype),
        szx=szx.to(device=_device, dtype=_dtype),
        szy=szy.to(device=_device, dtype=_dtype),
    )

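# Usage sketch (illustrative): 3d affine parameters for two 8x16x16 volumes;
# `angles` stacks the sampled yaw/pitch/roll per batch element.
def _example_random_affine_generator3d() -> None:
    torch.manual_seed(0)
    degrees = torch.tensor([[-15., 15.], [-15., 15.], [-15., 15.]])
    params = random_affine_generator3d(2, depth=8, height=16, width=16, degrees=degrees)
    assert params["angles"].shape == (2, 3)
    assert params["center"].shape == (2, 3)
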
def random_cutmix_generator(
    batch_size: int,
    width: int,
    height: int,
    p: float = 0.5,
    num_mix: int = 1,
    beta: Optional[torch.Tensor] = None,
    cut_size: Optional[torch.Tensor] = None,
    same_on_batch: bool = False,
    device: torch.device = torch.device('cpu'),
    dtype: torch.dtype = torch.float32,
) -> Dict[str, torch.Tensor]:
    r"""Generate cutmix indexes and lambdas for a batch of inputs.

    Args:
        batch_size (int): the number of images. If batch_size == 1, the output will be as same as the input.
        width (int): image width.
        height (int): image height.
        p (float): probability of applying cutmix.
        num_mix (int): number of images to mix with. Default is 1.
        beta (torch.Tensor, optional): hyperparameter for generating cut size from beta distribution.
            If None, it will be set to 1.
        cut_size (torch.Tensor, optional): controlling the minimum and maximum cut ratio from [0, 1].
            If None, it will be set to [0, 1], which means no restriction.
        same_on_batch (bool): apply the same transformation across the batch. Default: False.
        device (torch.device): the device on which the random numbers will be generated. Default: cpu.
        dtype (torch.dtype): the data type of the generated random numbers. Default: float32.

    Returns:
        params Dict[str, torch.Tensor]: parameters to be passed for transformation.
            - mix_pairs (torch.Tensor): element-wise probabilities with a shape of (num_mix, B).
            - crop_src (torch.Tensor): element-wise probabilities with a shape of (num_mix, B, 4, 2).

    Note:
        The generated random numbers are not reproducible across different devices and dtypes.

    Examples:
        >>> rng = torch.manual_seed(0)
        >>> random_cutmix_generator(3, 224, 224, p=0.5, num_mix=2)
        {'mix_pairs': tensor([[2, 0, 1],
                [1, 2, 0]]), 'crop_src': tensor([[[[ 35.,  25.],
                  [208.,  25.],
                  [208., 198.],
                  [ 35., 198.]],
        <BLANKLINE>
                 [[156., 137.],
                  [155., 137.],
                  [155., 136.],
                  [156., 136.]],
        <BLANKLINE>
                 [[  3.,  12.],
                  [210.,  12.],
                  [210., 219.],
                  [  3., 219.]]],
        <BLANKLINE>
        <BLANKLINE>
                [[[ 83., 125.],
                  [177., 125.],
                  [177., 219.],
                  [ 83., 219.]],
        <BLANKLINE>
                 [[ 54.,   8.],
                  [205.,   8.],
                  [205., 159.],
                  [ 54., 159.]],
        <BLANKLINE>
                 [[ 97.,  70.],
                  [ 96.,  70.],
                  [ 96.,  69.],
                  [ 97.,  69.]]]])}
    """
    _device, _dtype = _extract_device_dtype([beta, cut_size])
    beta = torch.as_tensor(1.0 if beta is None else beta, device=device, dtype=dtype)
    cut_size = torch.as_tensor([0.0, 1.0] if cut_size is None else cut_size, device=device, dtype=dtype)
    if not (num_mix >= 1 and isinstance(num_mix, (int,))):
        raise AssertionError(f"`num_mix` must be an integer greater than or equal to 1. Got {num_mix}.")
    if not (type(height) is int and height > 0 and type(width) is int and width > 0):
        raise AssertionError(f"'height' and 'width' must be integers. Got {height}, {width}.")
    _joint_range_check(cut_size, 'cut_size', bounds=(0, 1))
    _common_param_check(batch_size, same_on_batch)

    if batch_size == 0:
        return dict(
            mix_pairs=torch.zeros([0, 3], device=_device, dtype=torch.long),
            crop_src=torch.zeros([0, 4, 2], device=_device, dtype=torch.long),
        )

    batch_probs: torch.Tensor = random_prob_generator(
        batch_size * num_mix, p, same_on_batch, device=device, dtype=dtype)
    mix_pairs: torch.Tensor = torch.rand(num_mix, batch_size, device=device, dtype=dtype).argsort(dim=1)
    cutmix_betas: torch.Tensor = _adapted_beta((batch_size * num_mix,), beta, beta, same_on_batch=same_on_batch)

    # Note: torch.clamp does not accept tensor, cutmix_betas.clamp(cut_size[0], cut_size[1]) throws:
    # Argument 1 to "clamp" of "_TensorBase" has incompatible type "Tensor"; expected "float"
    cutmix_betas = torch.min(torch.max(cutmix_betas, cut_size[0]), cut_size[1])
    cutmix_rate = torch.sqrt(1.0 - cutmix_betas) * batch_probs

    cut_height = (cutmix_rate * height).floor().to(device=device, dtype=_dtype)
    cut_width = (cutmix_rate * width).floor().to(device=device, dtype=_dtype)
    _gen_shape = (1,)

    if same_on_batch:
        _gen_shape = (cut_height.size(0),)
        cut_height = cut_height[0]
        cut_width = cut_width[0]

    # Reserve at least 1 pixel for cropping.
    x_start = _adapted_uniform(
        _gen_shape,
        torch.zeros_like(cut_width, device=device, dtype=dtype),
        (width - cut_width - 1).to(device=device, dtype=dtype),
        same_on_batch,
    ).floor().to(device=device, dtype=_dtype)
    y_start = _adapted_uniform(
        _gen_shape,
        torch.zeros_like(cut_height, device=device, dtype=dtype),
        (height - cut_height - 1).to(device=device, dtype=dtype),
        same_on_batch,
    ).floor().to(device=device, dtype=_dtype)

    crop_src = bbox_generator(x_start.squeeze(), y_start.squeeze(), cut_width, cut_height)

    # (B * num_mix, 4, 2) => (num_mix, batch_size, 4, 2)
    crop_src = crop_src.view(num_mix, batch_size, 4, 2)

    return dict(
        mix_pairs=mix_pairs.to(device=_device, dtype=torch.long),
        crop_src=crop_src.floor().to(device=_device, dtype=_dtype),
    )