def __call__(self, sample):
    """Blur the ``'left'`` and ``'right'`` entries of ``sample`` together.

    A single coin flip decides for both entries: with probability 0.5 each
    of them is replaced by its Gaussian-blurred version (kernel size 7),
    otherwise the sample is returned untouched. ``sample`` is modified in
    place and returned.
    """
    if np.random.random() >= 0.5:
        return sample

    # Same blur on both views so they stay consistent with each other.
    for view in ('left', 'right'):
        sample[view] = F.gaussian_blur(sample[view], 7)
    return sample
def local_norm(img, kernel_size=31, cutoff_percent=80):
    '''
    Blends an image with a locally normalized copy of itself.

    The image is divided by a Gaussian-blurred version of itself (the local
    brightness estimate). To prevent over-amplification of noise, the divisor
    is floored at a global cut-off derived from the maximum of the blurred
    image. The input and the normalized image are then each min-max scaled
    and averaged.

    # Arguments:
        - img: tensor with the image of shape (height, width)
        - kernel_size: size of the averaging kernel, either a single value or
            a pair that is passed through to the blur. Should be odd ints.
        - cutoff_percent: number between 0 and 100. The divisor is floored at
            max(blurred image) * (cutoff_percent / 100) ** 3.

    # Returns:
        - tensor of the same size as the input img: the mean of the min-max
            scaled input and the min-max scaled locally normalized image.
            A constant image yields zeros rather than NaN.
    '''
    def _min_max_scale(t):
        # Scale to [0, 1]. A constant tensor has zero range; dividing by 1
        # instead maps it to zeros and avoids the 0/0 = NaN of a naive scale.
        low = torch.min(t)
        span = torch.max(t) - low
        span = torch.where(span > 0, span, torch.ones_like(span))
        return (t - low) / span

    kernel_size = hp.misc.ensure_list(kernel_size)
    if len(kernel_size) == 1:
        kernel_size = (kernel_size[0], kernel_size[0])

    # Blur needs a leading channel dimension; sigma is a third of the kernel.
    norm = ttf.gaussian_blur(img.unsqueeze(0), kernel_size,
                             [k / 3 for k in kernel_size]).squeeze(0)
    cutoff = torch.max(norm) * np.power(cutoff_percent / 100, 3)

    norm_img = img / torch.maximum(norm, cutoff)
    # Remaining NaN/inf values (e.g. 0/0 on an all-zero image) become finite.
    norm_img = torch.nan_to_num(norm_img)

    return (_min_max_scale(img) + _min_max_scale(norm_img)) / 2
def transform_train(self, image, mask):
    """Apply the training augmentations to an RGBA image and its mask.

    Flips and rotation are applied identically to the image and the mask so
    the two stay aligned; blur and normalisation are applied to the image
    only.

    Args:
        image: array accepted by ``Image.fromarray(image, "RGBA")``.
        mask: value accepted by ``TF.to_pil_image``.

    Returns:
        Tuple ``(image, mask)`` of tensors produced by ``TF.to_tensor``; the
        image is additionally normalised with 4-channel mean/std values.
    """
    # convert image and mask to PIL Images
    mask = TF.to_pil_image(mask)
    image = Image.fromarray(image, "RGBA")

    # random horizontal flip
    if random.random() > 0.5:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    # random vertical flip
    if random.random() > 0.5:
        image = TF.vflip(image)
        mask = TF.vflip(mask)

    # random rotation: one angle in [-10, 10] degrees shared by image and
    # mask (a constant 10 degree turn would not be an augmentation)
    angle = random.uniform(-10.0, 10.0)
    image = TF.rotate(image, angle=angle)
    mask = TF.rotate(mask, angle=angle)

    # Gaussian blur (image only, the mask must keep hard labels)
    image = TF.gaussian_blur(image, kernel_size=(3, 3))

    # convert PIL image and mask to tensor before returning
    image = TF.to_tensor(image)
    mask = TF.to_tensor(mask)

    # normalise the image as per mean and std dev (4th entry is for alpha)
    image = TF.normalize(image,
                         mean=[0.485, 0.456, 0.406, 0.5],
                         std=[0.229, 0.224, 0.225, 0.5])
    return image, mask
def get_optical_aberration_img(imgT):
    """Corrupt part of an image with one randomly chosen aberration.

    One of three effects is selected by a uniform draw ``r``:

    * ``r < 0.33``: Gaussian blur (kernel 7, sigma 10) inside a band,
    * ``r < 0.66``: brightening via ``ColorJitter(brightness=(1.6, 2.0))``
      inside a band,
    * otherwise: contrast reduction via ``ColorJitter(contrast=(0.1, 0.3))``
      inside a random blocky mask (a 10x10 grid scaled to the image size).

    The band covers the first ``r1`` fraction of the last dimension, with
    ``r1`` drawn uniformly from [0.2, 0.8), and is mirrored half of the time.

    Args:
        imgT: image tensor; assumed to be laid out as (C, H, W) -- TODO
            confirm against callers.

    Returns:
        Tensor equal to the distorted image where the mask is 1 and to
        ``imgT`` where it is 0.
    """
    r = torch.rand(1)
    r1 = 0.2 + (0.8 - 0.2) * torch.rand(1)  # 20 - 80% occlusion

    def _band_mask():
        # Float mask on the input's device: 1 on the leading r1 fraction of
        # the last dimension, 0 elsewhere; flipped horizontally at random.
        band = torch.ones(imgT.shape, device=imgT.device)
        band[..., int(imgT.shape[-1] * r1):] = 0
        if torch.rand(1) < 0.5:
            band = F.hflip(band)
        return band

    if r < 0.33:
        o1 = F.gaussian_blur(imgT, kernel_size=7, sigma=(10, 10))
        m1 = _band_mask()
    elif r < 0.66:
        o1 = T.ColorJitter(brightness=(1.6, 2.0))(imgT)
        m1 = _band_mask()
    else:
        # torch.rand is in [0, 1), so this covers every remaining value of r.
        o1 = T.ColorJitter(contrast=(0.1, 0.3))(imgT)
        m1 = (torch.rand((1, 10, 10)) < r1).float()
        # interpolation=0 is the integer code for nearest-neighbour, which
        # keeps the up-scaled mask binary.
        m1 = F.resize(m1, o1.shape[-2:], interpolation=0).to(imgT.device)

    return m1 * o1 + (1 - m1) * imgT
def transform_train(self, image, mask):
    """Apply the training augmentations to an RGB image and its mask.

    Flips and rotation are applied identically to the image and the mask so
    the two stay aligned; blur and normalisation are applied to the image
    only.

    Args:
        image: array accepted by ``Image.fromarray(image, "RGB")``.
        mask: value accepted by ``TF.to_pil_image``.

    Returns:
        Tuple ``(image, mask)`` of tensors produced by ``TF.to_tensor``; the
        image is additionally normalised with the ImageNet mean/std values.
    """
    # convert image and mask to PIL Images
    mask = TF.to_pil_image(mask)
    image = Image.fromarray(image, "RGB")

    # random horizontal flip
    if random.random() > 0.5:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    # random vertical flip
    if random.random() > 0.5:
        image = TF.vflip(image)
        mask = TF.vflip(mask)

    # random rotation: one angle in [-10, 10] degrees shared by image and
    # mask (a constant 10 degree turn would not be an augmentation)
    angle = random.uniform(-10.0, 10.0)
    image = TF.rotate(image, angle=angle)
    mask = TF.rotate(mask, angle=angle)

    # Gaussian blur (image only, the mask must keep hard labels)
    image = TF.gaussian_blur(image, kernel_size=(3, 3))

    # convert PIL image and mask to tensor before returning
    image = TF.to_tensor(image)
    mask = TF.to_tensor(mask)

    # normalise the image as per mean and std dev
    # for ImageNet pre-trained
    image = TF.normalize(image,
                         mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    # for solaris pre-trained
    # image = TF.normalize(image, mean=[0.006479, 0.009328, 0.01123],
    #                      std=[0.004986, 0.004964, 0.004950])
    return image, mask
def augmentor(self, image, mask):
    """Randomly augment an image/mask pair.

    Horizontal flip, vertical flip and a 90 degree rotation are each applied
    with probability 0.5 to both inputs. Afterwards both are cropped to the
    same random 248x248 window and the image alone is blurred with a very
    small sigma. The last two steps are guarded by ``random.random() > 0``,
    which is true for practically every draw.

    Returns:
        The augmented ``(image, mask)`` tuple.
    """
    geometric_ops = (
        TF.hflip,
        TF.vflip,
        lambda item: TF.rotate(item, 90),
    )
    # One coin flip per operation, shared by image and mask.
    for op in geometric_ops:
        if np.random.random() > 0.5:
            image, mask = op(image), op(mask)

    if random.random() > 0:
        top, left, height, width = transforms.RandomCrop.get_params(
            image, output_size=(248, 248))
        image = TF.crop(image, top, left, height, width)
        mask = TF.crop(mask, top, left, height, width)

    if random.random() > 0:
        blur_sigma = np.random.uniform(0, 0.05)
        image = TF.gaussian_blur(image, 3, sigma=blur_sigma)

    return image, mask
def __call__(self, image: torch.Tensor, context: ExpressionContext) -> torch.Tensor:
    """Blur ``image`` using kernel size and sigma resolved through ``context``.

    Every resolved kernel size is bumped to the next odd value when even and
    floored at 1; every resolved sigma is floored at 0.0001. When
    ``self.sigma`` is ``None`` the blur receives ``None`` as sigma.
    """
    def _as_valid_kernel(k):
        # Even sizes move up to the next odd value, then floor at 1.
        return max(1, k if k % 2 != 0 else k + 1)

    kernel_size = [_as_valid_kernel(k) for k in context(self.kernel_size)]

    sigma = None
    if self.sigma is not None:
        sigma = [max(0.0001, s) for s in context(self.sigma)]

    return VF.gaussian_blur(image, kernel_size, sigma)
def __call__(self, sample):
    """Blur the image(s) of ``sample``, each with probability 0.5.

    ``sample['image']`` and, when present, ``sample['image2']`` get an
    independent random draw; a draw above 0.5 applies a Gaussian blur with
    ``self.kernel_size``. A new dict holding the image(s) and the unchanged
    ``'label'`` is returned.
    """
    def _maybe_blur(picture):
        # A fresh draw per image keeps the two decisions independent.
        if torch.rand(1)[0] > 0.5:
            return TF.gaussian_blur(picture, self.kernel_size)
        return picture

    result = {'image': _maybe_blur(sample["image"])}
    if "image2" in sample:
        result["image2"] = _maybe_blur(sample["image2"])
    result['label'] = sample['label']
    return result
def __call__(self, image: torch.Tensor, context: ExpressionContext) -> torch.Tensor:
    """Return the amplified difference between ``image`` and its blur.

    Kernel size, sigma and amount are resolved through ``context``. The
    result is ``(image - blurred) * amount`` clamped to [0, 1].
    """
    kernel_size = context(self.kernel_size)
    sigma = None if self.sigma is None else context(self.sigma)
    # The amount is turned into a tensor on the image's device.
    amount = torch.Tensor(context(self.amount)).to(image.device)

    blurred = VF.gaussian_blur(image, kernel_size, sigma)
    detail = (image - blurred) * amount
    return torch.clamp(detail, 0, 1)
def __call__(self, sample):
    """
    Args:
        sample (dict): mapping with an ``'image'`` entry to be blurred and a
            ``'target'`` entry that is passed through unchanged.

    Returns:
        dict: new mapping with the Gaussian blurred ``'image'`` and the
        original ``'target'``.
    """
    image = sample['image']
    target = sample['target']

    # A single sigma is drawn and used for both blur directions.
    sigma_min, sigma_max = self.sigma[0], self.sigma[1]
    sigma = self.get_params(sigma_min, sigma_max)

    blurred = F.gaussian_blur(image, self.kernel_size, [sigma, sigma])
    return {'image': blurred, 'target': target}
def myTransform(img, mask):
    """Convert an image/mask pair to tensors and augment them randomly.

    The steps run in order, each behind its own random draw:
    rotation (draw > 0.2), flip (draw > 0.5) and a Gaussian blur of the
    image only (draw > 0.5, kernel [3, 5], sigma [0.1, 2.0]).

    Returns:
        Tuple ``(img, mask)`` of tensors.
    """
    image_t, mask_t = torch.tensor(img), torch.tensor(mask)

    rotate_roll = random.random()
    if rotate_roll > 0.2:
        image_t, mask_t = rotate(image_t, mask_t)

    flip_roll = random.random()
    if flip_roll > 0.5:
        image_t, mask_t = flip(image_t, mask_t)

    blur_roll = random.random()
    if blur_roll > 0.5:
        # Blur touches the image only; the mask keeps its values.
        image_t = tf.gaussian_blur(image_t, [3, 5], sigma=[0.1, 2.0])

    return image_t, mask_t
def get_expression_blur(
        image: torch.Tensor,
        kernel_size: List[int],
        sigma: Optional[List[float]],
        context: ExpressionContext,
) -> torch.Tensor:
    """Blur ``image`` with parameters resolved through ``context``.

    Resolved kernel sizes are raised to the next odd value when even and
    floored at 1. Resolved sigmas are floored at 0.0001; a ``None`` sigma is
    forwarded to the blur unchanged.
    """
    resolved_kernel = []
    for k in context(kernel_size):
        if k % 2 == 0:
            k = k + 1
        resolved_kernel.append(max(1, k))

    resolved_sigma = None
    if sigma is not None:
        resolved_sigma = [max(0.0001, s) for s in context(sigma)]

    return VF.gaussian_blur(image, resolved_kernel, resolved_sigma)
def handle_class_max(data):
    """Optimise an input image towards one class and stream the progress.

    Starting from random noise, the (normalised) image ``x`` is optimised
    with SGD so that the softmax probability of ``data['target']`` grows.
    After every step the current image is saved under ``static/out`` and a
    ``response_class_max`` event with the loss, the file name and the top-5
    predictions is emitted.

    Args:
        data: mapping providing ``model``, ``lr``, ``weight_decay``,
            ``epochs``, ``blur``, ``blur_freq``, ``target``, ``clip_grad``
            and ``clamp``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # manual_seed(0)

    model = create_model(data['model'], pretrained=True)
    model.to(device)
    model.eval()
    # Only the input image is optimised; freezing the weights avoids
    # computing and accumulating gradients that are never used.
    for param in model.parameters():
        param.requires_grad_(False)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    x = torch.randint(0, 255, [1, 3, 224, 224]) / 255
    x = TF.normalize(x, mean, std).to(device).requires_grad_(True)

    # float() keeps fractional learning rates (int() would truncate 0.5 to 0).
    optimizer = torch.optim.SGD([x],
                                lr=float(data['lr']),
                                weight_decay=float(data['weight_decay']))

    for i in range(int(data['epochs'])):
        # Periodic blur of the image being optimised.
        if bool(data['blur']) and i % int(data['blur_freq']) == 0:
            x.data = TF.gaussian_blur(x.data, [3, 3])

        optimizer.zero_grad()
        output = model(x)
        # Maximise the target probability by minimising its negative.
        loss = -torch.softmax(output, dim=1)[0, int(data['target'])]
        loss.backward()

        if bool(data['clip_grad']):
            # Clip the gradient of the tensor the optimizer actually updates.
            torch.nn.utils.clip_grad_norm_([x], 0.1)

        optimizer.step()

        filename = f'static/out/class_max_{i}_{time.time()}.png'
        torchvision.utils.save_image(
            denormalize(x.detach(), mean, std, clamp=bool(data['clamp'])),
            filename,
            normalize=True)

        output = torch.softmax(output, dim=1).squeeze(0)
        # predictions = [{'class': cls_names[i].replace('_', ' '), 'confidence': f'{v * 100:>4.2f}'} for i,v in enumerate(output)]
        topk = torch.topk(output, 5, 0, True, True)
        topk = [
            {
                'index': f'{idx}',
                'class': cls_names[idx].replace('_', ' '),
                'confidence': f'{value * 100:>4.2f}',
            }
            for idx, value in zip(topk.indices.detach().cpu().numpy(),
                                  topk.values.detach().cpu().numpy())
        ]

        emit('response_class_max', {
            'epoch': i,
            'loss': loss.item(),
            'output': filename,
            'topk': topk
        })
def forward(self, x):
    """Blur every element of ``x`` with one randomly drawn sigma pair.

    ``x`` is first passed through ``apply_all(x, remove_numpy)``. One sigma
    is drawn for each direction from ``[self.sigma[0], self.sigma[1]]``; in
    isotropic mode a single draw is reused for both. When no kernel size is
    configured it is derived from the sigmas as ``2 * ceil(3 * sigma) + 1``,
    with a minimum of 3.
    """
    x = apply_all(x, remove_numpy)

    isotropic = self.isotropic
    sigma_x = random_uniform(self.sigma[0], self.sigma[1])
    sigma_y = sigma_x if isotropic else random_uniform(self.sigma[0], self.sigma[1])
    sigma = (sigma_x, sigma_y)

    kernel_size = self.kernel_size
    if kernel_size is None:
        kernel_size = tuple(
            max(2 * int(math.ceil(3 * s)) + 1, 3) for s in sigma)

    def _blur(item):
        return F.gaussian_blur(item, kernel_size, sigma)

    return apply_all(x, _blur)
def forward_map_func(args_dict, q):
    """Run one model of the global ``models`` list and queue its output.

    Relies on the module-level ``models`` and ``main_device``. The input
    tensor goes through ``input_diversity``, is resized to the model's
    configured input size, blurred, normalised with the model's mean/std,
    evaluated on ``cuda:<id>`` and the result is moved back to
    ``main_device`` before being put on ``q``.

    Args:
        args_dict: mapping with ``'id'`` (index into ``models``) and
            ``'tensor'`` (the input batch).
        q: queue-like object receiving the model output via ``put``.
    """
    model_idx = args_dict['id']
    x_adv = args_dict['tensor']
    diverse_input = input_diversity(x_adv)

    # Drop the channel entry of input_size, keeping the spatial size.
    image_resize = models[model_idx].default_cfg['input_size'][1:]
    mean = torch.tensor(models[model_idx].default_cfg['mean'])
    mean = mean.view(3, 1, 1).to(main_device)
    std = torch.tensor(models[model_idx].default_cfg['std'])
    std = std.view(3, 1, 1).to(main_device)

    resized_tensor = F.interpolate(diverse_input, size=image_resize,
                                   mode='bicubic')
    gaussian_tensor = functional.gaussian_blur(resized_tensor,
                                               kernel_size=(5, 5),
                                               sigma=(0.9, 0.9))
    normalized_tensor = (gaussian_tensor - mean) / std

    target_device = 'cuda:{}'.format(model_idx)
    output = models[model_idx](normalized_tensor.to(target_device))
    q.put(output.to(main_device))
def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwargs) -> torch.Tensor:
    """Darken an image with a random, softly blurred shadow

    Args:
        img: image to modify; the mask is created for ``img.shape[1:]``
        opacity_range: the minimum and maximum desired opacity of the shadow
        **kwargs: forwarded to ``create_shadow_mask``

    Returns:
        shaded image
    """
    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)  # type: ignore[arg-type]

    opacity = np.random.uniform(*opacity_range)
    # Invert the mask and add a leading dimension so it broadcasts over img.
    shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
    # The mask is built on CPU; follow the image to its device.
    shadow_tensor = shadow_tensor.to(img.device)

    # Add some blur to make it believable
    # rand() returns a plain float; int() on a 1-element array is deprecated.
    k = 7 + 2 * int(4 * np.random.rand())  # odd kernel size in {7, 9, 11, 13}
    sigma = np.random.uniform(0.5, 5.0)
    shadow_tensor = F.gaussian_blur(shadow_tensor, k, sigma=[sigma, sigma])

    return opacity * shadow_tensor * img + (1 - opacity) * img
def blend_images(background_img: torch.Tensor,
                 reflect_img: torch.Tensor,
                 max_image_size: int = 560,
                 ghost_rate: float = 0.49,
                 alpha_t: float = None,
                 offset: tuple[int, int] = (0, 0),
                 sigma: float = None,
                 ghost_alpha: float = None):
    """
    Blend transmit layer and reflection layer together (include blurred & ghosted reflection layer) and
    return the blended image and processed reflection image

    Both images are resized so that the longer edge of the background equals
    ``max_image_size`` and are processed in a gamma-2.2 space. With probability
    ``ghost_rate`` the reflection is a double (ghost) image of
    ``reflect_img``; otherwise it is a blurred, attenuated and vignetted
    copy of ``reflect_img``.

    Args:
        background_img: transmit (background) image tensor.
        reflect_img: reflection image tensor.
        max_image_size: size of the longer edge after resizing.
        ghost_rate: probability of the ghost effect branch.
        alpha_t: weight of the transmit layer.
            Defaults to ``1 - random.uniform(0.05, 0.45)``.
        offset: (height, width) shift between the two ghost copies.
            ``(0, 0)`` draws both from ``random.randint(3, 8)``.
        sigma: standard deviation of the focal blur.
            Defaults to ``random.uniform(1, 5)``.
        ghost_alpha: weight of the first ghost copy. Defaults to
            ``abs(round(random.random()) - random.uniform(0.15, 0.5))``.

    Returns:
        Tuple ``(blended, background_layer, reflection_layer)``.
    """
    if alpha_t is None:
        alpha_t = 1. - random.uniform(0.05, 0.45)
    h, w = background_img.shape[-2:]
    aspect_ratio = w / h
    h, w = (max_image_size, int(round(max_image_size * aspect_ratio))) if h > w \
        else (int(round(max_image_size / aspect_ratio)), max_image_size)
    # Original code uses cv2 INTER_CUBIC, which is slightly different from BICUBIC
    background_img = F.resize(background_img, size=(h, w),
                              interpolation=InterpolationMode.BICUBIC).clamp(0, 1)
    reflect_img = F.resize(reflect_img, size=(h, w),
                           interpolation=InterpolationMode.BICUBIC).clamp(0, 1)
    background_img.pow_(2.2)
    reflect_img.pow_(2.2)

    background_mask = alpha_t * background_img
    if random.random() < ghost_rate:
        # generate the blended image with ghost effect
        if ghost_alpha is None:
            ghost_alpha = abs(round(random.random()) - random.uniform(0.15, 0.5))
        if offset[0] == 0 and offset[1] == 0:
            offset = (random.randint(3, 8), random.randint(3, 8))
        off_h, off_w = offset
        # The ghost is built from the reflection image. Padding order is
        # [left, top, right, bottom], so width goes first and height second.
        reflect_1 = F.pad(reflect_img, [0, 0, off_w, off_h])  # pad on right/bottom
        reflect_2 = F.pad(reflect_img, [off_w, off_h, 0, 0])  # pad on left/top
        reflect_ghost = ghost_alpha * reflect_1 + (1 - ghost_alpha) * reflect_2
        # ``x:-0`` would be an empty slice, so a zero offset keeps the open end.
        reflect_ghost = reflect_ghost[..., off_h:-off_h or None, off_w:-off_w or None]
        reflect_ghost = F.resize(reflect_ghost, size=[h, w],
                                 interpolation=InterpolationMode.BICUBIC
                                 ).clamp(0, 1)  # no cubic mode in original code

        reflect_mask = (1 - alpha_t) * reflect_ghost
        reflection_layer = reflect_mask.pow(1 / 2.2)
    else:
        # generate the blended image with focal blur
        if sigma is None:
            sigma = random.uniform(1, 5)
        kernel_size = int(2 * math.ceil(2 * sigma) + 1)
        reflect_blur = F.gaussian_blur(reflect_img, kernel_size, sigma)
        blend = reflect_blur + background_img

        # get the reflection layers' proper range
        att = 1.08 + random.random() / 10.0
        mask = blend > 1
        # Per-channel mean of the over-exposed pixels (1.0 when there are none).
        mean = torch.tensor([
            blend[i, mask[i]].mean().nan_to_num(1.0).item()
            for i in range(blend.size(0))
        ]).view(-1, 1, 1)  # (C, 1, 1)
        reflect_blur = (reflect_blur - att * (mean - 1)).clamp(0, 1)

        def gen_kernel(kern_len: int = 100, nsig: int = 1) -> torch.Tensor:
            r"""Returns a 2D Gaussian kernel tensor."""
            interval = (2 * nsig + 1.) / kern_len
            x = torch.linspace(-nsig - interval / 2., nsig + interval / 2., kern_len + 1)
            # get normal distribution
            kern1d = norm.cdf(x).diff()
            kernel_raw = kern1d.outer(kern1d).sqrt()
            kernel = kernel_raw / kernel_raw.sum()  # TODO: is it auxiliary for positive numbers?
            kernel = kernel / kernel.max()
            return kernel

        h, w = reflect_blur.shape[-2:]
        new_h = random.randint(0, max_image_size - h - 10) if h < max_image_size - 10 else 0
        new_w = random.randint(0, max_image_size - w - 10) if w < max_image_size - 10 else 0
        g_mask = gen_kernel(max_image_size, 3).repeat(3, 1, 1)
        alpha_r = (1 - alpha_t / 2) * g_mask[..., new_h:new_h + h, new_w:new_w + w]

        reflect_mask = alpha_r * reflect_blur
        reflection_layer = (min(1., 4 * (1 - alpha_t)) * reflect_mask).pow(1 / 2.2)

    blended = (reflect_mask + background_mask).pow(1 / 2.2)
    background_layer = background_mask.pow(1 / 2.2)
    return blended, background_layer, reflection_layer
def __call__(self, input: Tensor) -> Tensor:
    """Blur ``input`` with the configured kernel size and sigma."""
    kernel_size, sigma = self.kernel_size, self.sigma
    blurred = TF.gaussian_blur(input, kernel_size, sigma)
    return blurred
def blur_depth_map(depth: torch.Tensor) -> torch.Tensor:
    """Return ``depth`` smoothed by a Gaussian blur with kernel size 7."""
    kernel_size = 7
    return tv_f.gaussian_blur(depth, kernel_size)
def image_that_feature_responds_to_most(model: nn.Module,
                                        layern: int,
                                        channel: int,
                                        size: tuple,
                                        lr: float = 0.1,
                                        iters: int = 20,
                                        upscaling_steps=5,
                                        upscaling_factor=1.2):
    """Find the input image a given filter/feature responds to most strongly.

    In a model that recognises faces a filter might respond to eyes, so the
    perfect input image would be a picture full of eyes. This perfect image
    is found by starting from noise and adjusting that noise by gradient
    descent so that the filter/feature in the given layer lights up as much
    as possible. The noise image is also enlarged a few times, so the
    'dense' part of the model has to be removed beforehand.

    The loss curve is plotted and the final image is shown at the end.

    Args:
        model: network whose direct children are indexed by ``layern``
            (``nn.Sequential`` children are skipped when indexing).
        layern: index of the layer to hook.
        channel: channel of that layer's output to maximise.
        size: initial (height, width) of the noise image.
        lr: learning rate of the Adam optimizer.
        iters: optimisation steps per upscaling step.
        upscaling_steps: number of blur/resize/optimise rounds.
        upscaling_factor: factor applied to ``size`` in every round.

    Example:
        vgg = torchvision.models.vgg16(pretrained=True).features
        image_that_feature_responds_to_most(vgg, 30, 23, (128, 128))
    """
    assert len(size) == 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", device)
    model.to(device)
    noise = torch.rand(1, 3, *size, requires_grad=True)

    feature = None

    def feature_hook(_module, _input, output):
        # Capture the hooked layer's output of the latest forward pass.
        nonlocal feature
        feature = output

    modules = [
        module for module in model.children()
        if type(module) != nn.Sequential
    ]
    layer = modules[layern]
    print(
        f"This will evaluate the features of channel {channel} after layer {layern}: {layer}"
    )

    hook = layer.register_forward_hook(feature_hook)
    losses = []
    x = list(range(iters * upscaling_steps))

    try:
        for i in range(upscaling_steps):
            # image processing for better results
            size = tuple(int(side * upscaling_factor) for side in size)
            noise = FT.gaussian_blur(noise, 3)
            noise = FT.resize(noise, size)

            # Blur/resize/normalise create new tensors; start from a fresh
            # leaf tensor so the optimizer updates the right object.
            noise = noise.detach().to(device).requires_grad_(True)

            optimizer = optim.Adam(params=[noise], lr=lr)

            for _ in range(iters):
                # The forward pass fills ``feature`` through the hook.
                model(noise)
                optimizer.zero_grad()
                # the more the feature is highlighted, the better the input image
                loss = -feature[0, channel].mean()
                # Store a plain float: keeping the tensor would retain its
                # graph and cannot be plotted.
                losses.append(loss.item())
                loss.backward()
                optimizer.step()

            # Normalise once per upscaling step; reassigning ``noise`` inside
            # the inner loop would detach it from the optimizer.
            with torch.no_grad():
                noise = utils.normalize(noise)
            print("Done with upscaling step: ", i)
    finally:
        # Always detach the hook, even when optimisation fails.
        hook.remove()

    plt.plot(x, losses)
    plt.show()
    show_image(noise[0])
def _copy_paste(
    image: torch.Tensor,
    target: Dict[str, Tensor],
    paste_image: torch.Tensor,
    paste_target: Dict[str, Tensor],
    blending: bool = True,
    resize_interpolation: F.InterpolationMode = F.InterpolationMode.BILINEAR,
) -> Tuple[torch.Tensor, Dict[str, Tensor]]:
    """Paste a random subset of instances from ``paste_image`` onto ``image``.

    A random, de-duplicated selection of the paste target's masks (with the
    matching boxes and labels) is chosen. Where any selected mask is set,
    pixels of ``image`` are replaced by pixels of ``paste_image``; the
    existing masks are cut accordingly, masks that become empty are dropped
    and boxes are recomputed from the remaining masks.

    Args:
        image: image receiving the pasted instances.
        target: annotations of ``image`` with ``"masks"`` and ``"labels"``
            (``"boxes"`` is recomputed); ``"area"`` and ``"iscrowd"`` are
            updated when present.
        paste_image: image providing the pasted pixels; resized to the size
            of ``image`` when the spatial sizes differ.
        paste_target: annotations of ``paste_image`` with ``"masks"``,
            ``"boxes"`` and ``"labels"``.
        blending: when true the paste mask is passed through a 5x5 Gaussian
            blur (sigma 2.0) before it is applied.
        resize_interpolation: interpolation used to resize ``paste_image``.

    Returns:
        Tuple ``(image, out_target)``; ``out_target`` is a shallow copy of
        ``target`` with the updated entries. When ``paste_target`` holds no
        masks the inputs are returned unchanged.
    """

    # Random paste targets selection:
    num_masks = len(paste_target["masks"])

    if num_masks < 1:
        # Such degenerate case with num_masks=0 can happen with LSJ
        # Let's just return (image, target)
        return image, target

    # We have to please torch script by explicitly specifying dtype as torch.long
    # Sampling with replacement followed by unique() yields between 1 and
    # num_masks distinct instance indices.
    random_selection = torch.randint(0, num_masks, (num_masks, ), device=paste_image.device)
    random_selection = torch.unique(random_selection).to(torch.long)

    paste_masks = paste_target["masks"][random_selection]
    paste_boxes = paste_target["boxes"][random_selection]
    paste_labels = paste_target["labels"][random_selection]

    masks = target["masks"]

    # We resize source and paste data if they have different sizes
    # This is something we introduced here as originally the algorithm works
    # on equal-sized data (for example, coming from LSJ data augmentations)
    size1 = image.shape[-2:]
    size2 = paste_image.shape[-2:]
    if size1 != size2:
        paste_image = F.resize(paste_image, size1, interpolation=resize_interpolation)
        # Nearest-neighbour keeps the resized masks free of blended values.
        paste_masks = F.resize(paste_masks, size1, interpolation=F.InterpolationMode.NEAREST)
        # resize bboxes:
        # ratios are (width ratio, height ratio); viewing the boxes as
        # (N, 2, 2) scales both corner points of every box at once.
        ratios = torch.tensor((size1[1] / size2[1], size1[0] / size2[0]), device=paste_boxes.device)
        paste_boxes = paste_boxes.view(-1, 2, 2).mul(ratios).view(paste_boxes.shape)

    # Union of all selected instance masks.
    paste_alpha_mask = paste_masks.sum(dim=0) > 0

    if blending:
        # NOTE(review): the mask is boolean at this point and is inverted
        # with ``~`` below, so the blur presumably changes its extent rather
        # than producing fractional alpha values -- confirm.
        paste_alpha_mask = F.gaussian_blur(
            paste_alpha_mask.unsqueeze(0),
            kernel_size=(5, 5),
            sigma=[
                2.0,
            ],
        )

    # Copy-paste images:
    image = (image * (~paste_alpha_mask)) + (paste_image * paste_alpha_mask)

    # Copy-paste masks:
    masks = masks * (~paste_alpha_mask)
    # Original instances that were covered completely are removed.
    non_all_zero_masks = masks.sum((-1, -2)) > 0
    masks = masks[non_all_zero_masks]

    # Do a shallow copy of the target dict
    out_target = {k: v for k, v in target.items()}

    out_target["masks"] = torch.cat([masks, paste_masks])

    # Copy-paste boxes and labels
    boxes = ops.masks_to_boxes(masks)
    out_target["boxes"] = torch.cat([boxes, paste_boxes])

    labels = target["labels"][non_all_zero_masks]
    out_target["labels"] = torch.cat([labels, paste_labels])

    # Update additional optional keys: area and iscrowd if exist
    if "area" in target:
        out_target["area"] = out_target["masks"].sum((-1, -2)).to(torch.float32)

    if "iscrowd" in target and "iscrowd" in paste_target:
        # target['iscrowd'] size can be differ from mask size (non_all_zero_masks)
        # For example, if previous transforms geometrically modifies masks/boxes/labels but
        # does not update "iscrowd"
        if len(target["iscrowd"]) == len(non_all_zero_masks):
            iscrowd = target["iscrowd"][non_all_zero_masks]
            paste_iscrowd = paste_target["iscrowd"][random_selection]
            out_target["iscrowd"] = torch.cat([iscrowd, paste_iscrowd])

    # Check for degenerated boxes and remove them
    boxes = out_target["boxes"]
    # A box is degenerate when its second corner is not strictly beyond its
    # first corner in either coordinate.
    degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
    if degenerate_boxes.any():
        valid_targets = ~degenerate_boxes.any(dim=1)

        out_target["boxes"] = boxes[valid_targets]
        out_target["masks"] = out_target["masks"][valid_targets]
        out_target["labels"] = out_target["labels"][valid_targets]

        if "area" in out_target:
            out_target["area"] = out_target["area"][valid_targets]

        if "iscrowd" in out_target and len(out_target["iscrowd"]) == len(valid_targets):
            out_target["iscrowd"] = out_target["iscrowd"][valid_targets]

    return image, out_target
def get_edge_mean(image: torch.Tensor) -> torch.Tensor:
    """Return the mean absolute blur residual for each of three channels.

    The image is blurred with a 3x3 Gaussian kernel (default sigma); the
    absolute difference to the original is reshaped to ``(3, -1)`` and
    averaged per row.
    """
    blurred = VF.gaussian_blur(image, [3, 3], None)
    edge_strength = (blurred - image).abs()
    per_channel = edge_strength.reshape(3, -1)
    return per_channel.mean(1)
def _blur_then_jpeg(p):
    """Build two ``transforms.Lambda`` steps: random blur, then random JPEG.

    Each step draws from ``_RNG`` on every call and leaves its input
    unchanged when the draw is greater than ``p``; otherwise it applies a
    Gaussian blur (kernel 3, sigma ``_GAUSSIAN_SIGMA``) or ``_jpeg``
    respectively.
    """
    def _maybe_blur(x):
        if _RNG.random() > p:
            return x
        return TF.gaussian_blur(x, kernel_size=3, sigma=_GAUSSIAN_SIGMA)

    def _maybe_jpeg(x):
        if _RNG.random() > p:
            return x
        return _jpeg(x)

    return [transforms.Lambda(_maybe_blur), transforms.Lambda(_maybe_jpeg)]
def forward(self, tensor):
    """Blur ``tensor`` with a random odd kernel size and a random sigma.

    An integer is drawn with ``randrange(self.kernel_size[0] - 1,
    self.kernel_size[1])`` and mapped to the odd size ``2 * value + 1``,
    used for both directions. Sigma is drawn uniformly between
    ``self.sigma[0]`` and ``self.sigma[1]``.
    """
    lower, upper = self.kernel_size[0], self.kernel_size[1]
    half_width = int(randrange(lower - 1, upper))
    kernel_size = half_width * 2 + 1

    sigma_min, sigma_max = self.sigma[0], self.sigma[1]
    sigma = torch.empty(1).uniform_(sigma_min, sigma_max).item()

    return gaussian_blur(tensor, (kernel_size, kernel_size), [sigma])
def blend_images(background_img: torch.Tensor,
                 reflect_img: torch.Tensor,
                 max_image_size: int = 560,
                 ghost_rate: float = 0.49,
                 alpha_bg: float = None,
                 offset: tuple[int, int] = (0, 0),
                 sigma: float = None,
                 ghost_alpha: float = None
                 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Blend background layer and reflection layer together
    (including blurred & ghosted reflection layer).

    :attr:`background_img` is resized using :attr:`max_image_size`
    and :attr:`reflect_img` is resized to the same shape.
    With probability :attr:`ghost_rate` the reflection is a double (ghost)
    image of :attr:`reflect_img`; otherwise it is a blurred, attenuated
    and vignetted copy of :attr:`reflect_img`.

    Note:
        This blend method is only used to generate reflect images.
        To add watermark on images, please call :meth:`add_mark()`.

    Args:
        background_img (torch.Tensor): Background image tensor.
            The focal blur branch indexes channels on the first dimension
            and builds a 3-channel mask, so an un-batched 3-channel
            ``(C, H, W)`` input is assumed there.
        reflect_img (torch.Tensor): Reflect image tensor
            (resized to the shape of the resized background).
        max_image_size (int): Max image size (the longer edge of height/width).
            :attr:`background_img` will be resized while keeping aspect ratio.
            Defaults to ``560``.
        ghost_rate (float): Probability to generate the blended image with ghost effect.
            Defaults to ``0.49``.
        alpha_bg (float): Weight of background image during blending.
            Defaults to ``1 - random.uniform(0.05, 0.45)``.
        offset (tuple[int, int]): Offset of height and width used in ghost effect.
            Defaults to ``(random.randint(3, 8), random.randint(3, 8))``.
        sigma (float): Gaussian kernel standard deviation.
            Defaults to ``random.uniform(1, 5)``.
        ghost_alpha (float): Weight of the first ghost image used in ghost effect.
            Defaults to ``abs(round(random.random()) - random.uniform(0.15, 0.5))``.

    Returns:
        (torch.Tensor, torch.Tensor, torch.Tensor):
            ``blended, background_layer, reflection_layer``.
    """
    if alpha_bg is None:
        alpha_bg = 1. - random.uniform(0.05, 0.45)
    h, w = background_img.shape[-2:]
    aspect_ratio = w / h
    h, w = (max_image_size, int(round(max_image_size * aspect_ratio))) if h > w \
        else (int(round(max_image_size / aspect_ratio)), max_image_size)
    # Original code uses cv2 INTER_CUBIC, which is slightly different from BICUBIC
    background_img = F.resize(background_img, size=(h, w),
                              interpolation=InterpolationMode.BICUBIC).clamp(0, 1)
    reflect_img = F.resize(reflect_img, size=(h, w),
                           interpolation=InterpolationMode.BICUBIC).clamp(0, 1)
    background_img.pow_(2.2)
    reflect_img.pow_(2.2)

    background_mask = alpha_bg * background_img
    if random.random() < ghost_rate:
        # generate the blended image with ghost effect
        if ghost_alpha is None:
            ghost_alpha = abs(round(random.random()) - random.uniform(0.15, 0.5))
        if offset[0] == 0 and offset[1] == 0:
            offset = (random.randint(3, 8), random.randint(3, 8))
        off_h, off_w = offset
        # The ghost is built from the reflection image. Padding order is
        # [left, top, right, bottom], so width goes first and height second.
        reflect_1 = F.pad(reflect_img, [0, 0, off_w, off_h])  # pad on right/bottom
        reflect_2 = F.pad(reflect_img, [off_w, off_h, 0, 0])  # pad on left/top
        reflect_ghost = ghost_alpha * reflect_1 + (1 - ghost_alpha) * reflect_2
        # ``x:-0`` would be an empty slice, so a zero offset keeps the open end.
        reflect_ghost = reflect_ghost[..., off_h:-off_h or None, off_w:-off_w or None]
        reflect_ghost = F.resize(reflect_ghost, size=[h, w],
                                 interpolation=InterpolationMode.BICUBIC
                                 ).clamp(0, 1)  # no cubic mode in original code

        reflect_mask = (1 - alpha_bg) * reflect_ghost
        reflection_layer = reflect_mask.pow(1 / 2.2)
    else:
        # generate the blended image with focal blur
        if sigma is None:
            sigma = random.uniform(1, 5)
        kernel_size = int(2 * math.ceil(2 * sigma) + 1)
        reflect_blur = F.gaussian_blur(reflect_img, kernel_size, sigma)
        blend = reflect_blur + background_img

        # get the reflection layers' proper range
        att = 1.08 + random.random() / 10.0
        mask = blend > 1
        # Per-channel mean of the over-exposed pixels (1.0 when there are none).
        mean = torch.tensor([blend[i, mask[i]].mean().nan_to_num(1.0).item()
                             for i in range(blend.size(0))]).view(-1, 1, 1)    # (C, 1, 1)
        reflect_blur = (reflect_blur - att * (mean - 1)).clamp(0, 1)

        def gen_kernel(kern_len: int = 100, nsig: int = 1) -> torch.Tensor:
            r"""Returns a 2D Gaussian kernel tensor."""
            interval = (2 * nsig + 1.) / kern_len
            x = torch.linspace(-nsig - interval / 2., nsig + interval / 2., kern_len + 1)
            # get normal distribution
            kern1d = norm.cdf(x).diff()
            kernel_raw = kern1d.outer(kern1d).sqrt()
            kernel = kernel_raw / kernel_raw.sum()  # TODO: is it auxiliary for positive numbers?
            kernel = kernel / kernel.max()
            return kernel

        h, w = reflect_blur.shape[-2:]
        new_h = random.randint(0, max_image_size - h - 10) if h < max_image_size - 10 else 0
        new_w = random.randint(0, max_image_size - w - 10) if w < max_image_size - 10 else 0
        g_mask = gen_kernel(max_image_size, 3).repeat(3, 1, 1)  # TODO: try to avoid hard encode 3 as channel
        alpha_r = (1 - alpha_bg / 2) * g_mask[..., new_h: new_h + h, new_w: new_w + w]

        reflect_mask = alpha_r * reflect_blur
        reflection_layer = (min(1., 4 * (1 - alpha_bg)) * reflect_mask).pow(1 / 2.2)

    blended = (reflect_mask + background_mask).pow(1 / 2.2)
    background_layer = background_mask.pow(1 / 2.2)
    return blended, background_layer, reflection_layer
def __call__(self, image, label):
    """Blur ``image`` with a randomly drawn sigma; ``label`` passes through.

    One sigma is obtained from ``self.get_params`` for the configured range
    and used for both blur directions.

    Returns:
        Tuple ``(blurred_image, label)``.
    """
    sigma_min, sigma_max = self.sigma[0], self.sigma[1]
    sigma = self.get_params(sigma_min, sigma_max)
    # TODO need a higher version of torchvision
    blurred = F.gaussian_blur(image, self.kernel_size, [sigma, sigma])
    return blurred, label
def _apply_op(img: Tensor, op_name: str, magnitude: float,
              interpolation: InterpolationMode, fill: Optional[List[float]],
              img2: Tensor):
    """Apply the augmentation named ``op_name`` to ``img``.

    Args:
        img: image to transform. The numpy/PIL based operators ("Cutout",
            "Colorshift", "Scale", "EqualizeYUV", "Samplesharing") convert
            through ``np.array``/``Image.fromarray`` or read ``img.size`` as
            ``(w, h)`` -- presumably a PIL image is expected there; confirm
            against callers.
        op_name: name of the operator to apply.
        magnitude: strength of the operator; its meaning depends on the
            operator and it is ignored by operators without a strength.
        interpolation: interpolation mode for the affine/rotate operators.
        fill: fill value for areas exposed by the affine/rotate operators.
        img2: second image, only used by "Samplesharing" as blend partner.

    Returns:
        The transformed image.

    Raises:
        ValueError: if ``op_name`` is not a known operator.
    """
    # Magnitude ranges of the custom operators (value range, signed):
    # "Samplesharing": (torch.linspace(0.0, 0.4, num_bins), True),
    # "Gaussian": (torch.linspace(0.0, 0.4, num_bins), True),
    # "Flip": (torch.linspace(0.0), False),
    # "Cutout": (torch.linspace(0.0, 60.0, num_bins), True),
    # "Colorshift": (torch.linspace(-20.0, 20.0, num_bins), True),
    # "Scale": (torch.linspace(0.6, 1.4, num_bins), True),
    # "EqualizeYUV": (torch.linspace(0.0), False),
    if op_name == "ShearX":
        img = F.affine(img, angle=0.0, translate=[0, 0], scale=1.0,
                       shear=[math.degrees(magnitude), 0.0],
                       interpolation=interpolation, fill=fill)
    elif op_name == "ShearY":
        img = F.affine(img, angle=0.0, translate=[0, 0], scale=1.0,
                       shear=[0.0, math.degrees(magnitude)],
                       interpolation=interpolation, fill=fill)
    elif op_name == "TranslateX":
        img = F.affine(img, angle=0.0, translate=[int(magnitude), 0],
                       scale=1.0, interpolation=interpolation,
                       shear=[0.0, 0.0], fill=fill)
    elif op_name == "TranslateY":
        img = F.affine(img, angle=0.0, translate=[0, int(magnitude)],
                       scale=1.0, interpolation=interpolation,
                       shear=[0.0, 0.0], fill=fill)
    elif op_name == "Rotate":
        img = F.rotate(img, magnitude, interpolation=interpolation, fill=fill)
    elif op_name == "Brightness":
        img = F.adjust_brightness(img, 1.0 + magnitude)
    elif op_name == "Color":
        img = F.adjust_saturation(img, 1.0 + magnitude)
    elif op_name == "Contrast":
        img = F.adjust_contrast(img, 1.0 + magnitude)
    elif op_name == "Sharpness":
        img = F.adjust_sharpness(img, 1.0 + magnitude)
    elif op_name == "Posterize":
        img = F.posterize(img, int(magnitude))
    elif op_name == "Solarize":
        img = F.solarize(img, magnitude)
    elif op_name == "AutoContrast":
        img = F.autocontrast(img)
    elif op_name == "Equalize":
        img = F.equalize(img)
    elif op_name == "Invert":
        img = F.invert(img)
    elif op_name == "Identity":
        pass
    elif op_name == "Gaussian":
        # gaussian_blur rejects non-positive sigma, while the magnitude
        # range starts at 0 and is marked as signed: use the absolute value
        # and treat 0 as "no blur".
        blur_sigma = abs(magnitude)
        if blur_sigma > 0:
            img = F.gaussian_blur(img, [3, 3], blur_sigma)
    elif op_name == "Flip":
        if torch.rand(1) < 0.5:
            img = F.hflip(img)
        if torch.rand(1) < 0.5:
            img = F.vflip(img)
    elif op_name == "Cutout":
        # NOTE(review): the patch position assumes 224x224 inputs.
        x = int(torch.randint(0, int(224 - magnitude), (1, )))
        y = int(torch.randint(0, int(224 - magnitude), (1, )))
        # import pdb;pdb.set_trace()
        img = np.array(img, dtype=np.uint8)
        img[y:y + int(magnitude), x:x + int(magnitude)] = 124
        img = Image.fromarray(img)
    elif op_name == "Colorshift":
        # Shift in float and clip before casting: a direct cast to uint8
        # would wrap values outside [0, 255] instead of saturating them.
        shifted = np.asarray(img, dtype=np.float64) + float(magnitude)
        img = Image.fromarray(np.clip(shifted, 0, 255).astype(np.uint8))
    elif op_name == "Scale":
        w, h = img.size
        # print(w,h,magnitude)
        img = F.resize(img, [int(h * magnitude), int(w * magnitude)])
    elif op_name == "EqualizeYUV":
        img = cv2.cvtColor(np.array(img), cv2.COLOR_BGR2YUV)
        # F.equalize works on channel-first uint8 tensors.
        img = F.equalize(
            torch.tensor(np.transpose(img, (2, 0, 1)), dtype=torch.uint8))
        # print(img.size())
        img = img.permute(1, 2, 0)
        # print(img.size())
        img = Image.fromarray(cv2.cvtColor(np.array(img), cv2.COLOR_YUV2BGR))
        # print(img.size)
    elif op_name == "Samplesharing":
        # Convex combination of the two images, weighted by magnitude.
        img = np.asarray(img)
        img2 = np.asarray(img2)
        img = np.array(img2 * magnitude + img * (1 - magnitude),
                       dtype=np.uint8)
        img = Image.fromarray(img)
    else:
        raise ValueError(
            "The provided operator {} is not recognized.".format(op_name))
    return img
def forward(self, image, mask):
    """Blur ``image`` with a randomly drawn sigma; ``mask`` passes through.

    One sigma is obtained from ``self.get_params`` for the configured range
    and used for both blur directions.

    Returns:
        Tuple ``(blurred_image, mask)``.
    """
    sigma_min, sigma_max = self.sigma[0], self.sigma[1]
    sigma = self.get_params(sigma_min, sigma_max)
    blurred = F.gaussian_blur(image, self.kernel_size, [sigma, sigma])
    return blurred, mask