def test_adjusts_L_mode(self):
    """Check that every colour adjustment keeps a greyscale image in mode 'L'."""
    pixels = np.array(
        [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1],
        dtype=np.uint8,
    ).reshape([2, 2, 3])
    grey = Image.fromarray(pixels, mode='RGB').convert('L')

    # (adjustment function, factor) pairs, checked in this order.
    adjustments = (
        (F.adjust_brightness, 2),
        (F.adjust_saturation, 2),
        (F.adjust_contrast, 2),
        (F.adjust_hue, 0.4),
        (F.adjust_gamma, 0.5),
    )
    for adjust, factor in adjustments:
        assert adjust(grey, factor).mode == 'L'
def test_adjust_saturation(self):
    """Compare ``F.adjust_saturation`` against reference pixels for three factors."""
    shape = [2, 2, 3]
    source = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
    x_np = np.array(source, dtype=np.uint8).reshape(shape)
    x_pil = Image.fromarray(x_np, mode='RGB')

    # (saturation factor, expected flat pixel values).
    # A factor of 1 must leave the image unchanged.
    cases = (
        (1, source),
        (0.5, [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]),
        (2, [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]),
    )
    for factor, expected in cases:
        actual = np.array(F.adjust_saturation(x_pil, factor))
        wanted = np.array(expected, dtype=np.uint8).reshape(shape)
        assert np.allclose(actual, wanted)
def torchvision_transform(self, img):
    """Shift hue by 0.1, then scale saturation and brightness by 1.2 each."""
    hue_shifted = torchvision.adjust_hue(img, hue_factor=0.1)
    saturated = torchvision.adjust_saturation(hue_shifted,
                                              saturation_factor=1.2)
    return torchvision.adjust_brightness(saturated, brightness_factor=1.2)
def __call__(self, img, mask):
    """Randomly rescale the saturation of ``img``; ``mask`` passes through.

    The factor is drawn uniformly from
    ``[1 - self.saturation, 1 + self.saturation]``.

    Returns:
        ``(adjusted_img, mask)``.
    """
    assert img.size == mask.size
    lower = 1 - self.saturation
    upper = 1 + self.saturation
    factor = random.uniform(lower, upper)
    return tf.adjust_saturation(img, factor), mask
def __getitem__(self, idx):
    """Load sample ``idx`` and assemble the model inputs for it.

    The image is opened as RGB and a head box is derived from the eye
    position. Outside test mode, random box jitter, crop, horizontal flip
    and colour jitter are applied. A head-position channel, a face crop and
    a Gaussian gaze heatmap are then built.

    Args:
        idx: Index into ``self.paths`` / ``self.bboxes`` / ``self.eyes`` /
            ``self.gazes``.

    Returns:
        When ``self.training == 'test'``:
        ``(img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside,
        image_path)``; otherwise
        ``(img, face, head_channel, gaze_heatmap, image_path, gaze_inside)``.
    """
    gaze_inside = True
    image_path = self.paths[idx][0][0]
    image_path = os.path.join(self.root_dir, image_path)
    img = Image.open(image_path)
    img = img.convert('RGB')
    width, height = img.size

    box = self.bboxes[0, idx][0]  # currently unused; head box comes from eye
    eye = self.eyes[0, idx][0]
    # todo: process gaze differently for training or testing
    gaze = self.gazes[0, idx].mean(axis=0)

    gaze_x, gaze_y = gaze.tolist()
    eye_x, eye_y = eye.tolist()

    # Build a head box of +/-0.15 (normalised) around the eye, in pixels.
    k = 0.1
    x_min = (eye_x - 0.15) * width
    y_min = (eye_y - 0.15) * height
    x_max = (eye_x + 0.15) * width
    y_max = (eye_y + 0.15) * height
    if x_min < 0:
        x_min = 0
    if y_min < 0:
        y_min = 0
    if x_max < 0:
        x_max = 0
    if y_max < 0:
        y_max = 0
    # Expand the box a bit.
    # NOTE: the max side is expanded using the already-expanded min side,
    # so the growth is slightly asymmetric; kept as in the original code.
    x_min -= k * abs(x_max - x_min)
    y_min -= k * abs(y_max - y_min)
    x_max += k * abs(x_max - x_min)
    y_max += k * abs(y_max - y_min)
    x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])

    if self.imshow:
        img.save("origin_img.jpg")

    if self.training == 'test':
        imsize = torch.IntTensor([width, height])
    else:
        ## data augmentation

        # Jitter (expansion-only) bounding box size
        if np.random.random_sample() <= 0.5:
            k = np.random.random_sample() * 0.2
            x_min -= k * abs(x_max - x_min)
            y_min -= k * abs(y_max - y_min)
            x_max += k * abs(x_max - x_min)
            y_max += k * abs(y_max - y_min)

        # Random Crop
        if np.random.random_sample() <= 0.5:
            # Calculate the minimum valid range of the crop that doesn't
            # exclude the face and the gaze target
            crop_x_min = np.min([gaze_x * width, x_min, x_max])
            crop_y_min = np.min([gaze_y * height, y_min, y_max])
            crop_x_max = np.max([gaze_x * width, x_min, x_max])
            crop_y_max = np.max([gaze_y * height, y_min, y_max])

            # Randomly select a random top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Find the range of valid crop width and height starting from
            # the (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min
            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min,
                                            crop_height_max)

            # Crop it
            img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                          crop_width)

            # Record the crop's (x, y) offset
            offset_x, offset_y = crop_x_min, crop_y_min

            # convert coordinates into the cropped frame
            x_min, y_min, x_max, y_max = (x_min - offset_x,
                                          y_min - offset_y,
                                          x_max - offset_x,
                                          y_max - offset_y)
            gaze_x, gaze_y = (
                (gaze_x * width - offset_x) / float(crop_width),
                (gaze_y * height - offset_y) / float(crop_height),
            )

            width, height = crop_width, crop_height

        # Random flip
        if np.random.random_sample() <= 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            x_max_2 = width - x_min
            x_min_2 = width - x_max
            x_max = x_max_2
            x_min = x_min_2
            gaze_x = 1 - gaze_x

        # Random color change
        if np.random.random_sample() <= 0.5:
            img = TF.adjust_brightness(
                img, brightness_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_contrast(
                img, contrast_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_saturation(
                img, saturation_factor=np.random.uniform(0, 1.5))

    head_channel = chong_imutils.get_head_box_channel(
        x_min, y_min, x_max, y_max, width, height,
        resolution=self.input_size, coordconv=False).unsqueeze(0)

    # Crop the face
    face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    if self.imshow:
        img.save("img_aug.jpg")
        face.save('face_aug.jpg')

    if self.transform is not None:
        img = self.transform(img)
        face = self.transform(face)

    # generate the heat map used for deconv prediction
    gaze_heatmap = torch.zeros(self.output_size,
                               self.output_size)  # size of the output
    # Train and test draw the same Gaussian label map.
    gaze_heatmap = chong_imutils.draw_labelmap(
        gaze_heatmap,
        [gaze_x * self.output_size, gaze_y * self.output_size],
        3,
        type='Gaussian')
    # A stray bare ``return`` used to sit here. It made the method yield
    # ``None`` and left everything below unreachable.

    if self.imshow:
        plt.figure(111)
        img = 255 - chong_imutils.unnorm(img.numpy()) * 255
        img = np.clip(img, 0, 255)
        plt.imshow(np.transpose(img, (1, 2, 0)))
        plt.imshow(imresize(gaze_heatmap,
                            (self.input_size, self.input_size)),
                   cmap='jet', alpha=0.3)
        plt.imshow(imresize(1 - head_channel.squeeze(0),
                            (self.input_size, self.input_size)),
                   alpha=0.2)
        plt.savefig('viz_aug.png')

    if self.training == 'test':
        return (img, face, head_channel, eye, gaze_heatmap, gaze,
                gaze_inside, image_path)
    else:
        return (img, face, head_channel, gaze_heatmap, image_path,
                gaze_inside)
def __getitem__(self, idx):
    """Load sample ``idx`` and return tensors for depth completion.

    Three paths, chosen by ``self.augment`` / ``self.mode``:

    * augmented training: optional top crop, random flip, rotation, colour
      jitter, random up-scaling and a random ``self.height x self.width``
      crop; depth values are divided by the scale factor;
    * plain ``'train'`` / ``'val'``: optional top crop and a random crop;
    * anything else: optional top crop only (when ``args.test_crop``).

    ``K`` is indexed 0..3 and updated alongside the geometric transforms
    (presumably fx, fy, cx, cy -- confirm against ``_load_data``).

    Returns:
        dict with keys ``'rgb'``, ``'dep'``, ``'gt'`` and ``'K'``.
    """
    rgb, depth, gt, K = self._load_data(idx)

    if self.augment and self.mode == 'train':
        # Top crop if needed
        if self.args.top_crop > 0:
            width, height = rgb.size
            rgb = TF.crop(rgb, self.args.top_crop, 0,
                          height - self.args.top_crop, width)
            depth = TF.crop(depth, self.args.top_crop, 0,
                            height - self.args.top_crop, width)
            gt = TF.crop(gt, self.args.top_crop, 0,
                         height - self.args.top_crop, width)
            K[3] = K[3] - self.args.top_crop

        width, height = rgb.size

        _scale = np.random.uniform(1.0, 1.5)
        # ``np.int`` was removed from NumPy; the builtin truncates the same.
        scale = int(height * _scale)
        degree = np.random.uniform(-5.0, 5.0)
        flip = np.random.uniform(0.0, 1.0)

        # Horizontal flip
        if flip > 0.5:
            rgb = TF.hflip(rgb)
            depth = TF.hflip(depth)
            gt = TF.hflip(gt)
            K[2] = width - K[2]

        # Rotation (nearest for depth maps so no values are blended)
        rgb = TF.rotate(rgb, angle=degree, resample=Image.BICUBIC)
        depth = TF.rotate(depth, angle=degree, resample=Image.NEAREST)
        gt = TF.rotate(gt, angle=degree, resample=Image.NEAREST)

        # Color jitter
        brightness = np.random.uniform(0.6, 1.4)
        contrast = np.random.uniform(0.6, 1.4)
        saturation = np.random.uniform(0.6, 1.4)

        rgb = TF.adjust_brightness(rgb, brightness)
        rgb = TF.adjust_contrast(rgb, contrast)
        rgb = TF.adjust_saturation(rgb, saturation)

        # Resize
        rgb = TF.resize(rgb, scale, Image.BICUBIC)
        depth = TF.resize(depth, scale, Image.NEAREST)
        gt = TF.resize(gt, scale, Image.NEAREST)

        K[0] = K[0] * _scale
        K[1] = K[1] * _scale
        K[2] = K[2] * _scale
        K[3] = K[3] * _scale

        # Crop
        width, height = rgb.size

        assert self.height <= height and self.width <= width, \
            "patch size is larger than the input size"

        h_start = random.randint(0, height - self.height)
        w_start = random.randint(0, width - self.width)

        rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
        depth = TF.crop(depth, h_start, w_start, self.height, self.width)
        gt = TF.crop(gt, h_start, w_start, self.height, self.width)

        K[2] = K[2] - w_start
        K[3] = K[3] - h_start

        rgb = TF.to_tensor(rgb)
        rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                           (0.229, 0.224, 0.225), inplace=True)

        # Depth values are divided by the same factor used to up-scale.
        depth = TF.to_tensor(np.array(depth))
        depth = depth / _scale

        gt = TF.to_tensor(np.array(gt))
        gt = gt / _scale
    elif self.mode in ['train', 'val']:
        # Top crop if needed
        if self.args.top_crop > 0:
            width, height = rgb.size
            rgb = TF.crop(rgb, self.args.top_crop, 0,
                          height - self.args.top_crop, width)
            depth = TF.crop(depth, self.args.top_crop, 0,
                            height - self.args.top_crop, width)
            gt = TF.crop(gt, self.args.top_crop, 0,
                         height - self.args.top_crop, width)
            K[3] = K[3] - self.args.top_crop

        # Crop
        width, height = rgb.size

        assert self.height <= height and self.width <= width, \
            "patch size is larger than the input size"

        h_start = random.randint(0, height - self.height)
        w_start = random.randint(0, width - self.width)

        rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
        depth = TF.crop(depth, h_start, w_start, self.height, self.width)
        gt = TF.crop(gt, h_start, w_start, self.height, self.width)

        K[2] = K[2] - w_start
        K[3] = K[3] - h_start

        rgb = TF.to_tensor(rgb)
        rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                           (0.229, 0.224, 0.225), inplace=True)

        depth = TF.to_tensor(np.array(depth))

        gt = TF.to_tensor(np.array(gt))
    else:
        if self.args.top_crop > 0 and self.args.test_crop:
            width, height = rgb.size
            rgb = TF.crop(rgb, self.args.top_crop, 0,
                          height - self.args.top_crop, width)
            depth = TF.crop(depth, self.args.top_crop, 0,
                            height - self.args.top_crop, width)
            gt = TF.crop(gt, self.args.top_crop, 0,
                         height - self.args.top_crop, width)
            K[3] = K[3] - self.args.top_crop

        rgb = TF.to_tensor(rgb)
        rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                           (0.229, 0.224, 0.225), inplace=True)

        depth = TF.to_tensor(np.array(depth))

        gt = TF.to_tensor(np.array(gt))

    if self.args.num_sample > 0:
        depth = self.get_sparse_depth(depth, self.args.num_sample)

    output = {'rgb': rgb, 'dep': depth, 'gt': gt, 'K': torch.Tensor(K)}

    return output
def __call__(self, img, lbl_semseg, lbl_side):
    """Randomly rescale the saturation of ``img``; both labels pass through.

    The factor is drawn uniformly from
    ``[1 - self.saturation, 1 + self.saturation]``.

    Returns:
        ``(adjusted_img, lbl_semseg, lbl_side)``.
    """
    assert img.size == lbl_semseg.size == lbl_side.size
    lower = 1 - self.saturation
    upper = 1 + self.saturation
    factor = random.uniform(lower, upper)
    adjusted = tf.adjust_saturation(img, factor)
    return adjusted, lbl_semseg, lbl_side
def train_trans(image, mask):
    """Jointly augment a training image and its label mask.

    Steps: random rescale, optional random crop to ``args.crop_size``
    (padding first when the image is not taller than the crop), optional
    horizontal flip, colour jitter on the image, then conversion to a
    normalised image tensor and a uint8 mask tensor.

    Reads ``args.colorjitter_factor``, ``args.train_size``,
    ``args.crop_size``, ``args.hflip``, ``args.dataset_mean`` and
    ``args.dataset_std`` from the enclosing scope.

    Returns:
        ``(image, mask)`` as torch tensors.
    """
    # Generate random parameters for augmentation
    bf = np.random.uniform(1 - args.colorjitter_factor,
                           1 + args.colorjitter_factor)
    cf = np.random.uniform(1 - args.colorjitter_factor,
                           1 + args.colorjitter_factor)
    sf = np.random.uniform(1 - args.colorjitter_factor,
                           1 + args.colorjitter_factor)
    hf = np.random.uniform(-args.colorjitter_factor,
                           +args.colorjitter_factor)
    # ``np.random.randint(0, 1)`` can only return 0 (upper bound exclusive),
    # so the flip never fired; draw a real coin flip instead.
    pflip = np.random.random() > 0.5

    # Random scaling
    scale_factor = np.random.uniform(0.75, 2.0)
    scaled_train_size = [
        int(element * scale_factor) for element in args.train_size
    ]

    # Resize, 1 for Image.LANCZOS
    image = TF.resize(image, scaled_train_size, interpolation=1)
    # Resize, 0 for Image.NEAREST
    mask = TF.resize(mask, scaled_train_size, interpolation=0)

    # Random cropping
    if not args.train_size == args.crop_size:
        # PIL image: (width, height) vs. args.size: (height, width)
        if image.size[1] <= args.crop_size[0]:
            pad_h = args.crop_size[0] - image.size[1] + 1
            pad_w = args.crop_size[1] - image.size[0] + 1
            image = ImageOps.expand(image, border=(0, 0, pad_w, pad_h),
                                    fill=0)
            mask = ImageOps.expand(mask, border=(0, 0, pad_w, pad_h),
                                   fill=19)

        # From PIL to Tensor
        image = TF.to_tensor(image)
        mask = TF.to_tensor(mask)
        h, w = image.size()[1], image.size()[2]
        th, tw = args.crop_size

        # The upper bound is exclusive: ``+ 1`` makes the last valid offset
        # reachable and avoids a ValueError when the sizes are equal.
        i = np.random.randint(0, h - th + 1)
        j = np.random.randint(0, w - tw + 1)
        image_crop = image[:, i:i + th, j:j + tw]
        mask_crop = mask[:, i:i + th, j:j + tw]

        image = TF.to_pil_image(image_crop)
        mask = TF.to_pil_image(mask_crop[0, :, :])

    # H-flip
    if pflip and args.hflip == True:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    # Color jitter
    image = TF.adjust_brightness(image, bf)
    image = TF.adjust_contrast(image, cf)
    image = TF.adjust_saturation(image, sf)
    image = TF.adjust_hue(image, hf)

    # From PIL to Tensor
    image = TF.to_tensor(image)

    # Normalize
    image = TF.normalize(image, args.dataset_mean, args.dataset_std)

    # Convert ids to train_ids
    mask = np.array(mask, np.uint8)  # PIL Image to numpy array
    mask = torch.from_numpy(mask)  # Numpy array to tensor

    return image, mask
def __call__(self, inputs):
    """Resize an image pair (and depth) and optionally jitter the colours.

    Args:
        inputs: Indexable holding ``(img1, img2, depth, phase, fb)``;
            ``img2``, ``depth`` and ``fb`` may be ``None``.

    Returns:
        ``(img1, img2, depth, fb)``. When ``phase == 'test'`` the tuple is
        returned right after the images are resized and ``fb`` rescaled,
        with ``depth`` untouched.
    """
    img1, img2, depth, phase, fb = (inputs[0], inputs[1], inputs[2],
                                    inputs[3], inputs[4])

    height = img1.height
    width = img1.width
    original_width = width

    # A size of [-1] means "round up to the next multiple of 32".
    # The computed size is stored on the instance and reused afterwards.
    if self.size == [-1]:
        divisor = 32.0
        height = int(math.ceil(height / divisor) * divisor)
        width = int(math.ceil(width / divisor) * divisor)
        self.size = (height, width)

    resize_image = transforms.Resize(self.size, Image.BICUBIC)
    img1 = resize_image(img1)
    if img2 is not None:
        img2 = resize_image(img2)

    if fb is not None:
        # Scaling of the focal for e.g. stereo photo loss
        fb = fb * (float(self.size[1]) / float(original_width))

    if phase == 'test':
        return img1, img2, depth, fb

    if depth is not None:
        depth = transforms.Resize(self.size, Image.NEAREST)(depth)

        if not self.size == 0:
            # This maps the VKITTI depth to [0, 1]
            # with 1 being 655.35 meters
            normalised = np.array(depth, dtype=np.float32)
            normalised /= 65535.0  # cm->m
            normalised[normalised < 0.0] = 0.0
            depth = Image.fromarray(normalised, 'F')

        # Maps depth to [-1, 1] to match tanh output of the depth model
        depth = np.array(depth, dtype=np.float32)
        depth = depth * 2.0
        depth -= 1.0

    if random.random() < 0.5:
        # Some brightness, contrast and saturation augmentation;
        # the same factors are applied to both images.
        brightness = random.uniform(0.8, 1.0)
        contrast = random.uniform(0.8, 1.0)
        saturation = random.uniform(0.8, 1.0)

        def jitter(picture):
            picture = F.adjust_brightness(picture, brightness)
            picture = F.adjust_contrast(picture, contrast)
            return F.adjust_saturation(picture, saturation)

        img1 = jitter(img1)
        if img2 is not None:
            img2 = jitter(img2)

    return img1, img2, depth, fb
def transform_triplets(self, img, gt1, gt2):
    """Resize, randomly augment and tensorise an ``(img, gt1, gt2)`` triplet.

    Geometric augmentations (flips, rotations, resized crop) use the same
    parameters for all three inputs. Colour jitter draws independent random
    factors for each of them. Every augmentation is gated by its
    ``self.with_*`` flag and a coin flip.

    Returns:
        ``(img, gt1, gt2)`` as tensors.
    """
    side = self.img_size

    # resize image and convert to PIL
    frames = [
        TF.resize(TF.to_pil_image(item), [side, side])
        for item in (img, gt1, gt2)
    ]

    if self.with_random_hflip and random.random() > 0.5:
        frames = [TF.hflip(frame) for frame in frames]

    if self.with_random_vflip and random.random() > 0.5:
        frames = [TF.vflip(frame) for frame in frames]

    if self.with_random_rot90 and random.random() > 0.5:
        frames = [TF.rotate(frame, 90) for frame in frames]

    if self.with_random_rot180 and random.random() > 0.5:
        frames = [TF.rotate(frame, 180) for frame in frames]

    if self.with_random_rot270 and random.random() > 0.5:
        frames = [TF.rotate(frame, 270) for frame in frames]

    if self.with_color_jittering and random.random() > 0.5:
        jittered = []
        for frame in frames:
            hue = random.random() * 0.5 - 0.25  # -0.25 ~ +0.25
            frame = TF.adjust_hue(frame, hue_factor=hue)
            saturation = random.random() * 0.8 + 0.8  # 0.8 ~ +1.6
            frame = TF.adjust_saturation(frame,
                                         saturation_factor=saturation)
            jittered.append(frame)
        frames = jittered

    if self.with_random_crop and random.random() > 0.5:
        cropper = transforms.RandomResizedCrop(size=self.img_size)
        i, j, h, w = cropper.get_params(img=frames[0],
                                        scale=(0.5, 1.0),
                                        ratio=self.crop_ratio)
        frames = [
            TF.resized_crop(frame, i, j, h, w,
                            size=(self.img_size, self.img_size))
            for frame in frames
        ]

    # to tensor
    img, gt1, gt2 = (TF.to_tensor(frame) for frame in frames)
    return img, gt1, gt2
def __call__(self, sample):
    """With probability 0.5, rescale saturation of both stereo views.

    One factor is drawn uniformly from ``[0.8, 1.2]`` and applied to
    ``sample['left']`` and ``sample['right']``. ``sample`` is modified in
    place and returned.
    """
    apply_jitter = np.random.random() < 0.5
    if not apply_jitter:
        return sample

    saturation = np.random.uniform(0.8, 1.2)
    for view in ('left', 'right'):
        sample[view] = F.adjust_saturation(sample[view], saturation)
    return sample
def adjust_saturation(image, mask, factor=0.5, p=1):
    """Scale the saturation of ``image`` by ``factor`` with probability ``p``.

    ``mask`` is returned unchanged either way.

    Returns:
        ``(image, mask)``.
    """
    should_apply = random.random() <= p
    if not should_apply:
        return image, mask
    return tf.adjust_saturation(image, factor), mask
def __call__(self, img, gt=None, batch_size=1):
    """Apply the configured colour/affine/flip transforms, then cut crops.

    Args:
        img: Image to transform.
        gt: Optional ground truth. It receives the same affine, flip and
            crop as ``img``, resampled with nearest neighbour and filled
            with 255 in the affine step.
        batch_size: Number of random crops to produce.

    Returns:
        ``(img_crops, gt_crops)``: lists of numpy arrays, one per crop.
        ``gt_crops`` is ``None`` when ``gt`` is ``None``.
    """
    has_gt = gt is not None

    # color
    img = TF.adjust_brightness(img, self.TF_params.brightness_factor)
    img = TF.adjust_contrast(img, self.TF_params.contrast_factor)
    img = TF.adjust_gamma(img, self.TF_params.gamma, gain=1)
    img = TF.adjust_hue(img, self.TF_params.hue_factor)
    img = TF.adjust_saturation(img, self.TF_params.saturation_factor)

    # affine: translate and scale are deliberately left at identity.
    scale = 1.0
    translate = (0, 0)
    img = TF.affine(img, self.TF_params.angle, translate, scale,
                    self.TF_params.shear, PIL.Image.BICUBIC,
                    fillcolor=None)
    if has_gt:
        gt = TF.affine(gt, self.TF_params.angle, translate, scale,
                       self.TF_params.shear, PIL.Image.NEAREST,
                       fillcolor=255)

    if self.TF_params.hflip:
        img = TF.hflip(img)
        if has_gt:
            gt = TF.hflip(gt)

    img_crops = []
    gt_crops = [] if has_gt else None

    for _ in range(batch_size):
        # Each iteration draws a fresh crop, stored in ``self.crop_tuple``.
        self.getNewRandomCrop(img)
        cropped_img = TF.resized_crop(img,
                                      self.crop_tuple[0],
                                      self.crop_tuple[1],
                                      self.crop_tuple[2],
                                      self.crop_tuple[3],
                                      self.TF_params.size,
                                      interpolation=PIL.Image.BICUBIC)
        img_crops.append(np.array(cropped_img))
        if has_gt:
            cropped_gt = TF.resized_crop(gt,
                                         self.crop_tuple[0],
                                         self.crop_tuple[1],
                                         self.crop_tuple[2],
                                         self.crop_tuple[3],
                                         self.TF_params.size,
                                         interpolation=PIL.Image.NEAREST)
            gt_crops.append(np.array(cropped_gt))

    return img_crops, gt_crops
def saturation(im, factor):
    """Scale the saturation of ``im`` by ``factor``.

    Args:
        im: PIL image (as judged by ``isPIL``) or ``torch.Tensor``.
        factor: Saturation multiplier; ``1`` returns ``im`` unchanged
            without any type check.

    Returns:
        The adjusted image, or ``im`` itself when ``factor == 1``.

    Raises:
        AssertionError: If ``im`` is neither a PIL image nor a tensor.
    """
    if factor == 1:
        return im
    # ``isinstance`` needs the object as its first argument; the previous
    # one-argument call raised TypeError for every non-PIL input.
    assert isPIL(im) or isinstance(im, torch.Tensor), f"Got type {type(im)}."
    return TFF.adjust_saturation(im, factor)
def torchvision_transform(self, img):
    """Apply brightness, contrast and saturation x1.5, then a 0.5 hue shift."""
    brightened = torchvision.adjust_brightness(img, 1.5)
    contrasted = torchvision.adjust_contrast(brightened, 1.5)
    saturated = torchvision.adjust_saturation(contrasted, 1.5)
    return torchvision.adjust_hue(saturated, 0.5)
def __getitem__(self, index):
    """Load one frame sequence and return stacked LR / bicubic / HR tensors.

    Frames ``1.jpg`` .. ``<self.relation>.jpg`` are read from the sequence
    directory. One crop offset, one pair of flip decisions and one shuffled
    colour-jitter pipeline are drawn up front and reused for every frame.

    Returns:
        ``(t0, t1, t2)``: low-resolution frames, their bicubic up-scaled
        versions, and the high-resolution frames, concatenated along dim 0.
    """
    squeen = self.sequeueslists[index]
    # NOTE(review): ``image_filenames`` is built but never used below.
    image_filenames = [
        join(squeen, x) for x in listdir(squeen) if is_image_file(x)
    ]
    randi = 0
    cropsize = self.crop_size
    hr_scale = Resize((cropsize, cropsize), interpolation=Image.BICUBIC)
    # first image of seq
    imgname = join(squeen, str(randi + 1) + '.jpg')
    hr_image = Image.open(imgname)
    w, h = hr_image.size
    # ``ragey`` is drawn from the height range, ``rangx`` from the width range.
    ragey = random.randint(0, h - cropsize)
    rangx = random.randint(0, w - cropsize)
    ################################
    # NOTE(review): this call passes (ragey, rangx) but the loop below
    # passes (rangx, ragey). One of the two orders is presumably wrong;
    # confirm against the signature of ``seq_randomcrop``.
    hr_image = self.seq_randomcrop(hr_image, ragey, rangx, cropsize,
                                   cropsize)
    # Flip decisions are drawn once and replayed for the remaining frames.
    hfp = random.random()
    if hfp < 0.5:
        hr_image = self.randomHflip(hr_image)
    vfp = random.random()
    if vfp < 0.5:
        hr_image = self.randomVflip(hr_image)
    # Colour-jitter strengths.
    brightness = 0.2
    contrast = 0.2
    saturation = 0.1
    hue = 0.1
    transforms = []
    brightness_factor = np.random.uniform(max(0, 1 - brightness),
                                          1 + brightness)
    transforms.append(
        Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))
    contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
    transforms.append(
        Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))
    saturation_factor = np.random.uniform(max(0, 1 - saturation),
                                          1 + saturation)
    transforms.append(
        Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))
    hue_factor = np.random.uniform(-hue, hue)
    transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
    # Apply the four adjustments in a random order.
    np.random.shuffle(transforms)
    color_transform = Compose(transforms)
    hr_image = color_transform(hr_image)
    ################################
    hr_image = ToTensor()(hr_image)
    lr_image = self.lr_transform(hr_image)
    # Y channel
    # Add a leading frame dimension so frames can be concatenated.
    hr_image = torch.unsqueeze(hr_image, dim=0)
    bic_hr = torch.unsqueeze(ToTensor()(hr_scale(lr_image)), dim=0)
    lr_image = torch.unsqueeze(ToTensor()(lr_image), dim=0)
    t0 = lr_image
    t1 = bic_hr
    t2 = hr_image
    # Remaining frames: same crop offsets, flips and colour transform.
    for i in range(randi + 1, randi + self.relation):
        imgname = join(squeen, str(i + 1) + '.jpg')
        hr_image = Image.open(imgname)
        # data augmentation
        hr_image = self.seq_randomcrop(hr_image, rangx, ragey, cropsize,
                                       cropsize)
        if hfp < 0.5:
            hr_image = self.randomHflip(hr_image)
        if vfp < 0.5:
            hr_image = self.randomVflip(hr_image)
        hr_image = color_transform(hr_image)
        hr_image = ToTensor()(hr_image)
        lr_image = self.lr_transform(hr_image)
        # Y channel
        hr_image = torch.unsqueeze(hr_image, dim=0)
        bic_hr = torch.unsqueeze(ToTensor()(hr_scale(lr_image)), dim=0)
        lr_image = torch.unsqueeze(ToTensor()(lr_image), dim=0)
        t0 = torch.cat((t0, lr_image), 0)
        t1 = torch.cat((t1, bic_hr), 0)
        t2 = torch.cat((t2, hr_image), 0)
    return t0, t1, t2
def saturation(img, saturate):
    """Scale the colour saturation of a PIL image by ``saturate``.

    Uses ``ImageEnhance.Color``. An unreachable second ``return`` that
    referenced the undefined name ``saturate_value`` has been removed.
    """
    return ImageEnhance.Color(img).enhance(saturate)
def __call__(self, image):
    """With probability ``self.prob``, rescale the saturation of ``image``.

    The factor is drawn uniformly from ``[0.5, 2]``. Otherwise ``image`` is
    returned unchanged.
    """
    triggered = random.random() < self.prob
    if not triggered:
        return image
    saturation_factor = random.uniform(0.5, 2)
    return F.adjust_saturation(image, saturation_factor)
def train_trans_alt(image, mask):
    """Alternative joint augmentation for a training image and its mask.

    Both inputs are resized to 512x1024 and optionally cropped to a random
    window whose size is a fraction of ``args.train_size``. They are then
    resized to ``args.train_size`` and optionally flipped. The image is
    colour-jittered with a fixed factor of 0.2 and normalised.

    Reads ``args.train_size``, ``args.hflip``, ``args.dataset_mean`` and
    ``args.dataset_std`` from the enclosing scope.

    Returns:
        ``(image, mask)`` as torch tensors.
    """
    colorjitter_factor = 0.2
    th, tw = args.train_size
    h, w = 512, 1024

    crop_scales = [1.0, 0.8, 0.6, 0.4]

    # Generate random parameters for augmentation
    # ``np.random.randint(0, 1)`` can only return 0 (upper bound exclusive),
    # so the flip never fired; draw a real coin flip instead.
    pflip = np.random.random() > 0.5
    bf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    cf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    sf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    hf = np.random.uniform(-colorjitter_factor, colorjitter_factor)

    # Resize, 1 for Image.LANCZOS
    image = TF.resize(image, (h, w), interpolation=1)
    # Resize, 0 for Image.NEAREST
    mask = TF.resize(mask, (h, w), interpolation=0)

    # Random cropping
    crop_scale = np.random.choice(crop_scales)

    if crop_scale != 1.0:
        # From PIL to Tensor
        image = TF.to_tensor(image)
        mask = TF.to_tensor(mask)
        # NOTE(review): the crop window is computed from args.train_size,
        # not from the 512x1024 size just resized to -- confirm intended.
        h, w = args.train_size
        ch, cw = [int(x * crop_scale) for x in (h, w)]
        i = np.random.randint(0, h - ch)
        j = np.random.randint(0, w - cw)
        image = image[:, i:i + ch, j:j + cw]
        mask = mask[:, i:i + ch, j:j + cw]
        image = TF.to_pil_image(image)
        mask = TF.to_pil_image(mask[0, :, :])

    # Resize, 1 for Image.LANCZOS
    image = TF.resize(image, (th, tw), interpolation=1)
    # Resize, 0 for Image.NEAREST
    mask = TF.resize(mask, (th, tw), interpolation=0)

    # H-flip
    if pflip and args.hflip == True:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    # Color jitter
    image = TF.adjust_brightness(image, bf)
    image = TF.adjust_contrast(image, cf)
    image = TF.adjust_saturation(image, sf)
    image = TF.adjust_hue(image, hf)

    # From PIL to Tensor
    image = TF.to_tensor(image)

    # Normalize
    image = TF.normalize(image, args.dataset_mean, args.dataset_std)

    # Convert ids to train_ids
    mask = np.array(mask, np.uint8)  # PIL Image to numpy array
    mask = torch.from_numpy(mask)  # Numpy array to tensor

    return image, mask
def transform_fn(self, image, mask):
    """Apply the same random geometric augmentation to an image and its mask.

    Both inputs are converted to PIL images, augmented, and converted back
    to channels-last arrays. Brightness and saturation jitter is applied to
    the image only. With ``self.num_classes == 1`` the mask is handled as a
    single image; otherwise each slice along the last mask axis is
    converted and transformed separately and written back into ``mask``.

    Returns:
        ``(image, mask)``: ``image`` divided by 255 as float32, ``mask``
        divided by 255 and cast to uint8.
    """
    if self.num_classes == 1:
        # Convert the channels-last arrays to PIL images
        # (``array_to_img`` is presumably the Keras helper -- confirm).
        image = array_to_img(image, data_format="channels_last")
        mask = array_to_img(mask, data_format="channels_last")
        # Original author's note: input type float32 is not supported
        # here, hence the Keras-style preprocessing helpers.

        # Disabled: resize to 520x520 followed by a random 512x512 crop.
        # References:
        # https://pytorch.org/docs/stable/torchvision/transforms.html
        # https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py

        # Random horizontal flipping
        if random.random() > 0.5:
            image = TF.hflip(image)
            mask = TF.hflip(mask)

        # Random vertical flipping
        if random.random() > 0.5:
            image = TF.vflip(image)
            mask = TF.vflip(mask)

        # Disabled: random to_grayscale.

        # Random affine, identical parameters for image and mask.
        angle = random.randint(0, 90)
        translate = (random.uniform(0, 100), random.uniform(0, 100))
        scale = random.uniform(0.5, 2)
        shear = random.uniform(-10, 10)
        image = TF.affine(image, angle, translate, scale, shear)
        mask = TF.affine(mask, angle, translate, scale, shear)

        # Random adjust_brightness (image only)
        image = TF.adjust_brightness(image,
                                     brightness_factor=random.uniform(
                                         0.8, 1.2))

        # Random adjust_saturation (image only)
        image = TF.adjust_saturation(image,
                                     saturation_factor=random.uniform(
                                         0.8, 1.2))

        # Disabled: random adjust_hue / adjust_gamma.
        # `hue_factor` is the amount of shift in H channel and must be in
        # the interval `[-0.5, 0.5]`.

        # A second random rotation, applied after the affine transform.
        angle = random.randint(0, 90)
        image = TF.rotate(image, angle)
        mask = TF.rotate(mask, angle)

        # Back to channels-last arrays
        image = img_to_array(image, data_format="channels_last")
        mask = img_to_array(mask, data_format="channels_last")
    else:
        # Convert the image, and each slice along the last mask axis,
        # to PIL images.
        image = array_to_img(image, data_format="channels_last")
        mask_pil_array = [None] * mask.shape[-1]
        for i in range(mask.shape[-1]):
            mask_pil_array[i] = array_to_img(mask[:, :, i, np.newaxis],
                                             data_format="channels_last")

        # References:
        # https://pytorch.org/docs/stable/torchvision/transforms.html
        # https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py

        # Random horizontal flipping
        if random.random() > 0.5:
            image = TF.hflip(image)
            for i in range(mask.shape[-1]):
                mask_pil_array[i] = TF.hflip(mask_pil_array[i])

        # Random vertical flipping
        if random.random() > 0.5:
            image = TF.vflip(image)
            for i in range(mask.shape[-1]):
                mask_pil_array[i] = TF.vflip(mask_pil_array[i])

        # Disabled: random to_grayscale.

        # Random affine; shear is fixed at 0 in this branch.
        angle = random.randint(0, 90)
        translate = (random.uniform(0, 100), random.uniform(0, 100))
        scale = random.uniform(0.5, 2)
        shear = random.uniform(0, 0)
        image = TF.affine(image, angle, translate, scale, shear)
        for i in range(mask.shape[-1]):
            mask_pil_array[i] = TF.affine(mask_pil_array[i], angle,
                                          translate, scale, shear)

        # Random adjust_brightness (image only)
        image = TF.adjust_brightness(image,
                                     brightness_factor=random.uniform(
                                         0.8, 1.2))

        # Random adjust_saturation (image only)
        image = TF.adjust_saturation(image,
                                     saturation_factor=random.uniform(
                                         0.8, 1.2))

        # Disabled: random adjust_hue / adjust_gamma, and the extra
        # rotation that the single-class branch performs.
        # `hue_factor` is the amount of shift in H channel and must be in
        # the interval `[-0.5, 0.5]`.

        # Back to channels-last arrays; each mask slice is written back
        # into ``mask`` in place.
        image = img_to_array(image, data_format="channels_last")
        for i in range(mask.shape[-1]):
            # img_to_array(mask_pil_array[i], ...) yields (H, W, 1);
            # take the single channel.
            mask[:, :, i] = img_to_array(
                mask_pil_array[i],
                data_format="channels_last")[:, :, 0].astype('uint8')

    # Per the original note, the arrays are in (0, 255) at this point,
    # so rescale.
    # NOTE(review): the uint8 cast truncates, so presumably only mask
    # pixels equal to 255 survive as 1 -- confirm this is intended.
    image = (image / 255.0).astype('float32')
    mask = (mask / 255.0).astype('uint8')
    return image, mask
def __getitem__(self, index):
    """Load one annotated frame sequence and build its per-frame tensors.

    The CSV at ``self.all_sequence_paths[index]`` is read with the columns
    ``path, xmin, ymin, xmax, ymax, gazex, gazey``.  The head box columns are
    smoothed with ``myutils.smooth_by_conv``.  A gaze of ``(-1, -1)`` is
    treated as "target outside the frame".

    When ``self.test`` is false, one set of augmentation decisions (box
    jitter, crop, horizontal flip, colour change) is drawn per sequence and
    applied identically to every frame, and sequences longer than
    ``self.seq_len_limit`` are cut to a randomly placed window of that length.

    Args:
        index: Position in ``self.all_sequence_paths``.

    Returns:
        In test mode ``(images, faces, head_channels, heatmaps, gazes,
        gaze_inouts)``; otherwise the same tuple without ``gazes``.  Every
        element is produced by ``torch.stack`` over the frames.
    """
    sequence_path = self.all_sequence_paths[index]
    df = pd.read_csv(
        sequence_path,
        header=None,
        index_col=False,
        names=['path', 'xmin', 'ymin', 'xmax', 'ymax', 'gazex', 'gazey'])
    path_parts = sequence_path.split('/')
    show_name, clip = path_parts[-3], path_parts[-2]
    seq_len = len(df.index)

    # moving-avg smoothing of the head box
    window_size = 11  # should be odd number
    for column in ('xmin', 'ymin', 'xmax', 'ymax'):
        df[column] = myutils.smooth_by_conv(window_size, df, column)

    if self.test:
        frames = df
    else:
        # Draw the augmentation decisions once so all frames share them.
        cond_jitter = np.random.random_sample()
        cond_flip = np.random.random_sample()
        cond_color = np.random.random_sample()
        if cond_color < 0.5:
            n1 = np.random.uniform(0.5, 1.5)
            n2 = np.random.uniform(0.5, 1.5)
            n3 = np.random.uniform(0.5, 1.5)
        cond_crop = np.random.random_sample()

        # If longer than seq_len_limit, keep a randomly placed window.
        if seq_len > self.seq_len_limit:
            # randint's upper bound is exclusive; the +1 makes the last
            # valid start index (seq_len - seq_len_limit) reachable.
            sampled_ind = np.random.randint(
                0, seq_len - self.seq_len_limit + 1)
            seq_len = self.seq_len_limit
        else:
            sampled_ind = 0

        if cond_crop < 0.5:
            window = df.iloc[sampled_ind:sampled_ind + seq_len]
            sliced_x_min = window['xmin']
            sliced_x_max = window['xmax']
            sliced_y_min = window['ymin']
            sliced_y_max = window['ymax']
            sliced_gaze_x = window['gazex']
            sliced_gaze_y = window['gazey']

            # Every frame has gaze (-1, -1) exactly when the sums add up
            # to -2 per frame.
            check_sum = sliced_gaze_x.sum() + sliced_gaze_y.sum()
            all_outside = check_sum == -2 * seq_len

            # Calculate the minimum valid range of the crop that doesn't
            # exclude the face and the gaze target
            if all_outside:
                crop_x_min = np.min(
                    [sliced_x_min.min(), sliced_x_max.min()])
                crop_y_min = np.min(
                    [sliced_y_min.min(), sliced_y_max.min()])
                crop_x_max = np.max(
                    [sliced_x_min.max(), sliced_x_max.max()])
                crop_y_max = np.max(
                    [sliced_y_min.max(), sliced_y_max.max()])
            else:
                crop_x_min = np.min([
                    sliced_gaze_x.min(),
                    sliced_x_min.min(),
                    sliced_x_max.min()
                ])
                crop_y_min = np.min([
                    sliced_gaze_y.min(),
                    sliced_y_min.min(),
                    sliced_y_max.min()
                ])
                crop_x_max = np.max([
                    sliced_gaze_x.max(),
                    sliced_x_min.max(),
                    sliced_x_max.max()
                ])
                crop_y_max = np.max([
                    sliced_gaze_y.max(),
                    sliced_y_min.max(),
                    sliced_y_max.max()
                ])

            # Randomly select a random top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Get image size from the first frame listed in the CSV
            path = os.path.join(self.data_dir, show_name, clip,
                                df['path'].iloc[0])
            img = Image.open(path)
            img = img.convert('RGB')
            width, height = img.size

            # Find the range of valid crop width and height starting from
            # the (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min

            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min,
                                            crop_height_max)

        # Only the sampled window is processed during training.
        frames = df.iloc[sampled_ind:sampled_ind + self.seq_len_limit]

    faces, images, head_channels, heatmaps = [], [], [], []
    gazes, gaze_inouts = [], []

    for _, row in frames.iterrows():
        # note: all coordinates are already in image coordinates
        face_x1, face_y1, face_x2, face_y2 = map(
            float, [row['xmin'], row['ymin'], row['xmax'], row['ymax']])
        gaze_x, gaze_y = map(float, [row['gazex'], row['gazey']])

        impath = os.path.join(self.data_dir, show_name, clip, row['path'])
        img = Image.open(impath)
        img = img.convert('RGB')
        width, height = img.size

        if gaze_x == -1 and gaze_y == -1:
            gaze_inside = False
        else:
            # move gaze point that was slightly outside the image back in
            if gaze_x < 0:
                gaze_x = 0
            if gaze_y < 0:
                gaze_y = 0
            gaze_inside = True

        if not self.test:
            ## data augmentation
            # Jitter (expansion-only) bounding box size.
            # NOTE(review): the x2/y2 updates use the already-shifted
            # x1/y1, so the expansion is slightly asymmetric; kept as is.
            if cond_jitter < 0.5:
                k = cond_jitter * 0.1
                face_x1 -= k * abs(face_x2 - face_x1)
                face_y1 -= k * abs(face_y2 - face_y1)
                face_x2 += k * abs(face_x2 - face_x1)
                face_y2 += k * abs(face_y2 - face_y1)
                face_x1 = np.clip(face_x1, 0, width)
                face_x2 = np.clip(face_x2, 0, width)
                face_y1 = np.clip(face_y1, 0, height)
                face_y2 = np.clip(face_y2, 0, height)

            # Random Crop
            if cond_crop < 0.5:
                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                              crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # convert coordinates into the cropped frame
                face_x1, face_x2 = face_x1 - offset_x, face_x2 - offset_x
                face_y1, face_y2 = face_y1 - offset_y, face_y2 - offset_y
                if gaze_inside:
                    gaze_x, gaze_y = gaze_x - offset_x, gaze_y - offset_y
                else:
                    gaze_x = -1
                    gaze_y = -1

                width, height = crop_width, crop_height

            # Flip?
            if cond_flip < 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                face_x1, face_x2 = width - face_x2, width - face_x1
                if gaze_x != -1 and gaze_y != -1:
                    gaze_x = width - gaze_x

            # Random color change
            if cond_color < 0.5:
                img = TF.adjust_brightness(img, brightness_factor=n1)
                img = TF.adjust_contrast(img, contrast_factor=n2)
                img = TF.adjust_saturation(img, saturation_factor=n3)

        # Face crop
        face = img.copy().crop(
            (int(face_x1), int(face_y1), int(face_x2), int(face_y2)))

        # Head channel image
        head_channel = imutils.get_head_box_channel(
            face_x1,
            face_y1,
            face_x2,
            face_y2,
            width,
            height,
            resolution=self.input_size,
            coordconv=False).unsqueeze(0)

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)

        # Deconv output
        if gaze_inside:
            gaze_x /= float(width)  # fractional gaze
            gaze_y /= float(height)
            gaze_heatmap = torch.zeros(
                self.output_size,
                self.output_size)  # set the size of the output
            gaze_map = imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3,
                type='Gaussian')
            gazes.append(torch.FloatTensor([gaze_x, gaze_y]))
        else:
            gaze_map = torch.zeros(self.output_size, self.output_size)
            gazes.append(torch.FloatTensor([-1, -1]))

        faces.append(face)
        images.append(img)
        head_channels.append(head_channel)
        heatmaps.append(gaze_map)
        gaze_inouts.append(torch.FloatTensor([int(gaze_inside)]))

    if self.imshow:
        # Debug dump: one overlay figure per frame, written under 'debug'.
        for frame_idx in range(len(faces)):
            plt.figure(111)
            vis = 255 - imutils.unnorm(images[frame_idx].numpy()) * 255
            vis = np.clip(vis, 0, 255)
            plt.imshow(np.transpose(vis, (1, 2, 0)))
            plt.imshow(imresize(heatmaps[frame_idx],
                                (self.input_size, self.input_size)),
                       cmap='jet',
                       alpha=0.3)
            plt.imshow(imresize(1 - head_channels[frame_idx].squeeze(0),
                                (self.input_size, self.input_size)),
                       alpha=0.2)
            plt.savefig(
                os.path.join(
                    'debug', 'viz_%d_inout=%d.png' %
                    (frame_idx, gaze_inouts[frame_idx])))
            plt.close('all')

    faces = torch.stack(faces)
    images = torch.stack(images)
    head_channels = torch.stack(head_channels)
    heatmaps = torch.stack(heatmaps)
    gazes = torch.stack(gazes)
    gaze_inouts = torch.stack(gaze_inouts)

    if self.test:
        return images, faces, head_channels, heatmaps, gazes, gaze_inouts
    # train
    return images, faces, head_channels, heatmaps, gaze_inouts
def Adjust_saturation(image):
    """Return ``image`` passed through ``F.adjust_saturation`` with factor 2."""
    saturation_factor = 2
    return F.adjust_saturation(image, saturation_factor)
def __getitem__(self, index):
    """Build one sample: scene image, head crop, head mask and gaze heatmap.

    In test mode (``self.test``) the sample is the group
    ``self.X_test.get_group(self.keys[index])``.  All gaze annotations of
    the group are collected and padded with ``[-1, -1]`` up to 20 rows, and
    the heatmap is the average of one Gaussian per valid annotation.

    In training mode the sample comes from ``self.X_train`` /
    ``self.y_train`` and is randomly augmented (box jitter, crop, horizontal
    flip, colour change), each with probability 0.5.

    The head box is derived from the eye position (+/- 0.15 of the image
    size, then expanded by 10%), not from the bbox columns.

    Args:
        index: Index into ``self.keys`` (test) or the training frames.

    Returns:
        Test: ``(img, face, head_channel, gaze_heatmap, cont_gaze, imsize,
        path)``.  Train: ``(img, face, head_channel, gaze_heatmap, path,
        gaze_inside)``.
    """
    if self.test:
        g = self.X_test.get_group(self.keys[index])
        cont_gaze = []
        for _, row in g.iterrows():
            # path and eye position of the last row are used below
            path = row['path']
            eye_x = row['eye_x']
            eye_y = row['eye_y']
            # all ground truth gaze are stacked up
            cont_gaze.append([row['gaze_x'], row['gaze_y']])
        # pad dummy gaze to match size for batch processing
        cont_gaze.extend([-1, -1] for _ in range(len(cont_gaze), 20))
        cont_gaze = torch.FloatTensor(cont_gaze)
        gaze_inside = True  # always consider test samples as inside
    else:
        path = self.X_train.iloc[index]
        eye_x, eye_y, gaze_x, gaze_y = self.y_train.iloc[index]
        gaze_inside = True  # bool(inout)

    img = Image.open(os.path.join(self.data_dir, path))
    img = img.convert('RGB')
    width, height = img.size

    # Head box around the eye position, clamped at 0 only.
    x_min = max((eye_x - 0.15) * width, 0)
    y_min = max((eye_y - 0.15) * height, 0)
    x_max = max((eye_x + 0.15) * width, 0)
    y_max = max((eye_y + 0.15) * height, 0)

    # expand face bbox a bit
    k = 0.1
    x_min -= k * abs(x_max - x_min)
    y_min -= k * abs(y_max - y_min)
    x_max += k * abs(x_max - x_min)
    y_max += k * abs(y_max - y_min)
    x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])

    if self.imshow:
        img.save("origin_img.jpg")

    if self.test:
        imsize = torch.IntTensor([width, height])
    else:
        ## data augmentation
        # Jitter (expansion-only) bounding box size
        if np.random.random_sample() <= 0.5:
            k = np.random.random_sample() * 0.2
            x_min -= k * abs(x_max - x_min)
            y_min -= k * abs(y_max - y_min)
            x_max += k * abs(x_max - x_min)
            y_max += k * abs(y_max - y_min)

        # Random Crop
        if np.random.random_sample() <= 0.5:
            # Calculate the minimum valid range of the crop that doesn't
            # exclude the face and the gaze target
            crop_x_min = np.min([gaze_x * width, x_min, x_max])
            crop_y_min = np.min([gaze_y * height, y_min, y_max])
            crop_x_max = np.max([gaze_x * width, x_min, x_max])
            crop_y_max = np.max([gaze_y * height, y_min, y_max])

            # Randomly select a random top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Find the range of valid crop width and height starting from
            # the (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min

            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min,
                                            crop_height_max)

            # Crop it
            img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                          crop_width)

            # Record the crop's (x, y) offset
            offset_x, offset_y = crop_x_min, crop_y_min

            # convert coordinates into the cropped frame
            x_min, x_max = x_min - offset_x, x_max - offset_x
            y_min, y_max = y_min - offset_y, y_max - offset_y
            gaze_x = (gaze_x * width - offset_x) / float(crop_width)
            gaze_y = (gaze_y * height - offset_y) / float(crop_height)

            width, height = crop_width, crop_height

        # Random flip
        if np.random.random_sample() <= 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            x_min, x_max = width - x_max, width - x_min
            gaze_x = 1 - gaze_x

        # Random color change
        if np.random.random_sample() <= 0.5:
            img = TF.adjust_brightness(
                img, brightness_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_contrast(
                img, contrast_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_saturation(
                img, saturation_factor=np.random.uniform(0, 1.5))

    head_channel = imutils.get_head_box_channel(
        x_min,
        y_min,
        x_max,
        y_max,
        width,
        height,
        resolution=self.input_size,
        coordconv=False).unsqueeze(0)

    # Crop the face
    face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    if self.imshow:
        img.save("img_aug.jpg")
        face.save('face_aug.jpg')

    if self.transform is not None:
        img = self.transform(img)
        face = self.transform(face)

    # generate the heat map used for deconv prediction
    gaze_heatmap = torch.zeros(
        self.output_size, self.output_size)  # set the size of the output
    if self.test:
        # aggregated heatmap over every valid annotation; the padding
        # rows carry -1 and are skipped
        num_valid = 0
        for target_x, target_y in cont_gaze:
            if target_x != -1:
                num_valid += 1
                gaze_heatmap = imutils.draw_labelmap(
                    gaze_heatmap, [
                        target_x * self.output_size,
                        target_y * self.output_size
                    ],
                    3,
                    type='Gaussian')
        # Guard against a group without any valid gaze: dividing by zero
        # would turn the all-zero heatmap into NaNs.
        if num_valid > 0:
            gaze_heatmap /= num_valid
    else:
        gaze_heatmap = imutils.draw_labelmap(
            gaze_heatmap,
            [gaze_x * self.output_size, gaze_y * self.output_size],
            3,
            type='Gaussian')

    if self.imshow:
        # Debug overlay. A separate array is used so the image returned
        # below is not replaced by the visualisation.
        plt.figure(111)
        vis = 255 - imutils.unnorm(img.numpy()) * 255
        vis = np.clip(vis, 0, 255)
        plt.imshow(np.transpose(vis, (1, 2, 0)))
        plt.imshow(imresize(gaze_heatmap,
                            (self.input_size, self.input_size)),
                   cmap='jet',
                   alpha=0.3)
        plt.imshow(imresize(1 - head_channel.squeeze(0),
                            (self.input_size, self.input_size)),
                   alpha=0.2)
        plt.savefig('viz_aug.png')

    if self.test:
        return (img, face, head_channel, gaze_heatmap, cont_gaze, imsize,
                path)
    return img, face, head_channel, gaze_heatmap, path, gaze_inside
def __getitem__(self, idx):
    """Build one sample: scene image, head crop, head mask and gaze heatmap.

    Eye and gaze coordinates in ``self.data[idx]`` are divided by the fixed
    values 640 (x) and 480 (y).
    NOTE(review): this presumably assumes 640x480 frames, while the head box
    below is scaled by the size of the image actually loaded; confirm.

    When ``self.training`` is not ``'test'``, the sample is randomly
    augmented (box jitter, crop, horizontal flip, colour change), each with
    probability 0.5.  The flip and colour stages previously appeared twice
    in a row (copy-paste); they now run once.

    Args:
        idx: Index into ``self.data``.

    Returns:
        * test with ``self.use_gtbox``: ``(img, face, head_channel, eye,
          gaze_heatmap, gaze, gaze_inside, image_path, gtbox)``
        * test: the same tuple without ``gtbox``
        * otherwise: ``(img, face, head_channel, gaze_heatmap, image_path,
          gaze_inside)``
    """
    gaze_inside = True
    data = self.data[idx]
    image_path = os.path.join(self.root_dir, data['filename'])

    eye = [float(data['hx']) / 640, float(data['hy']) / 480]
    gaze = [float(data['gaze_cx']) / 640, float(data['gaze_cy']) / 480]

    image_path = image_path.replace('\\', '/')
    img = Image.open(image_path)
    img = img.convert('RGB')
    width, height = img.size

    gaze_x, gaze_y = gaze
    eye_x, eye_y = eye

    if self.use_gtbox:
        # Normalised box of the gazed object, selected by its index.
        gaze_idx = np.copy(data['gazeIdx']).astype(np.int64)
        gt_bboxes = np.copy(data['ann']['bboxes']) / [640, 480, 640, 480]
        gtbox = gt_bboxes[gaze_idx]

    # Head box around the eye position, clamped at 0 only.
    x_min = max((eye_x - 0.15) * width, 0)
    y_min = max((eye_y - 0.15) * height, 0)
    x_max = max((eye_x + 0.15) * width, 0)
    y_max = max((eye_y + 0.15) * height, 0)

    # expand the box a bit
    k = 0.1
    x_min -= k * abs(x_max - x_min)
    y_min -= k * abs(y_max - y_min)
    x_max += k * abs(x_max - x_min)
    y_max += k * abs(y_max - y_min)
    x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])

    if self.imshow:
        img.save("origin_img.jpg")

    is_test = self.training == 'test'
    if is_test:
        imsize = torch.IntTensor([width, height])
    else:
        ## data augmentation
        # Jitter (expansion-only) bounding box size
        if np.random.random_sample() <= 0.5:
            k = np.random.random_sample() * 0.2
            x_min -= k * abs(x_max - x_min)
            y_min -= k * abs(y_max - y_min)
            x_max += k * abs(x_max - x_min)
            y_max += k * abs(y_max - y_min)

        # Random Crop
        if np.random.random_sample() <= 0.5:
            # Calculate the minimum valid range of the crop that doesn't
            # exclude the face and the gaze target
            crop_x_min = np.min([gaze_x * width, x_min, x_max])
            crop_y_min = np.min([gaze_y * height, y_min, y_max])
            crop_x_max = np.max([gaze_x * width, x_min, x_max])
            crop_y_max = np.max([gaze_y * height, y_min, y_max])

            # Randomly select a random top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Find the range of valid crop width and height starting from
            # the (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min

            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min,
                                            crop_height_max)

            # Crop it
            img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                          crop_width)

            # Record the crop's (x, y) offset
            offset_x, offset_y = crop_x_min, crop_y_min

            # convert coordinates into the cropped frame
            x_min, x_max = x_min - offset_x, x_max - offset_x
            y_min, y_max = y_min - offset_y, y_max - offset_y
            gaze_x = (gaze_x * width - offset_x) / float(crop_width)
            gaze_y = (gaze_y * height - offset_y) / float(crop_height)

            width, height = crop_width, crop_height

        # Random flip
        if np.random.random_sample() <= 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            x_min, x_max = width - x_max, width - x_min
            gaze_x = 1 - gaze_x

        # Random color change
        if np.random.random_sample() <= 0.5:
            img = TF.adjust_brightness(
                img, brightness_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_contrast(
                img, contrast_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_saturation(
                img, saturation_factor=np.random.uniform(0, 1.5))

    head_channel = chong_imutils.get_head_box_channel(
        x_min,
        y_min,
        x_max,
        y_max,
        width,
        height,
        resolution=self.input_size,
        coordconv=False).unsqueeze(0)

    # Crop the face
    face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    if self.imshow:
        img.save("img_aug.jpg")
        face.save('face_aug.jpg')

    if self.transform is not None:
        img = self.transform(img)
        face = self.transform(face)

    # generate the heat map used for deconv prediction (the test and
    # training paths draw it the same way)
    gaze_heatmap = torch.zeros(
        self.output_size, self.output_size)  # set the size of the output
    gaze_heatmap = chong_imutils.draw_labelmap(
        gaze_heatmap,
        [gaze_x * self.output_size, gaze_y * self.output_size],
        3,
        type='Gaussian')

    if is_test and self.use_gtbox:
        return (img, face, head_channel, eye, gaze_heatmap, gaze,
                gaze_inside, image_path, gtbox)
    if is_test:
        return (img, face, head_channel, eye, gaze_heatmap, gaze,
                gaze_inside, image_path)
    return img, face, head_channel, gaze_heatmap, image_path, gaze_inside
def __call__(self, img: Image.Image):
    """Adjust the saturation of ``img`` using ``self.params`` as the factor."""
    saturation_factor = self.params
    return TF.adjust_saturation(img, saturation_factor)
def torchvision(self, img):
    """Shift hue by 0.1, then scale saturation and brightness by 1.2.

    The three adjustments are applied in that order through the
    ``torchvision`` name in scope.
    """
    hue_shifted = torchvision.adjust_hue(img, hue_factor=0.1)
    saturated = torchvision.adjust_saturation(hue_shifted,
                                              saturation_factor=1.2)
    return torchvision.adjust_brightness(saturated, brightness_factor=1.2)
def __call__(self, inputs):
    """Resize the inputs and, outside the test phase, randomly augment them.

    Args:
        inputs: Sequence ``(img1, img2, depth, phase, fb)``.  ``img2``,
            ``depth`` and ``fb`` may be ``None``.  ``fb`` is multiplied by
            the horizontal resize ratio.

    Returns:
        ``(img1, img2, depth, fb)``.  For ``phase == 'test'`` the tuple is
        returned right after resizing the images and rescaling ``fb``.
        Otherwise ``depth`` (if given) is divided by 65535, clipped at 0,
        resized and finally mapped with ``d * 2 - 1`` into a float32 array.
        Flip, rotation and colour changes are applied depending on
        ``self.flip``, ``self.rotation`` and ``self.augment``.
    """
    img1, img2, depth, phase, fb = (inputs[0], inputs[1], inputs[2],
                                    inputs[3], inputs[4])

    height, width = img1.height, img1.width
    original_width = width

    if self.size == [-1]:
        # Round both sides up to the next multiple of 32.
        # NOTE(review): this overwrites self.size, so later calls reuse the
        # size derived from the first image; confirm that is intended.
        divisor = 32.0
        height = int(math.ceil(height / divisor) * divisor)
        width = int(math.ceil(width / divisor) * divisor)
        self.size = (height, width)

    resize_rgb = transforms.Resize(self.size, Image.BICUBIC)
    img1 = resize_rgb(img1)
    if img2 is not None:
        img2 = resize_rgb(img2)
    if fb is not None:
        fb = fb * (float(self.size[1]) / float(original_width))

    if phase == 'test':
        return img1, img2, depth, fb

    if not self.size == 0:
        if depth is not None:
            normalised = np.array(depth, dtype=np.float32) / 65535.0  # cm->m, /10
            normalised[normalised < 0.0] = 0.0
            depth = Image.fromarray(normalised, 'F')

    if depth is not None:
        depth = transforms.Resize(self.size, Image.BICUBIC)(depth)

    # Geometric augmentation is skipped when both img2 and depth are given.
    lacks_pair_or_depth = img2 is None or depth is None

    if self.flip and lacks_pair_or_depth:
        flip_prob = random.random()
        flip_transform = RandomHorizontalFlip(flip_prob)
        if img2 is None:
            img1 = flip_transform(img1)
        elif flip_prob < 0.5:
            # With a second view the two images also trade places.
            img1, img2 = flip_transform(img2), flip_transform(img1)
        if depth is not None:
            depth = flip_transform(depth)

    if self.rotation and lacks_pair_or_depth and random.random() < 0.5:
        degree = random.randrange(-500, 500) / 100
        img1 = F.rotate(img1, degree, Image.BICUBIC)
        if depth is not None:
            depth = F.rotate(depth, degree, Image.BILINEAR)
        if img2 is not None:
            img2 = F.rotate(img2, degree, Image.BICUBIC)

    if depth is not None:
        depth = np.array(depth, dtype=np.float32) * 2.0 - 1.0

    if self.augment and random.random() < 0.5:
        brightness = random.uniform(0.8, 1.0)
        contrast = random.uniform(0.8, 1.0)
        saturation = random.uniform(0.8, 1.0)

        def _colour_jitter(image):
            # Same three factors for both views.
            image = F.adjust_brightness(image, brightness)
            image = F.adjust_contrast(image, contrast)
            return F.adjust_saturation(image, saturation)

        img1 = _colour_jitter(img1)
        if img2 is not None:
            img2 = _colour_jitter(img2)

    return img1, img2, depth, fb
def __getitem__(self, index):
    """Return a randomly augmented ``(target, input, filename)`` patch pair.

    The index wraps around ``self.sizex``.  Both images are reflect-padded
    on the right/bottom when smaller than the patch size ``self.ps``,
    optionally passed through ``adjust_gamma`` (gamma 1) and a shared
    saturation change, converted to tensors, cropped at the same random
    location and finally flipped/rotated identically.

    Returns:
        ``(tar_img, inp_img, filename)`` where ``filename`` is the target
        file's base name without extension.
    """
    sample_idx = index % self.sizex
    patch = self.ps

    inp_path = self.inp_filenames[sample_idx]
    tar_path = self.tar_filenames[sample_idx]

    inp_img = Image.open(inp_path)
    tar_img = Image.open(tar_path)

    width, height = tar_img.size
    pad_w = max(patch - width, 0)
    pad_h = max(patch - height, 0)

    # Reflect Pad in case image is smaller than patch_size
    if pad_w or pad_h:
        padding = (0, 0, pad_w, pad_h)
        inp_img = TF.pad(inp_img, padding, padding_mode='reflect')
        tar_img = TF.pad(tar_img, padding, padding_mode='reflect')

    if random.randint(0, 2) == 1:
        inp_img = TF.adjust_gamma(inp_img, 1)
        tar_img = TF.adjust_gamma(tar_img, 1)

    if random.randint(0, 2) == 1:
        # factor drawn from (0.8, 1.2]
        sat_factor = 1 + (0.2 - 0.4 * np.random.rand())
        inp_img = TF.adjust_saturation(inp_img, sat_factor)
        tar_img = TF.adjust_saturation(tar_img, sat_factor)

    inp_img = TF.to_tensor(inp_img)
    tar_img = TF.to_tensor(tar_img)

    rows, cols = tar_img.shape[1], tar_img.shape[2]
    top = random.randint(0, rows - patch)
    left = random.randint(0, cols - patch)
    aug = random.randint(0, 8)

    # Crop patch
    inp_img = inp_img[:, top:top + patch, left:left + patch]
    tar_img = tar_img[:, top:top + patch, left:left + patch]

    def _augment(tensor):
        # Data Augmentations; modes 0 and 8 leave the patch untouched.
        if aug == 1:
            return tensor.flip(1)
        if aug == 2:
            return tensor.flip(2)
        if aug == 3:
            return torch.rot90(tensor, dims=(1, 2))
        if aug == 4:
            return torch.rot90(tensor, dims=(1, 2), k=2)
        if aug == 5:
            return torch.rot90(tensor, dims=(1, 2), k=3)
        if aug == 6:
            return torch.rot90(tensor.flip(1), dims=(1, 2))
        if aug == 7:
            return torch.rot90(tensor.flip(2), dims=(1, 2))
        return tensor

    inp_img = _augment(inp_img)
    tar_img = _augment(tar_img)

    filename = os.path.splitext(os.path.basename(tar_path))[0]

    return tar_img, inp_img, filename
def augment_batch(images: torch.Tensor, p: float) -> torch.Tensor:
    """Apply a chain of random augmentations to a batch (deprecated).

    Each stage draws ONE random parameter for the whole batch and a
    per-sample boolean mask (probability derived from ``p``) that selects
    which samples receive it.  Stages, in order: horizontal flip, rotation,
    anisotropic resize followed by a centre crop back to the input size,
    brightness, contrast, luma inversion, hue, saturation and additive
    Gaussian noise.

    Args:
        images: Batch unpacked as ``(batch, channels, height, width)``.
            NOTE(review): the colour stages and the final clamp assume float
            images in ``[0, 1]``; confirm against callers.
        p: Per-sample probability used for the stage masks.

    Returns:
        A new tensor with the same shape as ``images``.
    """
    # stacklevel=2 attributes the warning to the caller.
    warnings.warn("augment_batch is deprecated", DeprecationWarning,
                  stacklevel=2)

    batch_size, channels, h_orig, w_orig = images.size()
    # Reflect-pad generously so rotation/scaling never exposes empty borders.
    images = pad(images,
                 padding=(w_orig - 1, h_orig - 1, w_orig - 1, h_orig - 1),
                 padding_mode='reflect')
    batch_size, channels, h, w = images.size()

    # Horizontal flip
    mask = (torch.rand(batch_size) < p).logical_and(
        torch.rand(batch_size) < 0.5)
    images[mask] = hflip(images[mask])

    output_images = images.new_zeros((batch_size, channels, h_orig, w_orig))

    # Geometric parameters: 90-degree step + free rotation, iso/aniso scale
    translate = (0, 0)
    angle_step = choice([0, 1, 2, 3])
    angle = -90 * angle_step

    scale_iso_mask = torch.rand(batch_size) < p
    scale_iso = lognormal(0, 0.2 * math.log(2))
    scale = (scale_iso, scale_iso)

    p_rot = 1 - math.sqrt(1 - p)
    rot_mask = torch.rand(batch_size) < p_rot
    theta = uniform(-180, 180)
    angle += theta

    scale_mask = torch.rand(batch_size) < p
    scale_factor = lognormal(0, 0.2 * math.log(2))
    scale_x, scale_y = scale
    scale = (scale_x * scale_factor, scale_y / scale_factor)
    new_size = (int(h * scale[0]), int(w * scale[1]))

    if torch.any(rot_mask):
        affine_transformed = affine(images[rot_mask],
                                    angle=angle,
                                    translate=list(translate),
                                    shear=[0., 0.],
                                    scale=1)
        images[rot_mask] = affine_transformed

    # Resize the selected samples, then crop everything back to input size.
    resize_mask = scale_iso_mask.logical_and(scale_mask)
    keep_mask = resize_mask.logical_not()
    resized_images = resize(images[resize_mask], list(new_size))
    output_images[keep_mask] = center_crop(images[keep_mask],
                                           (h_orig, w_orig))
    output_images[resize_mask] = center_crop(resized_images,
                                             (h_orig, w_orig))
    images = output_images

    # Brightness
    mask = torch.rand(batch_size) < p
    brightness = normal(1, 0.2)
    images[mask] = adjust_brightness(images[mask], brightness)

    # Contrast
    mask = torch.rand(batch_size) < p
    contrast = lognormal(0, (0.5 * math.log(2)))
    images[mask] = adjust_contrast(images[mask], contrast)

    # Luma inversion (first channel after rgb_to_ycbcr)
    mask = torch.rand(batch_size) < p
    image_data = rgb_to_ycbcr(images[mask])
    image_data[..., 0, :, :] = (1 - image_data[..., 0, :, :])
    images[mask] = ycbcr_to_rgb(image_data)

    # Hue
    mask = torch.rand(batch_size) < p
    if torch.any(mask):
        hue_factor = uniform(-0.5, 0.5)
        images[mask] = adjust_hue(images[mask], hue_factor)

    # Saturation
    mask = torch.rand(batch_size) < p
    saturation = lognormal(0, math.log(2))
    images[mask] = adjust_saturation(images[mask], saturation)

    # Additive Gaussian noise.  Clamp the noisy image, not the noise:
    # clamping the noise to [0, 1] dropped every negative sample, so the
    # noise could only brighten and the sum could still exceed 1.
    mask = torch.rand(batch_size) < p
    std_dev = abs(normal(0, 0.1))
    noise = torch.randn_like(images[mask]) * std_dev
    images[mask] = (images[mask] + noise).clamp(0, 1)

    return images
def batch_satuation(batch, factor):
    """Adjust the saturation of every image in ``batch`` in place.

    Image ``k`` is converted with ``toimg``, adjusted with ``factor[k]`` and
    written back through ``totensor``.

    Returns:
        The same ``batch`` object that was passed in.
    """
    num_images = batch.size(0)
    for position in range(num_images):
        pil_image = toimg(batch[position])
        adjusted = TF.adjust_saturation(pil_image, factor[position])
        batch[position] = totensor(adjusted)
    return batch
def __call__(self, img):
    """Adjust saturation by a factor drawn uniformly from [self.low, self.high)."""
    factor = np.random.uniform(self.low, self.high)
    return functional.adjust_saturation(img, factor)
def __call__(self, imgs):
    """Return a list with ``self.saturation_factor`` applied to each image."""
    adjusted = []
    for img in imgs:
        adjusted.append(
            F.adjust_saturation(img=img,
                                saturation_factor=self.saturation_factor))
    return adjusted
def __call__(self, x):
    """Apply gamma (fixed at 1), saturation, brightness and contrast in order.

    The last three factors come from ``self.saturation_factor``,
    ``self.brightness_factor`` and ``self.contrast_factor``.
    """
    gamma_corrected = TF.adjust_gamma(x, gamma=1)
    saturated = TF.adjust_saturation(gamma_corrected, self.saturation_factor)
    brightened = TF.adjust_brightness(saturated, self.brightness_factor)
    return TF.adjust_contrast(brightened, self.contrast_factor)