Example #1
    def test_adjusts_L_mode(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_rgb = Image.fromarray(x_np, mode='RGB')

        x_l = x_rgb.convert('L')
        assert F.adjust_brightness(x_l, 2).mode == 'L'
        assert F.adjust_saturation(x_l, 2).mode == 'L'
        assert F.adjust_contrast(x_l, 2).mode == 'L'
        assert F.adjust_hue(x_l, 0.4).mode == 'L'
        assert F.adjust_gamma(x_l, 0.5).mode == 'L'
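The test above relies on the adjust_* functionals returning an image in the same mode as their input. A minimal standalone sketch of the same property, assuming torchvision.transforms.functional is imported as F:

import numpy as np
from PIL import Image
import torchvision.transforms.functional as F

# A grayscale ('L') input should come back as grayscale.
x_l = Image.fromarray(np.arange(4, dtype=np.uint8).reshape(2, 2), mode='L')
assert F.adjust_saturation(x_l, 2).mode == 'L'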
Example #2
    def test_adjust_saturation(self):
        x_shape = [2, 2, 3]
        x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
        x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
        x_pil = Image.fromarray(x_np, mode='RGB')

        # test 0
        y_pil = F.adjust_saturation(x_pil, 1)
        y_np = np.array(y_pil)
        assert np.allclose(y_np, x_np)

        # test 1
        y_pil = F.adjust_saturation(x_pil, 0.5)
        y_np = np.array(y_pil)
        y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        assert np.allclose(y_np, y_ans)

        # test 2
        y_pil = F.adjust_saturation(x_pil, 2)
        y_np = np.array(y_pil)
        y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]
        y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
        assert np.allclose(y_np, y_ans)
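For reference, saturation_factor follows the ImageEnhance.Color convention: 1 leaves the image unchanged (test 0 above), 0 fully desaturates, and values above 1 oversaturate. A small sketch of the factor-0 end of that range, assuming the same imports as the test and the PIL backend:

import numpy as np
from PIL import Image
import torchvision.transforms.functional as F

x = Image.fromarray(np.random.randint(0, 256, (2, 2, 3), dtype=np.uint8))
# Factor 0 should match a round trip through grayscale (PIL backend).
gray = x.convert('L').convert('RGB')
assert np.array_equal(np.array(F.adjust_saturation(x, 0)), np.array(gray))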
Example #3
 def torchvision_transform(self, img):
     img = torchvision.adjust_hue(img, hue_factor=0.1)
     img = torchvision.adjust_saturation(img, saturation_factor=1.2)
     img = torchvision.adjust_brightness(img, brightness_factor=1.2)
     return img
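When the factors are fixed like this, the same chain can also be phrased with torchvision's ColorJitter by passing degenerate (min == max) ranges; note that ColorJitter applies its operations in a random order, so it is not bit-identical to the fixed sequence above. A sketch, assuming a recent torchvision:

import torchvision.transforms as T

# Fixed-factor jitter roughly equivalent to the chained functional calls above.
jitter = T.ColorJitter(brightness=(1.2, 1.2),
                       saturation=(1.2, 1.2),
                       hue=(0.1, 0.1))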
Example #4
 def __call__(self, img, mask):
     assert img.size == mask.size
     return tf.adjust_saturation(
         img, random.uniform(1 - self.saturation,
                             1 + self.saturation)), mask
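Saturation is a color-only operation, so the mask passes through untouched; only geometric transforms need to be mirrored onto it. A sketch of the class this __call__ plausibly belongs to (the constructor and its default are assumptions):

import random
import torchvision.transforms.functional as tf

class RandomSaturation:
    def __init__(self, saturation=0.5):
        # Jitter range: factors drawn from [1 - saturation, 1 + saturation].
        self.saturation = saturation

    def __call__(self, img, mask):
        assert img.size == mask.size
        factor = random.uniform(1 - self.saturation, 1 + self.saturation)
        return tf.adjust_saturation(img, factor), mask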
Example #5
    def __getitem__(self, idx):

        gaze_inside = True
        image_path = self.paths[idx][0][0]
        image_path = os.path.join(self.root_dir, image_path)
        img = Image.open(image_path)
        img = img.convert('RGB')
        width, height = img.size
        # print('imsize', img.size)
        # print('img path', image_path)

        box = self.bboxes[0, idx][0]
        eye = self.eyes[0, idx][0]
        # todo: process gaze differently for training or testing
        gaze = self.gazes[0, idx].mean(axis=0)
        # print("Gaze", gaze.shape, gaze)
        # image = cv2.imread(image_path, cv2.IMREAD_COLOR)

        # if random.random() > 0.5 and self.training == 'train':
        #     eye = [1.0 - eye[0], eye[1]]
        #     gaze = [1.0 - gaze[0], gaze[1]]
        #     image = cv2.flip(image, 1)
        gaze_x, gaze_y = gaze.tolist()
        eye_x, eye_y = eye.tolist()
        #print('gaze coords: ', type(gaze_x), type(gaze_y), gaze_x, gaze_y)
        #print('eye coords: ', type(eye_x), type(eye_y), eye_x, eye_y)

        # expand face bbox a bit
        k = 0.1
        x_min = (eye_x - 0.15) * width
        y_min = (eye_y - 0.15) * height
        x_max = (eye_x + 0.15) * width
        y_max = (eye_y + 0.15) * height
        if x_min < 0:
            x_min = 0
        if y_min < 0:
            y_min = 0
        if x_max < 0:
            x_max = 0
        if y_max < 0:
            y_max = 0
        x_min -= k * abs(x_max - x_min)
        y_min -= k * abs(y_max - y_min)
        x_max += k * abs(x_max - x_min)
        y_max += k * abs(y_max - y_min)
        x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])
        #print(x_min, y_min, x_max, y_max)
        if self.imshow:
            img.save("origin_img.jpg")

        if self.training == 'test':
            imsize = torch.IntTensor([width, height])
        else:
            ## data augmentation

            # Jitter (expansion-only) bounding box size
            if np.random.random_sample() <= 0.5:
                k = np.random.random_sample() * 0.2
                x_min -= k * abs(x_max - x_min)
                y_min -= k * abs(y_max - y_min)
                x_max += k * abs(x_max - x_min)
                y_max += k * abs(y_max - y_min)

            # Random Crop
            if np.random.random_sample() <= 0.5:
                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                crop_x_min = np.min([gaze_x * width, x_min, x_max])
                crop_y_min = np.min([gaze_y * height, y_min, y_max])
                crop_x_max = np.max([gaze_x * width, x_min, x_max])
                crop_y_max = np.max([gaze_y * height, y_min, y_max])

                # Randomly select a random top left corner
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)

                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                              crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # convert coordinates into the cropped frame
                x_min, y_min, x_max, y_max = x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y
                # if gaze_inside:
                gaze_x, gaze_y = (gaze_x * width - offset_x) / float(crop_width), \
                                 (gaze_y * height - offset_y) / float(crop_height)
                # else:
                #     gaze_x = -1; gaze_y = -1

                width, height = crop_width, crop_height

            # Random flip
            if np.random.random_sample() <= 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x_max_2 = width - x_min
                x_min_2 = width - x_max
                x_max = x_max_2
                x_min = x_min_2
                gaze_x = 1 - gaze_x

            # Random color change
            if np.random.random_sample() <= 0.5:
                img = TF.adjust_brightness(img,
                                           brightness_factor=np.random.uniform(
                                               0.5, 1.5))
                img = TF.adjust_contrast(img,
                                         contrast_factor=np.random.uniform(
                                             0.5, 1.5))
                img = TF.adjust_saturation(img,
                                           saturation_factor=np.random.uniform(
                                               0, 1.5))
        # print('bbx2',  [x_min, y_min, x_max, y_max])

        head_channel = chong_imutils.get_head_box_channel(
            x_min,
            y_min,
            x_max,
            y_max,
            width,
            height,
            resolution=self.input_size,
            coordconv=False).unsqueeze(0)

        # Crop the face
        face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        if self.imshow:
            img.save("img_aug.jpg")
            face.save('face_aug.jpg')

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)
        #print('imsize2', img.size())
        # generate the heat map used for deconv prediction
        gaze_heatmap = torch.zeros(
            self.output_size, self.output_size)  # set the size of the output
        #print([gaze_x * self.output_size, gaze_y * self.output_size])
        #print(self.output_size)

        if self.training == 'test':  # aggregated heatmap
            gaze_heatmap = chong_imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3,
                type='Gaussian')

        else:
            # if gaze_inside:
            gaze_heatmap = chong_imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3,
                type='Gaussian')
        if self.imshow:
            fig = plt.figure(111)
            img = 255 - chong_imutils.unnorm(img.numpy()) * 255
            img = np.clip(img, 0, 255)
            plt.imshow(np.transpose(img, (1, 2, 0)))
            plt.imshow(imresize(gaze_heatmap,
                                (self.input_size, self.input_size)),
                       cmap='jet',
                       alpha=0.3)
            plt.imshow(imresize(1 - head_channel.squeeze(0),
                                (self.input_size, self.input_size)),
                       alpha=0.2)
            plt.savefig('viz_aug.png')

        if self.training == 'test':
            return img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside, image_path
        else:
            return img, face, head_channel, gaze_heatmap, image_path, gaze_inside
Example #6
    def __getitem__(self, idx):
        rgb, depth, gt, K = self._load_data(idx)

        if self.augment and self.mode == 'train':
            # Top crop if needed
            if self.args.top_crop > 0:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            width, height = rgb.size

            _scale = np.random.uniform(1.0, 1.5)
            scale = int(height * _scale)  # np.int was removed in NumPy 1.24
            degree = np.random.uniform(-5.0, 5.0)
            flip = np.random.uniform(0.0, 1.0)

            # Horizontal flip
            if flip > 0.5:
                rgb = TF.hflip(rgb)
                depth = TF.hflip(depth)
                gt = TF.hflip(gt)
                K[2] = width - K[2]

            # Rotation
            rgb = TF.rotate(rgb, angle=degree, resample=Image.BICUBIC)
            depth = TF.rotate(depth, angle=degree, resample=Image.NEAREST)
            gt = TF.rotate(gt, angle=degree, resample=Image.NEAREST)

            # Color jitter
            brightness = np.random.uniform(0.6, 1.4)
            contrast = np.random.uniform(0.6, 1.4)
            saturation = np.random.uniform(0.6, 1.4)

            rgb = TF.adjust_brightness(rgb, brightness)
            rgb = TF.adjust_contrast(rgb, contrast)
            rgb = TF.adjust_saturation(rgb, saturation)

            # Resize
            rgb = TF.resize(rgb, scale, Image.BICUBIC)
            depth = TF.resize(depth, scale, Image.NEAREST)
            gt = TF.resize(gt, scale, Image.NEAREST)

            K[0] = K[0] * _scale
            K[1] = K[1] * _scale
            K[2] = K[2] * _scale
            K[3] = K[3] * _scale

            # Crop
            width, height = rgb.size

            assert self.height <= height and self.width <= width, \
                "patch size is larger than the input size"

            h_start = random.randint(0, height - self.height)
            w_start = random.randint(0, width - self.width)

            rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
            depth = TF.crop(depth, h_start, w_start, self.height, self.width)
            gt = TF.crop(gt, h_start, w_start, self.height, self.width)

            K[2] = K[2] - w_start
            K[3] = K[3] - h_start

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))
            depth = depth / _scale

            gt = TF.to_tensor(np.array(gt))
            gt = gt / _scale
        elif self.mode in ['train', 'val']:
            # Top crop if needed
            if self.args.top_crop > 0:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            # Crop
            width, height = rgb.size

            assert self.height <= height and self.width <= width, \
                "patch size is larger than the input size"

            h_start = random.randint(0, height - self.height)
            w_start = random.randint(0, width - self.width)

            rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
            depth = TF.crop(depth, h_start, w_start, self.height, self.width)
            gt = TF.crop(gt, h_start, w_start, self.height, self.width)

            K[2] = K[2] - w_start
            K[3] = K[3] - h_start

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))

            gt = TF.to_tensor(np.array(gt))
        else:
            if self.args.top_crop > 0 and self.args.test_crop:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))

            gt = TF.to_tensor(np.array(gt))

        if self.args.num_sample > 0:
            depth = self.get_sparse_depth(depth, self.args.num_sample)

        output = {'rgb': rgb, 'dep': depth, 'gt': gt, 'K': torch.Tensor(K)}

        return output
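The intrinsics bookkeeping above follows the usual rule: resizing by s multiplies focal lengths and principal point by s, and cropping at (w_start, h_start) shifts the principal point. A compact sketch, assuming K packs [fx, fy, cx, cy] as the updates above imply:

def update_intrinsics(K, scale=1.0, crop_x=0, crop_y=0):
    # Resize first (multiplies all four entries), then crop (shifts the center).
    fx, fy, cx, cy = K
    return [fx * scale, fy * scale, cx * scale - crop_x, cy * scale - crop_y]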
Example #7
 def __call__(self, img, lbl_semseg, lbl_side):
     assert img.size == lbl_semseg.size == lbl_side.size
     return tf.adjust_saturation(
         img, random.uniform(1 - self.saturation,
                             1 + self.saturation)), lbl_semseg, lbl_side
Example #8
    def train_trans(image, mask):
        # Generate random parameters for augmentation
        bf = np.random.uniform(1 - args.colorjitter_factor,
                               1 + args.colorjitter_factor)
        cf = np.random.uniform(1 - args.colorjitter_factor,
                               1 + args.colorjitter_factor)
        sf = np.random.uniform(1 - args.colorjitter_factor,
                               1 + args.colorjitter_factor)
        hf = np.random.uniform(-args.colorjitter_factor,
                               +args.colorjitter_factor)
        pflip = np.random.randint(0, 2) > 0.5  # randint's high end is exclusive

        # Random scaling
        scale_factor = np.random.uniform(0.75, 2.0)
        scaled_train_size = [
            int(element * scale_factor) for element in args.train_size
        ]

        # Resize, 1 for Image.LANCZOS
        image = TF.resize(image, scaled_train_size, interpolation=1)
        # Resize, 0 for Image.NEAREST
        mask = TF.resize(mask, scaled_train_size, interpolation=0)

        # Random cropping
        if args.train_size != args.crop_size:
            # PIL image size is (width, height); args.crop_size is (height, width)
            if image.size[1] <= args.crop_size[0]:
                pad_h = args.crop_size[0] - image.size[1] + 1
                pad_w = args.crop_size[1] - image.size[0] + 1
                image = ImageOps.expand(image,
                                        border=(0, 0, pad_w, pad_h),
                                        fill=0)
                mask = ImageOps.expand(mask,
                                       border=(0, 0, pad_w, pad_h),
                                       fill=19)

            # From PIL to Tensor
            image = TF.to_tensor(image)
            mask = TF.to_tensor(mask)
            h, w = image.size()[1], image.size()[2]  # scaled train size
            th, tw = args.crop_size

            i = np.random.randint(0, h - th)
            j = np.random.randint(0, w - tw)
            image_crop = image[:, i:i + th, j:j + tw]
            mask_crop = mask[:, i:i + th, j:j + tw]

            image = TF.to_pil_image(image_crop)
            mask = TF.to_pil_image(mask_crop[0, :, :])

        # H-flip
        if pflip and args.hflip:
            image = TF.hflip(image)
            mask = TF.hflip(mask)

        # Color jitter
        image = TF.adjust_brightness(image, bf)
        image = TF.adjust_contrast(image, cf)
        image = TF.adjust_saturation(image, sf)
        image = TF.adjust_hue(image, hf)

        # From PIL to Tensor
        image = TF.to_tensor(image)

        # Normalize
        image = TF.normalize(image, args.dataset_mean, args.dataset_std)

        # Convert ids to train_ids
        mask = np.array(mask, np.uint8)  # PIL Image to numpy array
        mask = torch.from_numpy(mask)  # Numpy array to tensor

        return image, mask
Example #9
    def __call__(self, inputs):

        img1 = inputs[0]
        img2 = inputs[1]
        depth = inputs[2]
        phase = inputs[3]
        fb = inputs[4]

        h = img1.height
        w = img1.width
        w0 = w

        if self.size == [-1]:
            divisor = 32.0
            h = int(math.ceil(h / divisor) * divisor)
            w = int(math.ceil(w / divisor) * divisor)
            self.size = (h, w)

        scale_transform = transforms.Compose(
            [transforms.Resize(self.size, Image.BICUBIC)])

        img1 = scale_transform(img1)
        if img2 is not None:
            img2 = scale_transform(img2)

        if fb is not None:
            # Scaling of the focal for e.g. stereo photo loss
            scale = float(self.size[1]) / float(w0)
            fb = fb * scale

        if phase == 'test':
            return img1, img2, depth, fb

        if depth is not None:
            scale_transform_d = transforms.Compose(
                [transforms.Resize(self.size, Image.NEAREST)])
            depth = scale_transform_d(depth)

        if self.size != 0:

            if depth is not None:
                # This maps the VKITTI depth to [0, 1]
                # with 1 being 655.35 meters
                arr_depth = np.array(depth, dtype=np.float32)
                arr_depth /= 65535.0  # cm->m
                arr_depth[arr_depth < 0.0] = 0.0
                depth = Image.fromarray(arr_depth, 'F')

        if depth is not None:
            # Maps depth to [-1, 1] to match tanh output of
            # the depth model
            depth = np.array(depth, dtype=np.float32)
            depth = depth * 2.0
            depth -= 1.0

        if random.random() < 0.5:
            # Some brightness, contrast and saturation augmentation
            brightness = random.uniform(0.8, 1.0)
            contrast = random.uniform(0.8, 1.0)
            saturation = random.uniform(0.8, 1.0)

            img1 = F.adjust_brightness(img1, brightness)
            img1 = F.adjust_contrast(img1, contrast)
            img1 = F.adjust_saturation(img1, saturation)

            if img2 is not None:
                img2 = F.adjust_brightness(img2, brightness)
                img2 = F.adjust_contrast(img2, contrast)
                img2 = F.adjust_saturation(img2, saturation)
        return img1, img2, depth, fb
Example #10
    def transform_triplets(self, img, gt1, gt2):

        # resize image and covert to tensor
        img = TF.to_pil_image(img)
        img = TF.resize(img, [self.img_size, self.img_size])

        gt1 = TF.to_pil_image(gt1)
        gt1 = TF.resize(gt1, [self.img_size, self.img_size])

        gt2 = TF.to_pil_image(gt2)
        gt2 = TF.resize(gt2, [self.img_size, self.img_size])

        if self.with_random_hflip and random.random() > 0.5:
            img = TF.hflip(img)
            gt1 = TF.hflip(gt1)
            gt2 = TF.hflip(gt2)

        if self.with_random_vflip and random.random() > 0.5:
            img = TF.vflip(img)
            gt1 = TF.vflip(gt1)
            gt2 = TF.vflip(gt2)

        if self.with_random_rot90 and random.random() > 0.5:
            img = TF.rotate(img, 90)
            gt1 = TF.rotate(gt1, 90)
            gt2 = TF.rotate(gt2, 90)

        if self.with_random_rot180 and random.random() > 0.5:
            img = TF.rotate(img, 180)
            gt1 = TF.rotate(gt1, 180)
            gt2 = TF.rotate(gt2, 180)

        if self.with_random_rot270 and random.random() > 0.5:
            img = TF.rotate(img, 270)
            gt1 = TF.rotate(gt1, 270)
            gt2 = TF.rotate(gt2, 270)

        if self.with_color_jittering and random.random() > 0.5:
            img = TF.adjust_hue(img, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            img = TF.adjust_saturation(
                img,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6
            gt1 = TF.adjust_hue(gt1, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            gt1 = TF.adjust_saturation(
                gt1,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6
            gt2 = TF.adjust_hue(gt2, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            gt2 = TF.adjust_saturation(
                gt2,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6

        if self.with_random_crop and random.random() > 0.5:
            i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \
                get_params(img=img, scale=(0.5, 1.0), ratio=self.crop_ratio)
            img = TF.resized_crop(img,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))
            gt1 = TF.resized_crop(gt1,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))
            gt2 = TF.resized_crop(gt2,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))

        # to tensor
        img = TF.to_tensor(img)
        gt1 = TF.to_tensor(gt1)
        gt2 = TF.to_tensor(gt2)

        return img, gt1, gt2
Example #11
 def __call__(self, sample):
     if np.random.random() < 0.5:
         saturation = np.random.uniform(0.8, 1.2)
         sample['left'] = F.adjust_saturation(sample['left'], saturation)
         sample['right'] = F.adjust_saturation(sample['right'], saturation)
     return sample
Example #12
File: transform.py Project: laoyezi/SARNet
def adjust_saturation(image, mask, factor=0.5, p=1):
    if random.random() <= p:
        image = tf.adjust_saturation(image, factor)
    return image, mask
Example #13
    def __call__(self, img, gt=None, batch_size=1):
        """
    Args:
     blob: blob to be transformed.
    """
        #color
        img = TF.adjust_brightness(img, self.TF_params.brightness_factor)
        img = TF.adjust_contrast(img, self.TF_params.contrast_factor)
        img = TF.adjust_gamma(img, self.TF_params.gamma, gain=1)
        img = TF.adjust_hue(img, self.TF_params.hue_factor)
        img = TF.adjust_saturation(img, self.TF_params.saturation_factor)
        #affine
        #here we do not use translate and scale in affine function.
        scale = 1.0
        translate = (0, 0)
        #resample =  PIL.Image.BICUBIC or PIL.Image.NEAREST or PIL.Image.BILINEAR
        img = TF.affine(img,
                        self.TF_params.angle,
                        translate,
                        scale,
                        self.TF_params.shear,
                        PIL.Image.BICUBIC,
                        fillcolor=None)
        if gt is not None:
            gt = TF.affine(gt,
                           self.TF_params.angle,
                           translate,
                           scale,
                           self.TF_params.shear,
                           PIL.Image.NEAREST,
                           fillcolor=255)

        if self.TF_params.hflip:
            img = TF.hflip(img)
            if gt is not None:
                gt = TF.hflip(gt)
        img_crops = []
        if gt is not None:
            gt_crops = []
        else:
            gt_crops = None

        for b in range(batch_size):
            self.getNewRandomCrop(img)
            img_crop = TF.resized_crop(img,
                                       self.crop_tuple[0],
                                       self.crop_tuple[1],
                                       self.crop_tuple[2],
                                       self.crop_tuple[3],
                                       self.TF_params.size,
                                       interpolation=PIL.Image.BICUBIC)
            img_crops.append(np.array(img_crop))
            if gt is not None:
                gt_crop = TF.resized_crop(gt,
                                          self.crop_tuple[0],
                                          self.crop_tuple[1],
                                          self.crop_tuple[2],
                                          self.crop_tuple[3],
                                          self.TF_params.size,
                                          interpolation=PIL.Image.NEAREST)
                gt_crops.append(np.array(gt_crop))

        return img_crops, gt_crops
Example #14
def saturation(im, factor):
    if factor == 1:
        return im
    assert isPIL(im) or isinstance(im, torch.Tensor), f"Got type {type(im)}."
    return TFF.adjust_saturation(im, factor)
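In recent torchvision releases (>= 0.8) the functional color ops accept tensors as well as PIL images, which is what the type check above allows for. A quick tensor-input sketch:

import torch
import torchvision.transforms.functional as TFF

# A (C, H, W) float tensor in [0, 1]; 3 channels are required.
img_t = torch.rand(3, 8, 8)
out = TFF.adjust_saturation(img_t, 1.5)
assert out.shape == img_t.shape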
Example #15
 def torchvision_transform(self, img):
     img = torchvision.adjust_brightness(img, 1.5)
     img = torchvision.adjust_contrast(img, 1.5)
     img = torchvision.adjust_saturation(img, 1.5)
     img = torchvision.adjust_hue(img, 0.5)
     return img
Example #16
    def __getitem__(self, index):

        squeen = self.sequeueslists[index]
        image_filenames = [
            join(squeen, x) for x in listdir(squeen) if is_image_file(x)
        ]

        randi = 0
        cropsize = self.crop_size
        hr_scale = Resize((cropsize, cropsize), interpolation=Image.BICUBIC)

        # first image of seq
        imgname = join(squeen, str(randi + 1) + '.jpg')
        hr_image = Image.open(imgname)
        w, h = hr_image.size
        ragey = random.randint(0, h - cropsize)
        rangx = random.randint(0, w - cropsize)

        ################################
        hr_image = self.seq_randomcrop(hr_image, ragey, rangx, cropsize,
                                       cropsize)

        hfp = random.random()
        if hfp < 0.5:
            hr_image = self.randomHflip(hr_image)

        vfp = random.random()
        if vfp < 0.5:
            hr_image = self.randomVflip(hr_image)

        brightness = 0.2
        contrast = 0.2
        saturation = 0.1
        hue = 0.1
        transforms = []

        brightness_factor = np.random.uniform(max(0, 1 - brightness),
                                              1 + brightness)
        transforms.append(
            Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))

        contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
        transforms.append(
            Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))

        saturation_factor = np.random.uniform(max(0, 1 - saturation),
                                              1 + saturation)
        transforms.append(
            Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))

        hue_factor = np.random.uniform(-hue, hue)
        transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))

        np.random.shuffle(transforms)
        color_transform = Compose(transforms)

        hr_image = color_transform(hr_image)
        ################################

        hr_image = ToTensor()(hr_image)

        lr_image = self.lr_transform(hr_image)
        # Y channel
        hr_image = torch.unsqueeze(hr_image, dim=0)
        bic_hr = torch.unsqueeze(ToTensor()(hr_scale(lr_image)), dim=0)
        lr_image = torch.unsqueeze(ToTensor()(lr_image), dim=0)

        t0 = lr_image
        t1 = bic_hr
        t2 = hr_image

        for i in range(randi + 1, randi + self.relation):
            imgname = join(squeen, str(i + 1) + '.jpg')
            hr_image = Image.open(imgname)
            # data argument
            # keep the same (top, left) = (ragey, rangx) order as the first frame
            hr_image = self.seq_randomcrop(hr_image, ragey, rangx, cropsize,
                                           cropsize)
            if hfp < 0.5:
                hr_image = self.randomHflip(hr_image)

            if vfp < 0.5:
                hr_image = self.randomVflip(hr_image)

            hr_image = color_transform(hr_image)

            hr_image = ToTensor()(hr_image)

            lr_image = self.lr_transform(hr_image)
            # Y channel
            hr_image = torch.unsqueeze(hr_image, dim=0)
            bic_hr = torch.unsqueeze(ToTensor()(hr_scale(lr_image)), dim=0)
            lr_image = torch.unsqueeze(ToTensor()(lr_image), dim=0)

            t0 = torch.cat((t0, lr_image), 0)
            t1 = torch.cat((t1, bic_hr), 0)
            t2 = torch.cat((t2, hr_image), 0)

        return t0, t1, t2
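Note that the jitter factors above are sampled once, before the Lambda closures are built, so every frame in the sequence receives the identical color change; re-sampling inside the per-frame loop would break temporal consistency. The pattern in isolation, assuming PIL inputs:

import numpy as np
from PIL import Image
from torchvision.transforms import Compose, Lambda
import torchvision.transforms.functional as F

saturation_factor = np.random.uniform(0.9, 1.1)  # sampled once per sequence
color_transform = Compose(
    [Lambda(lambda img: F.adjust_saturation(img, saturation_factor))])
frames = [Image.new('RGB', (8, 8), c) for c in ('red', 'blue')]
frames = [color_transform(f) for f in frames]  # same factor for every frame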
Example #17
def saturation(img, saturate):
    # PIL-native path: ImageEnhance.Color(img).enhance(f) matches F.adjust_saturation(img, f).
    return ImageEnhance.Color(img).enhance(saturate)
Example #18
 def __call__(self, image):
     if random.random() < self.prob:
         saturation_factor = random.uniform(0.5, 2)
         image = F.adjust_saturation(image, saturation_factor)
     return image
Example #19
def train_trans_alt(image, mask):

    colorjitter_factor = 0.2
    th, tw = args.train_size
    h, w = 512, 1024
    crop_scales = [1.0, 0.8, 0.6, 0.4]

    # Generate random parameters for augmentation
    pflip = np.random.randint(0, 2) > 0.5  # randint's high end is exclusive
    bf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    cf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    sf = np.random.uniform(1 - colorjitter_factor, 1 + colorjitter_factor)
    hf = np.random.uniform(-colorjitter_factor, colorjitter_factor)

    # Resize, 1 for Image.LANCZOS
    image = TF.resize(image, (h, w), interpolation=1)
    # Resize, 0 for Image.NEAREST
    mask = TF.resize(mask, (h, w), interpolation=0)

    # Random cropping
    # From PIL to Tensor

    crop_scale = np.random.choice(crop_scales)
    if crop_scale != 1.0:
        image = TF.to_tensor(image)
        mask = TF.to_tensor(mask)
        h, w = args.train_size
        ch, cw = [int(x * crop_scale) for x in (h, w)]
        i = np.random.randint(0, h - ch)
        j = np.random.randint(0, w - cw)
        image = image[:, i:i + ch, j:j + cw]
        mask = mask[:, i:i + ch, j:j + cw]
        image = TF.to_pil_image(image)
        mask = TF.to_pil_image(mask[0, :, :])

    # Resize, 1 for Image.LANCZOS
    image = TF.resize(image, (th, tw), interpolation=1)
    # Resize, 0 for Image.NEAREST
    mask = TF.resize(mask, (th, tw), interpolation=0)

    # H-flip
    if pflip and args.hflip:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    #Color jitter
    image = TF.adjust_brightness(image, bf)
    image = TF.adjust_contrast(image, cf)
    image = TF.adjust_saturation(image, sf)
    image = TF.adjust_hue(image, hf)

    # From PIL to Tensor
    image = TF.to_tensor(image)

    # Normalize
    image = TF.normalize(image, args.dataset_mean, args.dataset_std)

    # Convert ids to train_ids
    mask = np.array(mask, np.uint8)  # PIL Image to numpy array
    mask = torch.from_numpy(mask)  # Numpy array to tensor

    return image, mask
Example #20
    def transform_fn(self, image, mask):
        if self.num_classes == 1:
            ### Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape H x W x C to a PIL Image while preserving the value range.
            image = array_to_img(image, data_format="channels_last")
            mask = array_to_img(mask, data_format="channels_last")
            ## Input type float32 is not supported

            ##!!!
            ## the preprocess funcions from Keras are very convenient
            ##!!!

            # Resize
            #resize = transforms.Resize(size=(520, 520))
            #image = resize(image)
            #mask = resize(mask)

            # Random crop
            #i, j, h, w = transforms.RandomCrop.get_params(
            #    image, output_size=(512, 512))
            #image = TF.crop(image, i, j, h, w)
            #mask = TF.crop(mask, i, j, h, w)

            ## https://pytorch.org/docs/stable/torchvision/transforms.html
            ## https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py
            # Random horizontal flipping
            if random.random() > 0.5:
                image = TF.hflip(image)
                mask = TF.hflip(mask)

            # Random vertical flipping
            if random.random() > 0.5:
                image = TF.vflip(image)
                mask = TF.vflip(mask)

            # Random to_grayscale
            # if random.random() > 0.6:
            #     image = TF.to_grayscale(image, num_output_channels=3)

            angle = random.randint(0, 90)
            translate = (random.uniform(0, 100), random.uniform(0, 100))
            scale = random.uniform(0.5, 2)
            shear = random.uniform(-10, 10)
            image = TF.affine(image, angle, translate, scale, shear)
            mask = TF.affine(mask, angle, translate, scale, shear)

            # Random adjust_brightness
            image = TF.adjust_brightness(image,
                                         brightness_factor=random.uniform(
                                             0.8, 1.2))

            # Random adjust_saturation
            image = TF.adjust_saturation(image,
                                         saturation_factor=random.uniform(
                                             0.8, 1.2))

            # Random adjust_hue
            # `hue_factor` is the amount of shift in H channel and must be in the
            #     interval `[-0.5, 0.5]`.
            #image = TF.adjust_hue(image, hue_factor=random.uniform(-0.2, 0.2))

            #image = TF.adjust_gamma(image, gamma=random.uniform(0.8, 1.5), gain=1)

            angle = random.randint(0, 90)
            image = TF.rotate(image, angle)
            mask = TF.rotate(mask, angle)

            # Transform to tensor
            image = img_to_array(image, data_format="channels_last")
            mask = img_to_array(mask, data_format="channels_last")

        else:
            ### Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape H x W x C to a PIL Image while preserving the value range.
            image = array_to_img(image, data_format="channels_last")
            mask_pil_array = [None] * mask.shape[-1]
            for i in range(mask.shape[-1]):
                mask_pil_array[i] = array_to_img(mask[:, :, i, np.newaxis],
                                                 data_format="channels_last")

            ## https://pytorch.org/docs/stable/torchvision/transforms.html
            ## https://github.com/pytorch/vision/blob/master/torchvision/transforms/functional.py
            # Random horizontal flipping
            if random.random() > 0.5:
                image = TF.hflip(image)
                for i in range(mask.shape[-1]):
                    mask_pil_array[i] = TF.hflip(mask_pil_array[i])

            # Random vertical flipping
            if random.random() > 0.5:
                image = TF.vflip(image)
                for i in range(mask.shape[-1]):
                    mask_pil_array[i] = TF.vflip(mask_pil_array[i])

            # Random to_grayscale
            # if random.random() > 0.6:
            #     image = TF.to_grayscale(image, num_output_channels=3)

            angle = random.randint(0, 90)
            translate = (random.uniform(0, 100), random.uniform(0, 100))
            scale = random.uniform(0.5, 2)
            shear = random.uniform(0, 0)
            image = TF.affine(image, angle, translate, scale, shear)
            for i in range(mask.shape[-1]):
                mask_pil_array[i] = TF.affine(mask_pil_array[i], angle,
                                              translate, scale, shear)

            # Random adjust_brightness
            image = TF.adjust_brightness(image,
                                         brightness_factor=random.uniform(
                                             0.8, 1.2))

            # Random adjust_saturation
            image = TF.adjust_saturation(image,
                                         saturation_factor=random.uniform(
                                             0.8, 1.2))

            # Random adjust_hue
            # `hue_factor` is the amount of shift in H channel and must be in the
            #     interval `[-0.5, 0.5]`.
            # image = TF.adjust_hue(image, hue_factor=random.uniform(-0.2, 0.2))

            # image = TF.adjust_gamma(image, gamma=random.uniform(0.8, 1.5), gain=1)

            #angle = random.randint(0, 90)
            #image = TF.rotate(image, angle)
            #for i in range(mask.shape[-1]):
            #    mask_pil_array[i] = TF.rotate(mask_pil_array[i], angle)

            # Transform to tensor
            image = img_to_array(image, data_format="channels_last")
            for i in range(mask.shape[-1]):
                # img_to_array(mask_pil_array[i], data_format="channels_last"): 512, 512, 1
                mask[:, :, i] = img_to_array(
                    mask_pil_array[i],
                    data_format="channels_last")[:, :, 0].astype('uint8')

        ### img_to_array will scale the image to (0,255)
        ### when use img_to_array, the image and mask will in (0,255)
        image = (image / 255.0).astype('float32')
        mask = (mask / 255.0).astype('uint8')
        #print(11)
        return image, mask
Example #21
    def __getitem__(self, index):
        sequence_path = self.all_sequence_paths[index]
        df = pd.read_csv(
            sequence_path,
            header=None,
            index_col=False,
            names=['path', 'xmin', 'ymin', 'xmax', 'ymax', 'gazex', 'gazey'])
        show_name = sequence_path.split('/')[-3]
        clip = sequence_path.split('/')[-2]
        seq_len = len(df.index)

        # moving-avg smoothing
        window_size = 11  # should be odd number
        df['xmin'] = myutils.smooth_by_conv(window_size, df, 'xmin')
        df['ymin'] = myutils.smooth_by_conv(window_size, df, 'ymin')
        df['xmax'] = myutils.smooth_by_conv(window_size, df, 'xmax')
        df['ymax'] = myutils.smooth_by_conv(window_size, df, 'ymax')

        if not self.test:
            # cond for data augmentation
            cond_jitter = np.random.random_sample()
            cond_flip = np.random.random_sample()
            cond_color = np.random.random_sample()
            if cond_color < 0.5:
                n1 = np.random.uniform(0.5, 1.5)
                n2 = np.random.uniform(0.5, 1.5)
                n3 = np.random.uniform(0.5, 1.5)
            cond_crop = np.random.random_sample()

            # if longer than seq_len_limit, cut it down to the limit with the init index randomly sampled
            if seq_len > self.seq_len_limit:
                sampled_ind = np.random.randint(0,
                                                seq_len - self.seq_len_limit)
                seq_len = self.seq_len_limit
            else:
                sampled_ind = 0

            if cond_crop < 0.5:
                sliced_x_min = df['xmin'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_x_max = df['xmax'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_y_min = df['ymin'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_y_max = df['ymax'].iloc[sampled_ind:sampled_ind +
                                               seq_len]

                sliced_gaze_x = df['gazex'].iloc[sampled_ind:sampled_ind +
                                                 seq_len]
                sliced_gaze_y = df['gazey'].iloc[sampled_ind:sampled_ind +
                                                 seq_len]

                check_sum = sliced_gaze_x.sum() + sliced_gaze_y.sum()
                all_outside = check_sum == -2 * seq_len

                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                if all_outside:
                    crop_x_min = np.min(
                        [sliced_x_min.min(),
                         sliced_x_max.min()])
                    crop_y_min = np.min(
                        [sliced_y_min.min(),
                         sliced_y_max.min()])
                    crop_x_max = np.max(
                        [sliced_x_min.max(),
                         sliced_x_max.max()])
                    crop_y_max = np.max(
                        [sliced_y_min.max(),
                         sliced_y_max.max()])
                else:
                    crop_x_min = np.min([
                        sliced_gaze_x.min(),
                        sliced_x_min.min(),
                        sliced_x_max.min()
                    ])
                    crop_y_min = np.min([
                        sliced_gaze_y.min(),
                        sliced_y_min.min(),
                        sliced_y_max.min()
                    ])
                    crop_x_max = np.max([
                        sliced_gaze_x.max(),
                        sliced_x_min.max(),
                        sliced_x_max.max()
                    ])
                    crop_y_max = np.max([
                        sliced_gaze_y.max(),
                        sliced_y_min.max(),
                        sliced_y_max.max()
                    ])

                # Randomly select a random top left corner
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Get image size
                path = os.path.join(self.data_dir, show_name, clip,
                                    df['path'].iloc[0])
                img = Image.open(path)
                img = img.convert('RGB')
                width, height = img.size

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)
        else:
            sampled_ind = 0


        faces, images, head_channels, heatmaps, paths, gazes, imsizes, gaze_inouts = [], [], [], [], [], [], [], []
        index_tracker = -1
        for i, row in df.iterrows():
            index_tracker = index_tracker + 1
            if not self.test:
                if index_tracker < sampled_ind or index_tracker >= (
                        sampled_ind + self.seq_len_limit):
                    continue

            face_x1 = row['xmin']  # note: Already in image coordinates
            face_y1 = row['ymin']  # note: Already in image coordinates
            face_x2 = row['xmax']  # note: Already in image coordinates
            face_y2 = row['ymax']  # note: Already in image coordinates
            gaze_x = row['gazex']  # note: Already in image coordinates
            gaze_y = row['gazey']  # note: Already in image coordinates

            impath = os.path.join(self.data_dir, show_name, clip, row['path'])
            img = Image.open(impath)
            img = img.convert('RGB')

            width, height = img.size
            imsize = torch.FloatTensor([width, height])
            # imsizes.append(imsize)

            face_x1, face_y1, face_x2, face_y2 = map(
                float, [face_x1, face_y1, face_x2, face_y2])
            gaze_x, gaze_y = map(float, [gaze_x, gaze_y])
            if gaze_x == -1 and gaze_y == -1:
                gaze_inside = False
            else:
                if gaze_x < 0:  # move gaze point that was slightly outside the image back in
                    gaze_x = 0
                if gaze_y < 0:
                    gaze_y = 0
                gaze_inside = True

            if not self.test:
                ## data augmentation
                # Jitter (expansion-only) bounding box size.
                if cond_jitter < 0.5:
                    k = cond_jitter * 0.1
                    face_x1 -= k * abs(face_x2 - face_x1)
                    face_y1 -= k * abs(face_y2 - face_y1)
                    face_x2 += k * abs(face_x2 - face_x1)
                    face_y2 += k * abs(face_y2 - face_y1)
                    face_x1 = np.clip(face_x1, 0, width)
                    face_x2 = np.clip(face_x2, 0, width)
                    face_y1 = np.clip(face_y1, 0, height)
                    face_y2 = np.clip(face_y2, 0, height)

                # Random Crop
                if cond_crop < 0.5:
                    # Crop it
                    img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                                  crop_width)

                    # Record the crop's (x, y) offset
                    offset_x, offset_y = crop_x_min, crop_y_min

                    # convert coordinates into the cropped frame
                    face_x1, face_y1, face_x2, face_y2 = face_x1 - offset_x, face_y1 - offset_y, face_x2 - offset_x, face_y2 - offset_y
                    if gaze_inside:
                        gaze_x, gaze_y = (gaze_x - offset_x), \
                                         (gaze_y - offset_y)
                    else:
                        gaze_x = -1
                        gaze_y = -1

                    width, height = crop_width, crop_height

                # Flip?
                if cond_flip < 0.5:
                    img = img.transpose(Image.FLIP_LEFT_RIGHT)
                    x_max_2 = width - face_x1
                    x_min_2 = width - face_x2
                    face_x2 = x_max_2
                    face_x1 = x_min_2
                    if gaze_x != -1 and gaze_y != -1:
                        gaze_x = width - gaze_x

                # Random color change
                if cond_color < 0.5:
                    img = TF.adjust_brightness(img, brightness_factor=n1)
                    img = TF.adjust_contrast(img, contrast_factor=n2)
                    img = TF.adjust_saturation(img, saturation_factor=n3)

            # Face crop
            face = img.copy().crop(
                (int(face_x1), int(face_y1), int(face_x2), int(face_y2)))

            # Head channel image
            head_channel = imutils.get_head_box_channel(
                face_x1,
                face_y1,
                face_x2,
                face_y2,
                width,
                height,
                resolution=self.input_size,
                coordconv=False).unsqueeze(0)
            if self.transform is not None:
                img = self.transform(img)
                face = self.transform(face)

            # Deconv output
            if gaze_inside:
                gaze_x /= float(width)  # fractional gaze
                gaze_y /= float(height)
                gaze_heatmap = torch.zeros(
                    self.output_size,
                    self.output_size)  # set the size of the output
                gaze_map = imutils.draw_labelmap(
                    gaze_heatmap,
                    [gaze_x * self.output_size, gaze_y * self.output_size],
                    3,
                    type='Gaussian')
                gazes.append(torch.FloatTensor([gaze_x, gaze_y]))
            else:
                gaze_map = torch.zeros(self.output_size, self.output_size)
                gazes.append(torch.FloatTensor([-1, -1]))
            faces.append(face)
            images.append(img)
            head_channels.append(head_channel)
            heatmaps.append(gaze_map)
            gaze_inouts.append(torch.FloatTensor([int(gaze_inside)]))

        if self.imshow:
            for i in range(len(faces)):
                fig = plt.figure(111)
                img = 255 - imutils.unnorm(images[i].numpy()) * 255
                img = np.clip(img, 0, 255)
                plt.imshow(np.transpose(img, (1, 2, 0)))
                plt.imshow(imresize(heatmaps[i],
                                    (self.input_size, self.input_size)),
                           cmap='jet',
                           alpha=0.3)
                plt.imshow(imresize(1 - head_channels[i].squeeze(0),
                                    (self.input_size, self.input_size)),
                           alpha=0.2)
                plt.savefig(
                    os.path.join('debug',
                                 'viz_%d_inout=%d.png' % (i, gaze_inouts[i])))
                plt.close('all')

        faces = torch.stack(faces)
        images = torch.stack(images)
        head_channels = torch.stack(head_channels)
        heatmaps = torch.stack(heatmaps)
        gazes = torch.stack(gazes)
        gaze_inouts = torch.stack(gaze_inouts)
        # imsizes = torch.stack(imsizes)
        # print(faces.shape, images.shape, head_channels.shape, heatmaps.shape)

        if self.test:
            return images, faces, head_channels, heatmaps, gazes, gaze_inouts
        else:  # train
            return images, faces, head_channels, heatmaps, gaze_inouts
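draw_labelmap here is project-specific (imutils), but the operation is the standard one: splat a small Gaussian centered at the scaled gaze point onto the output grid. A generic sketch under that assumption, with the 3 passed above presumably being the sigma:

import torch

def gaussian_labelmap(size, center, sigma=3.0):
    # Peak-normalized Gaussian at `center` = (x, y) on a size x size grid.
    ys, xs = torch.meshgrid(torch.arange(size, dtype=torch.float32),
                            torch.arange(size, dtype=torch.float32),
                            indexing='ij')
    cx, cy = center
    g = torch.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))
    return g / g.max().clamp(min=1e-8)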
Example #22
def Adjust_saturation(image):
    return F.adjust_saturation(image, 2)
Example #23
    def __getitem__(self, index):
        if self.test:
            g = self.X_test.get_group(self.keys[index])
            cont_gaze = []
            for i, row in g.iterrows():
                path = row['path']
                x_min = row['bbox_x_min']
                y_min = row['bbox_y_min']
                x_max = row['bbox_x_max']
                y_max = row['bbox_y_max']
                eye_x = row['eye_x']
                eye_y = row['eye_y']
                gaze_x = row['gaze_x']
                gaze_y = row['gaze_y']
                cont_gaze.append([gaze_x, gaze_y])  # all ground-truth gazes are stacked up
            for j in range(len(cont_gaze), 20):
                cont_gaze.append([-1, -1])  # pad dummy gazes so batches have equal size
            cont_gaze = torch.FloatTensor(cont_gaze)
            gaze_inside = True  # always consider test samples as inside

        else:
            path = self.X_train.iloc[index]
            eye_x, eye_y, gaze_x, gaze_y = self.y_train.iloc[index]
            gaze_inside = True  # bool(inout)

        img = Image.open(os.path.join(self.data_dir, path))
        img = img.convert('RGB')
        width, height = img.size
        # print('gaze coords: ', type(gaze_x), type(gaze_y), gaze_x, gaze_y)
        # print('eye coords: ', type(eye_x), type(eye_y), eye_x, eye_y)
        # expand face bbox a bit
        k = 0.1
        x_min = (eye_x - 0.15) * width
        y_min = (eye_y - 0.15) * height
        x_max = (eye_x + 0.15) * width
        y_max = (eye_y + 0.15) * height
        if x_min < 0:
            x_min = 0
        if y_min < 0:
            y_min = 0
        if x_max < 0:
            x_max = 0
        if y_max < 0:
            y_max = 0
        x_min -= k * abs(x_max - x_min)
        y_min -= k * abs(y_max - y_min)
        x_max += k * abs(x_max - x_min)
        y_max += k * abs(y_max - y_min)

        # x_min = eye_x - 0.15
        # y_min = eye_y - 0.15
        # x_max = eye_x + 0.15
        # y_max = eye_y + 0.15
        # if x_min < 0:
        #     x_min = 0
        # if y_min < 0:
        #     y_min = 0
        # if x_max < 0:
        #     x_max = 0
        # if y_max < 0:
        #     y_max = 0

        # print('bbx',  [x_min, y_min, x_max, y_max])

        x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])
        # print(x_min, y_min, x_max, y_max)
        if self.imshow:
            img.save("origin_img.jpg")

        if self.test:
            imsize = torch.IntTensor([width, height])
        else:
            ## data augmentation

            # Jitter (expansion-only) bounding box size
            if np.random.random_sample() <= 0.5:
                k = np.random.random_sample() * 0.2
                x_min -= k * abs(x_max - x_min)
                y_min -= k * abs(y_max - y_min)
                x_max += k * abs(x_max - x_min)
                y_max += k * abs(y_max - y_min)

            # Random Crop
            if np.random.random_sample() <= 0.5:
                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                crop_x_min = np.min([gaze_x * width, x_min, x_max])
                crop_y_min = np.min([gaze_y * height, y_min, y_max])
                crop_x_max = np.max([gaze_x * width, x_min, x_max])
                crop_y_max = np.max([gaze_y * height, y_min, y_max])

                # Randomly select a random top left corner
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)

                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                              crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # convert coordinates into the cropped frame
                x_min, y_min, x_max, y_max = x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y
                # if gaze_inside:
                gaze_x, gaze_y = (gaze_x * width - offset_x) / float(crop_width), \
                                 (gaze_y * height - offset_y) / float(crop_height)
                # else:
                #     gaze_x = -1; gaze_y = -1

                width, height = crop_width, crop_height

            # Random flip
            if np.random.random_sample() <= 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x_max_2 = width - x_min
                x_min_2 = width - x_max
                x_max = x_max_2
                x_min = x_min_2
                gaze_x = 1 - gaze_x

            # Random color change
            if np.random.random_sample() <= 0.5:
                img = TF.adjust_brightness(img,
                                           brightness_factor=np.random.uniform(
                                               0.5, 1.5))
                img = TF.adjust_contrast(img,
                                         contrast_factor=np.random.uniform(
                                             0.5, 1.5))
                img = TF.adjust_saturation(img,
                                           saturation_factor=np.random.uniform(
                                               0, 1.5))

        head_channel = imutils.get_head_box_channel(
            x_min,
            y_min,
            x_max,
            y_max,
            width,
            height,
            resolution=self.input_size,
            coordconv=False).unsqueeze(0)

        # Crop the face
        face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        if self.imshow:
            img.save("img_aug.jpg")
            face.save('face_aug.jpg')

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)

        # generate the heat map used for deconv prediction
        gaze_heatmap = torch.zeros(
            self.output_size, self.output_size)  # set the size of the output
        if self.test:  # aggregate a heatmap over all annotators
            # NOTE: assumes self.gazes[0, idx] holds one (x, y) gaze point
            # per annotator; the train path averages them into `gaze`
            cont_gaze = self.gazes[0, idx]
            num_valid = 0
            for gaze_x, gaze_y in cont_gaze:
                if gaze_x != -1:
                    num_valid += 1
                    gaze_heatmap = imutils.draw_labelmap(
                        gaze_heatmap,
                        [gaze_x * self.output_size, gaze_y * self.output_size],
                        3,
                        type='Gaussian')
            gaze_heatmap /= num_valid
        else:
            # if gaze_inside:
            gaze_heatmap = imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3,
                type='Gaussian')

        if self.imshow:
            fig = plt.figure(111)
            img = 255 - imutils.unnorm(img.numpy()) * 255
            img = np.clip(img, 0, 255)
            plt.imshow(np.transpose(img, (1, 2, 0)))
            plt.imshow(imresize(gaze_heatmap,
                                (self.input_size, self.input_size)),
                       cmap='jet',
                       alpha=0.3)
            plt.imshow(imresize(1 - head_channel.squeeze(0),
                                (self.input_size, self.input_size)),
                       alpha=0.2)
            plt.savefig('viz_aug.png')

        if self.test:
            return img, face, head_channel, gaze_heatmap, cont_gaze, imsize, image_path
        else:
            return img, face, head_channel, gaze_heatmap, image_path, gaze_inside
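A minimal usage sketch for a dataset like the one above (the class name GazeDataset and its constructor arguments are assumptions, not from the source; only root_dir and transform appear as attributes in the snippet):

    from torch.utils.data import DataLoader

    dataset = GazeDataset(root_dir='data/', transform=transform)  # hypothetical ctor
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    img, face, head_channel, gaze_heatmap, path, gaze_inside = next(iter(loader))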
Example #24
    def __getitem__(self, idx):

        gaze_inside = True
        data = self.data[idx]
        image_path = data['filename']
        image_path = os.path.join(self.root_dir, image_path)

        # annotations are stored in 640x480 pixel coordinates; normalize
        eye = [float(data['hx']) / 640, float(data['hy']) / 480]
        gaze = [float(data['gaze_cx']) / 640, float(data['gaze_cy']) / 480]

        image_path = image_path.replace('\\', '/')
        img = Image.open(image_path)
        img = img.convert('RGB')
        width, height = img.size
        gaze_x, gaze_y = gaze
        eye_x, eye_y = eye

        # Ground-truth boxes and class labels, plus the index/class of the
        # gazed object
        gt_bboxes, gt_labels = np.zeros(1), np.zeros(1)
        gt_labels = np.expand_dims(gt_labels, axis=0)
        gaze_idx = np.copy(data['gazeIdx']).astype(np.int64)  # index of gazed object
        gaze_class = np.copy(data['gaze_item']).astype(np.int64)  # class of gazed object
        if self.use_gtbox:
            gt_bboxes = np.copy(data['ann']['bboxes']) / [640, 480, 640, 480]
            gt_labels = np.copy(data['ann']['labels'])

            gtbox = gt_bboxes[gaze_idx]

        k = 0.1
        x_min = (eye_x - 0.15) * width
        y_min = (eye_y - 0.15) * height
        x_max = (eye_x + 0.15) * width
        y_max = (eye_y + 0.15) * height
        if x_min < 0:
            x_min = 0
        if y_min < 0:
            y_min = 0
        if x_max < 0:
            x_max = 0
        if y_max < 0:
            y_max = 0
        # Expand the box symmetrically; measure the size once so the
        # second pair of updates does not reuse the already-shifted minima
        box_w = abs(x_max - x_min)
        box_h = abs(y_max - y_min)
        x_min -= k * box_w
        y_min -= k * box_h
        x_max += k * box_w
        y_max += k * box_h
        x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])

        if self.imshow:
            img.save("origin_img.jpg")

        if self.training == 'test':
            imsize = torch.IntTensor([width, height])
        else:
            ## data augmentation

            # Jitter (expansion-only) bounding box size
            if np.random.random_sample() <= 0.5:
                k = np.random.random_sample() * 0.2
                # measure once so both expansions use the same box size
                jitter_w = abs(x_max - x_min)
                jitter_h = abs(y_max - y_min)
                x_min -= k * jitter_w
                y_min -= k * jitter_h
                x_max += k * jitter_w
                y_max += k * jitter_h

            # Random Crop
            if np.random.random_sample() <= 0.5:
                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                crop_x_min = np.min([gaze_x * width, x_min, x_max])
                crop_y_min = np.min([gaze_y * height, y_min, y_max])
                crop_x_max = np.max([gaze_x * width, x_min, x_max])
                crop_y_max = np.max([gaze_y * height, y_min, y_max])

                # Randomly select the top-left corner of the crop
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)

                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                              crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # convert coordinates into the cropped frame
                x_min, y_min, x_max, y_max = x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y
                # if gaze_inside:
                gaze_x, gaze_y = (gaze_x * width - offset_x) / float(crop_width), \
                                 (gaze_y * height - offset_y) / float(crop_height)
                # else:
                #     gaze_x = -1; gaze_y = -1

                width, height = crop_width, crop_height

            # Random flip
            if np.random.random_sample() <= 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x_max_2 = width - x_min
                x_min_2 = width - x_max
                x_max = x_max_2
                x_min = x_min_2
                gaze_x = 1 - gaze_x

            # Random color change
            if np.random.random_sample() <= 0.5:
                img = TF.adjust_brightness(img,
                                           brightness_factor=np.random.uniform(
                                               0.5, 1.5))
                img = TF.adjust_contrast(img,
                                         contrast_factor=np.random.uniform(
                                             0.5, 1.5))
                img = TF.adjust_saturation(img,
                                           saturation_factor=np.random.uniform(
                                               0, 1.5))


        head_channel = chong_imutils.get_head_box_channel(
            x_min,
            y_min,
            x_max,
            y_max,
            width,
            height,
            resolution=self.input_size,
            coordconv=False).unsqueeze(0)

        # Crop the face
        face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        if self.imshow:
            img.save("img_aug.jpg")
            face.save('face_aug.jpg')

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)

        # generate the heat map used for deconv prediction
        gaze_heatmap = torch.zeros(
            self.output_size, self.output_size)  # set the size of the output
        # train and test draw the same single Gaussian at the gaze point
        gaze_heatmap = chong_imutils.draw_labelmap(
            gaze_heatmap,
            [gaze_x * self.output_size, gaze_y * self.output_size],
            3,
            type='Gaussian')

        if self.training == 'test' and self.use_gtbox:
            return img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside, image_path, gtbox
        elif self.training == 'test':
            return img, face, head_channel, eye, gaze_heatmap, gaze, gaze_inside, image_path
        else:
            return img, face, head_channel, gaze_heatmap, image_path, gaze_inside
Example #25
 def __call__(self, img: Image.Image):
     return TF.adjust_saturation(img, self.params)
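A minimal sketch of the transform this method belongs to, assuming `self.params` is a fixed saturation factor supplied at construction time (the constructor and class name are not shown in the source):

    from PIL import Image
    from torchvision.transforms import functional as TF

    class FixedSaturation:
        def __init__(self, params: float):
            self.params = params  # saturation factor; 1.0 is a no-op

        def __call__(self, img: Image.Image):
            return TF.adjust_saturation(img, self.params)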
Example #26
 def torchvision(self, img):
     img = torchvision.adjust_hue(img, hue_factor=0.1)
     img = torchvision.adjust_saturation(img, saturation_factor=1.2)
     img = torchvision.adjust_brightness(img, brightness_factor=1.2)
     return img
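Note that `torchvision` itself does not export `adjust_hue`; for the snippet above to run, the name is presumably an alias for the functional module, e.g.:

    # assumed import that makes the snippet above runnable
    import torchvision.transforms.functional as torchvision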
Example #27
    def __call__(self, inputs):
        # unpack: two images, depth map, phase ('train'/'test'), and fb
        # (a width-dependent scalar, rescaled along with the resize below)
        img1, img2, depth, phase, fb = inputs

        h = img1.height
        w = img1.width
        w0 = w

        if self.size == [-1]:
            # round H and W up to multiples of 32; note this caches the
            # computed size on self for subsequent calls
            divisor = 32.0
            h = int(math.ceil(h / divisor) * divisor)
            w = int(math.ceil(w / divisor) * divisor)
            self.size = (h, w)

        scale_transform = transforms.Compose(
            [transforms.Resize(self.size, Image.BICUBIC)])

        img1 = scale_transform(img1)
        if img2 is not None:
            img2 = scale_transform(img2)

        if fb is not None:
            scale = float(self.size[1]) / float(w0)
            fb = fb * scale
        if phase == 'test':
            return img1, img2, depth, fb

        if self.size != 0:

            if depth is not None:
                arr_depth = np.array(depth, dtype=np.float32)
                arr_depth /= 65535.0  # scale 16-bit depth values into [0, 1]

                arr_depth[arr_depth < 0.0] = 0.0
                depth = Image.fromarray(arr_depth, 'F')

        if depth is not None:
            scale_transform_d = transforms.Compose(
                [transforms.Resize(self.size, Image.BICUBIC)])
            depth = scale_transform_d(depth)

        if self.flip and not (img2 is not None and depth is not None):

            flip_prob = random.random()
            flip_transform = transforms.Compose(
                [RandomHorizontalFlip(flip_prob)])
            if img2 is None:
                img1 = flip_transform(img1)
            else:
                if flip_prob < 0.5:
                    # flipping a stereo pair horizontally also swaps the
                    # left and right views
                    img1, img2 = flip_transform(img2), flip_transform(img1)
            if depth is not None:
                depth = flip_transform(depth)

        if self.rotation and not (img2 is not None and depth is not None):
            if random.random() < 0.5:
                degree = random.randrange(-500, 500) / 100
                img1 = F.rotate(img1, degree, Image.BICUBIC)
                if depth is not None:
                    depth = F.rotate(depth, degree, Image.BILINEAR)
                if img2 is not None:
                    img2 = F.rotate(img2, degree, Image.BICUBIC)
        if depth is not None:
            depth = np.array(depth, dtype=np.float32)
            depth = depth * 2.0
            depth -= 1.0

        if self.augment:
            if random.random() < 0.5:

                brightness = random.uniform(0.8, 1.0)
                contrast = random.uniform(0.8, 1.0)
                saturation = random.uniform(0.8, 1.0)

                img1 = F.adjust_brightness(img1, brightness)
                img1 = F.adjust_contrast(img1, contrast)
                img1 = F.adjust_saturation(img1, saturation)

                if img2 is not None:
                    img2 = F.adjust_brightness(img2, brightness)
                    img2 = F.adjust_contrast(img2, contrast)
                    img2 = F.adjust_saturation(img2, saturation)
        return img1, img2, depth, fb
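A hedged usage sketch (the transform's class name and constructor arguments are assumptions; the source only shows `__call__`, whose attributes suggest size/flip/rotation/augment options):

    t = StereoDepthTransform(size=[-1], flip=True, rotation=True, augment=True)  # hypothetical
    img1, img2, depth, fb = t((left_img, right_img, depth_img, 'train', fb_scalar))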
Example #28
    def __getitem__(self, index):
        index_ = index % self.sizex
        ps = self.ps

        inp_path = self.inp_filenames[index_]
        tar_path = self.tar_filenames[index_]

        inp_img = Image.open(inp_path)
        tar_img = Image.open(tar_path)

        w,h = tar_img.size
        padw = ps-w if w<ps else 0
        padh = ps-h if h<ps else 0

        # Reflect Pad in case image is smaller than patch_size
        if padw!=0 or padh!=0:
            inp_img = TF.pad(inp_img, (0,0,padw,padh), padding_mode='reflect')
            tar_img = TF.pad(tar_img, (0,0,padw,padh), padding_mode='reflect')

        aug    = random.randint(0, 2)
        if aug == 1:
            # adjust_gamma with gamma=1 is the identity, so this branch
            # currently leaves both images unchanged
            inp_img = TF.adjust_gamma(inp_img, 1)
            tar_img = TF.adjust_gamma(tar_img, 1)

        aug    = random.randint(0, 2)
        if aug == 1:
            sat_factor = 1 + (0.2 - 0.4*np.random.rand())  # roughly uniform in [0.8, 1.2]
            inp_img = TF.adjust_saturation(inp_img, sat_factor)
            tar_img = TF.adjust_saturation(tar_img, sat_factor)

        inp_img = TF.to_tensor(inp_img)
        tar_img = TF.to_tensor(tar_img)

        hh, ww = tar_img.shape[1], tar_img.shape[2]

        rr     = random.randint(0, hh-ps)
        cc     = random.randint(0, ww-ps)
        aug    = random.randint(0, 8)

        # Crop patch
        inp_img = inp_img[:, rr:rr+ps, cc:cc+ps]
        tar_img = tar_img[:, rr:rr+ps, cc:cc+ps]

        # Data Augmentations
        if aug==1:
            inp_img = inp_img.flip(1)
            tar_img = tar_img.flip(1)
        elif aug==2:
            inp_img = inp_img.flip(2)
            tar_img = tar_img.flip(2)
        elif aug==3:
            inp_img = torch.rot90(inp_img,dims=(1,2))
            tar_img = torch.rot90(tar_img,dims=(1,2))
        elif aug==4:
            inp_img = torch.rot90(inp_img,dims=(1,2), k=2)
            tar_img = torch.rot90(tar_img,dims=(1,2), k=2)
        elif aug==5:
            inp_img = torch.rot90(inp_img,dims=(1,2), k=3)
            tar_img = torch.rot90(tar_img,dims=(1,2), k=3)
        elif aug==6:
            inp_img = torch.rot90(inp_img.flip(1),dims=(1,2))
            tar_img = torch.rot90(tar_img.flip(1),dims=(1,2))
        elif aug==7:
            inp_img = torch.rot90(inp_img.flip(2),dims=(1,2))
            tar_img = torch.rot90(tar_img.flip(2),dims=(1,2))
        
        filename = os.path.splitext(os.path.split(tar_path)[-1])[0]

        return tar_img, inp_img, filename
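The eight `aug` cases above enumerate the dihedral group D4: identity, two flips, three rotations, and two flip-plus-rotate combinations. A compact equivalent for CHW tensors (a sketch; the helper name is hypothetical):

    import torch

    def dihedral(x: torch.Tensor, k: int) -> torch.Tensor:
        # k in [0, 8]; 0 and 8 leave the tensor unchanged, matching
        # the randint(0, 8) draw above
        if k == 1: return x.flip(1)
        if k == 2: return x.flip(2)
        if k == 3: return torch.rot90(x, dims=(1, 2))
        if k == 4: return torch.rot90(x, k=2, dims=(1, 2))
        if k == 5: return torch.rot90(x, k=3, dims=(1, 2))
        if k == 6: return torch.rot90(x.flip(1), dims=(1, 2))
        if k == 7: return torch.rot90(x.flip(2), dims=(1, 2))
        return x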
Example #29
def augment_batch(images: torch.Tensor, p: float) -> torch.Tensor:
    warnings.warn("augment_batch is deprecated", DeprecationWarning)
    batch_size, channels, h_orig, w_orig = images.size()
    images = pad(images,
                 padding=(w_orig - 1, h_orig - 1, w_orig - 1, h_orig - 1),
                 padding_mode='reflect')
    batch_size, channels, h, w = images.size()
    mask = (torch.rand(batch_size) < p).logical_and(
        torch.rand(batch_size) < 0.5)
    images[mask] = hflip(images[mask])
    output_images = images.new_zeros((batch_size, channels, h_orig, w_orig))

    translate = (0, 0)
    angle_step = choice([0, 1, 2, 3])
    angle = -90 * angle_step

    scale_iso_mask = torch.rand(batch_size) < p
    scale_iso = lognormal(0, 0.2 * math.log(2))
    scale = (scale_iso, scale_iso)

    # 1 - sqrt(1 - p): two independent draws at p_rot would combine to an
    # overall rotation probability of p
    p_rot = 1 - math.sqrt(1 - p)
    rot_mask = torch.rand(batch_size) < p_rot
    theta = uniform(-180, 180)
    angle += theta

    scale_mask = torch.rand(batch_size) < p
    scale_factor = lognormal(0, 0.2 * math.log(2))
    scale_x, scale_y = scale
    scale = (scale_x * scale_factor, scale_y / scale_factor)
    new_size = (int(h * scale[0]), int(w * scale[1]))

    if torch.any(rot_mask):
        affine_transformed = affine(images[rot_mask],
                                    angle=angle,
                                    translate=list(translate),
                                    shear=[0., 0.],
                                    scale=1)
        images[rot_mask] = affine_transformed

    resize_mask = scale_iso_mask.logical_and(scale_mask)
    resized_images = resize(images[resize_mask], list(new_size))
    output_images[resize_mask.logical_not()] = center_crop(
        images[resize_mask.logical_not()], (h_orig, w_orig))
    output_images[resize_mask] = center_crop(resized_images, (h_orig, w_orig))

    images = output_images

    mask = torch.rand(batch_size) < p
    brightness = normal(1, 0.2)
    images[mask] = adjust_brightness(images[mask], brightness)

    mask = torch.rand(batch_size) < p
    contrast = lognormal(0, (0.5 * math.log(2)))
    images[mask] = adjust_contrast(images[mask], contrast)

    mask = torch.rand(batch_size) < p
    # luma flip: invert the Y (luminance) channel in YCbCr space
    image_data = rgb_to_ycbcr(images[mask])
    image_data[..., 0, :, :] = 1 - image_data[..., 0, :, :]
    images[mask] = ycbcr_to_rgb(image_data)

    mask = torch.rand(batch_size) < p
    if torch.any(mask):
        hue_factor = uniform(-0.5, 0.5)
        images[mask] = adjust_hue(images[mask], hue_factor)

    mask = torch.rand(batch_size) < p
    saturation = lognormal(0, math.log(2))
    images[mask] = adjust_saturation(images[mask], saturation)

    mask = torch.rand(batch_size) < p
    std_dev = abs(normal(0, 0.1))
    noise = torch.randn_like(images[mask]) * std_dev
    # add the noise first, then clamp the result into the valid range
    # (clamping the noise itself would discard all negative samples)
    images[mask] = (images[mask] + noise).clamp(0, 1)

    return images
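A minimal usage sketch with toy data (`augment_batch` is the function above; note it emits a DeprecationWarning as written):

    import torch

    batch = torch.rand(8, 3, 64, 64)         # batch of RGB images in [0, 1]
    augmented = augment_batch(batch, p=0.3)  # each augmentation fires with prob ~p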
Example #30
def batch_saturation(batch, factor):
    # `toimg` / `totensor` are tensor<->PIL converters defined elsewhere
    # in the source (e.g. torchvision's ToPILImage / ToTensor)
    for i in range(batch.size(0)):
        img = TF.adjust_saturation(toimg(batch[i]), factor[i])
        batch[i] = totensor(img)
    return batch
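A hedged usage sketch, with torchvision's standard converters standing in for the undefined `toimg`/`totensor` helpers:

    import torch
    import torchvision.transforms as T
    import torchvision.transforms.functional as TF

    toimg, totensor = T.ToPILImage(), T.ToTensor()
    batch = torch.rand(4, 3, 32, 32)
    factors = (0.5 + torch.rand(4)).tolist()  # per-image factors in [0.5, 1.5)
    batch = batch_saturation(batch, factors)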
Example #31
 def __call__(self, img):
     saturation = np.random.uniform(self.low, self.high)
     img = functional.adjust_saturation(img, saturation)
     return img
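A minimal sketch of the enclosing transform, assuming `self.low`/`self.high` come from a constructor like this (the class name and defaults are assumptions):

    import numpy as np
    from torchvision.transforms import functional

    class RandomSaturation:
        def __init__(self, low: float = 0.5, high: float = 1.5):
            self.low, self.high = low, high

        def __call__(self, img):
            saturation = np.random.uniform(self.low, self.high)
            return functional.adjust_saturation(img, saturation)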
Example #32
 def __call__(self, imgs):
     return [
         F.adjust_saturation(img=img,
                             saturation_factor=self.saturation_factor)
         for img in imgs
     ]

 def __call__(self, x):
     x = TF.adjust_gamma(x, gamma=1)  # gamma=1 is the identity (no-op)
     x = TF.adjust_saturation(x, self.saturation_factor)
     x = TF.adjust_brightness(x, self.brightness_factor)
     x = TF.adjust_contrast(x, self.contrast_factor)
     return x