Example #1
    def torchvision_transform(self, img):
        # `torchvision` here is presumably torchvision.transforms.functional
        # imported under an alias; crop/resize match that module's signatures
        img = torchvision.crop(img, top=0, left=0, height=64, width=64)
        return torchvision.resize(img, (512, 512))
Example #2
    def __call__(self, image):
        size = self.get_size(image.size)
        image = F.resize(image, size)
        # image = F.resize(image, (512, 512))
        # image = F.resize(image, 448)
        return image
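A note on the two commented-out alternatives above: a (h, w) tuple forces an exact output size, while a single int resizes the shorter edge to that value and keeps the aspect ratio. A quick self-contained check (the 640x480 blank image is a placeholder):

from PIL import Image
import torchvision.transforms.functional as F

im = Image.new("RGB", (640, 480))
print(F.resize(im, (512, 512)).size)  # (512, 512): exact target size
print(F.resize(im, 448).size)         # (597, 448): shorter edge -> 448, aspect ratio kept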
Example #3
def _(x: Image.Image, size: Tuple[int, ...], **kwargs) -> Image.Image:
    return F.resize(x, size, **kwargs)
Example #4
while True:
    try:
        ret, frame = cap.read()
        if not ret:
            break
        if args.vive != "no":
            frame = undistort(frame)
        cv_img = cv2.resize(frame, (320, 320))
    except Exception:
        break

    if args.demo != "no":
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_frame = Image.fromarray(frame)
        pil_frame = TF.resize(pil_frame, (320, 320))
        if args.gray != "no":
            pil_frame = TF.to_grayscale(pil_frame, num_output_channels=3)
        img = TF.to_tensor(pil_frame).unsqueeze(0)

        t0 = time.time()
        output = model(img.cuda())
        avg_inference += (time.time() - t0)
        cnt += 1

        zero = torch.zeros((1, 320, 320), requires_grad=False)
        one = torch.ones((1, 320, 320), requires_grad=False)

        front_mask = torch.sigmoid(output.cpu()).squeeze(0)
        output_mask = torch.where(front_mask > 0.5, one, zero)
        mask = TF.to_pil_image(output_mask)
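A side note on the masking step above: the preallocated `one`/`zero` tensors are not required; comparing and casting yields the same binary mask. Self-contained equivalent:

import torch

logits = torch.randn(1, 320, 320)         # placeholder for output.cpu().squeeze(0)
front_mask = torch.sigmoid(logits)
output_mask = (front_mask > 0.5).float()  # same as torch.where(front_mask > 0.5, one, zero)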
Example #5
from PIL import Image
import numpy as np
import torch
import torchvision.transforms.functional as TF

img = Image.open("sample.jpg")
img = TF.resize(img, (256, 256))
img_tensor = TF.to_tensor(img)

# shift operation (x: +50, y: +100)
theta = np.array([[1, 0, 50], [0, 1, 100]])
t1 = theta[:, [0, 1]]
t2 = theta[:, [2]]

shifted_tensor = torch.zeros_like(img_tensor)
for x in range(img_tensor.size(1)):
    for y in range(img_tensor.size(2)):
        pos = np.array([[x], [y]])
        npos = t1 @ pos + t2
        nx, ny = npos[0][0], npos[1][0]
        if 0 <= nx < img_tensor.size(1) and 0 <= ny < img_tensor.size(2):
            shifted_tensor[:, nx, ny] = img_tensor[:, x, y]
shifted_img = TF.to_pil_image(shifted_tensor)

# scaling operation (x2)
theta = np.array([[2, 0, 0], [0, 2, 0]])
t1 = theta[:, [0, 1]]
t2 = theta[:, [2]]

shifted_tensor = torch.zeros_like(img_tensor)
for x in range(img_tensor.size(1)):
    for y in range(img_tensor.size(2)):
        # mirrors the shift loop above, now with the scaling map t1/t2
        pos = np.array([[x], [y]])
        npos = t1 @ pos + t2
        nx, ny = npos[0][0], npos[1][0]
        if 0 <= nx < img_tensor.size(1) and 0 <= ny < img_tensor.size(2):
            shifted_tensor[:, nx, ny] = img_tensor[:, x, y]
scaled_img = TF.to_pil_image(shifted_tensor)
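The per-pixel loops above apply the affine maps by hand, which is slow in pure Python. As a hedged alternative, reusing `img_tensor` and `TF` from this example: recent torchvision versions let TF.affine operate on tensors directly, and its `translate` argument takes (horizontal, vertical) pixel offsets, so the +50 row / +100 column shift becomes one call (check the sign convention against your torchvision version):

# vectorized sketch of the shift loop above (assumption: torchvision >= 0.9)
shifted_fast = TF.affine(img_tensor, angle=0.0, translate=[100, 50],
                         scale=1.0, shear=[0.0])  # uncovered pixels filled with 0, like the loop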
Example #6
def run_eval(args):

    print('running evaluation...')

    if args.save_output:
        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

    running_psnr = []
    running_ssim = []

    if args.dataset == 'rain100h':
        datadir = r'./datasets/Rain100H/val'
        val_dirs = glob.glob(os.path.join(datadir, 'norain-*.png'))
    elif args.dataset == 'rain100l':
        datadir = r'./datasets/Rain100L/val'
        val_dirs = glob.glob(os.path.join(datadir, '*x2.png'))
    elif args.dataset == 'rain800':
        datadir = r'./datasets/Rain800/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain800-real':
        datadir = r'./datasets/Rain800/test_nature'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test1':
        datadir = r'./datasets/DID-MDN/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test2':
        datadir = r'./datasets/DID-MDN/testing_fu'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain1400':
        datadir = r'./datasets/Rain1400/val/rainy_image'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))

    for idx in range(len(val_dirs)):

        this_dir = val_dirs[idx]

        if args.dataset == 'rain100h':
            gt = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
            img_mix = cv2.imread(val_dirs[idx].replace('norain', 'rain'),
                                 cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
        elif args.dataset == 'rain100l':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            gt = cv2.imread(val_dirs[idx].replace('x2.png', '.png'),
                            cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
        elif args.dataset == 'rain800':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'rain800-real':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'did-mdn-test1':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            img_mix = img[:, 0:int(w / 2), :]
            gt = img[:, int(w / 2):, :]
        elif args.dataset == 'did-mdn-test2':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'rain1400':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            suff = '_' + this_dir.split('_')[-1]
            this_gt_dir = this_dir.replace('rainy_image',
                                           'ground_truth').replace(
                                               suff, '.jpg')
            gt = cv2.imread(this_gt_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)

        # we recommend using TF.resize since it was also used during training;
        # cv2.resize also works but produces slightly different results
        img_mix = TF.resize(TF.to_pil_image(img_mix),
                            [args.in_size, args.in_size])
        img_mix = TF.to_tensor(img_mix).unsqueeze(0)

        gt = TF.resize(TF.to_pil_image(gt), [args.in_size, args.in_size])
        gt = TF.to_tensor(gt).unsqueeze(0)

        with torch.no_grad():
            # run the generator once and split the 6-channel output
            G_pred = net_G(img_mix.to(device))
            G_pred1 = G_pred[:, 0:3, :, :]
            G_pred2 = G_pred[:, 3:6, :, :]

        G_pred1 = np.array(G_pred1.cpu().detach())
        G_pred1 = G_pred1[0, :].transpose([1, 2, 0])
        G_pred2 = np.array(G_pred2.cpu().detach())
        G_pred2 = G_pred2[0, :].transpose([1, 2, 0])
        gt = np.array(gt.cpu().detach())
        gt = gt[0, :].transpose([1, 2, 0])
        img_mix = np.array(img_mix.cpu().detach())
        img_mix = img_mix[0, :].transpose([1, 2, 0])

        G_pred1[G_pred1 > 1] = 1
        G_pred1[G_pred1 < 0] = 0
        G_pred2[G_pred2 > 1] = 1
        G_pred2[G_pred2 < 0] = 0

        psnr = utils.cpt_rgb_psnr(G_pred1, gt, PIXEL_MAX=1.0)
        ssim = utils.cpt_rgb_ssim(G_pred1, gt)
        running_psnr.append(psnr)
        running_ssim.append(ssim)

        if args.save_output:
            fname = os.path.basename(this_dir)
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_input.png'),
                img_mix)
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_gt1.png'),
                       gt)
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_output1.png'),
                G_pred1)
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_output2.png'),
                G_pred2)

        print('id: %d, running psnr: %.4f, running ssim: %.4f' %
              (idx, np.mean(running_psnr), np.mean(running_ssim)))

    print('Dataset: %s, average psnr: %.4f, average ssim: %.4f' %
          (args.dataset, np.mean(running_psnr), np.mean(running_ssim)))
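The comment in the snippet above notes that TF.resize and cv2.resize give slightly different results: the PIL-backed and OpenCV bilinear filters differ in implementation detail. A minimal sketch that makes the discrepancy visible ("sample.jpg" is a placeholder path):

import cv2
import numpy as np
import torchvision.transforms.functional as TF
from PIL import Image

img = np.array(Image.open("sample.jpg").convert("RGB"))
a = np.array(TF.resize(Image.fromarray(img), [256, 256]))        # PIL bilinear
b = cv2.resize(img, (256, 256), interpolation=cv2.INTER_LINEAR)  # OpenCV bilinear
print("max abs pixel difference:", np.abs(a.astype(int) - b.astype(int)).max())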
Example #7
for i in range(nimgs):
    image, fn = dataset[i]
    # for plotting, add denormalized img
    attn_plot = []
    attn_plot.append(image.permute(1,2,0) / 2 + 0.5) # (C, H, W) -> (H, W, C)
    image = image.unsqueeze(0)
    # assumption: the eval helper takes the batched image; `image` was otherwise unused
    output, attn_weights_list = evaluate(image)
    h, w = attn_plot[0].shape[:2] # image size (h, w)

    result = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
    result = "<start> " + result[:-1] + " <end> " 
    attn_weights_list.pop(-2)  # remove the attention map corresponding to '.'

    for attn in attn_weights_list:
        attn_resized = F.resize(attn, (h,w)).permute(1,2,0)
        attn_map = (attn_resized - attn_resized.min()) / (attn_resized.max()- attn_resized.min())
        attn_plot.append(attn_map)

    fig = plt.figure(figsize=(8, 8))
    fig.suptitle("Visualization", fontsize=24)
    nplots = len(attn_plot)
    nrows = int(np.ceil(nplots / 4))

    for j in range(nplots):
        ax = fig.add_subplot(nrows, 4, j + 1)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        title, result = result.split(' ', 1)
        ax.set_title(title)
        ax.imshow(attn_plot[j])
Example #8
    def __getitem__(self, index):
        min_scale, max_scale, crop_size = [self.cfg.DATA.TEST_CROP_SIZE] * 3
        sampling_rate = self.cfg.DATA.SAMPLING_RATE
        target_fps = self.cfg.DATA.TARGET_FPS
        # initialize video container.
        video_container = container.get_video_container(
            self._path_to_videos[index].decode(),
            self.cfg.DATA_LOADER.ENABLE_MULTI_THREAD_DECODE,
            self.cfg.DATA.DECODING_BACKEND,
        )
        # video info.
        fps = float(video_container.streams.video[0].average_rate)
        video_frames = video_container.streams.video[0].frames
        duration = video_container.streams.video[0].duration
        time_base = float(video_container.streams.video[0].time_base)
        video_seconds = duration * time_base
        target_sampling_rate = int(sampling_rate * fps / target_fps)
        # decode all the frames.
        start, end = self._duration_secs[index]
        pts_per_frame = int(duration / video_frames)
        start_pts = int(start / video_seconds *
                        video_frames) * pts_per_frame if start > 0 else 0
        end_pts = int(end / video_seconds *
                      video_frames) * pts_per_frame if end > 0 else duration

        margin = 1024
        seek_offset = max(start_pts - margin, 0)
        # seek to nearest key frame, and decode from it to get subsequent frames in [start_pts, end_pts].
        video_container.seek(seek_offset,
                             any_frame=False,
                             backward=True,
                             stream=video_container.streams.video[0])
        frames = []
        for i, frame in enumerate(video_container.decode(video=0)):
            if frame.pts < start_pts:
                continue
            if frame.pts > end_pts:
                break
            image = frame.to_image()
            scaled_image = F.resize(image, size=[min_scale, max_scale])
            frames.append(scaled_image)
        video_container.close()
        # sampling
        original_frames = len(frames)
        frames = frames[0:len(frames):target_sampling_rate]
        frames = torch.as_tensor(np.stack(frames))
        # TODO: this is useful if GlobalAvgPool is used in the SlowFast Network.
        # make the min length of frames be NUM_FRAMES.
        #if len(frames) < self.cfg.DATA.NUM_FRAMES:
        #    indices = torch.linspace(0, len(frames), self.cfg.DATA.NUM_FRAMES)
        #    indices = torch.clamp(indices, 0, frames.shape[0] - 1).long()
        #    frames = torch.index_select(frames, 0, indices)

        frames = utils.tensor_normalize(frames, self.cfg.DATA.MEAN,
                                        self.cfg.DATA.STD)
        # T H W C -> C T H W
        frames = frames.permute(3, 0, 1, 2)

        # Perform data augmentation.
        frames = utils.spatial_sampling(
            frames,
            spatial_idx=1,  # 0: left crop, 1: center crop, 2: right crop
            min_scale=min_scale,
            max_scale=max_scale,
            crop_size=crop_size,
            random_horizontal_flip=False,
            inverse_uniform_sampling=False)

        label = self._labels[index]
        meta = {
            "video_frames": video_frames,
            "video_fps": fps,
            "target_sampling_rate": target_sampling_rate,
            "sampled_frames": frames.shape[1],
            "original_frames": original_frames,
            "video_seconds": video_seconds,
            "video_name": self._path_to_videos[index].decode().split("/")[-1],
            "start_seconds": start,
            "end_seconds": end,
            "video_label": label,
        }
        if self.cfg.MODEL.VIDEO_EXTRACTOR:
            length = (frames.shape[1] //
                      self.cfg.SLOWFAST.ALPHA) * self.cfg.SLOWFAST.ALPHA
            # C x T x H x W
            frames = frames[:, :length]
            frames = utils.pack_pathway_output(self.cfg, frames)
            return frames, label, index, meta
        else:
            return frames, label, index, meta
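The decoding logic above relies on a detail worth spelling out: seek() with backward=True lands on the nearest keyframe at or before the requested pts, so frames decoded from there must be skipped until the window starts. A standalone sketch of that pattern, assuming PyAV (which the container wrapper above appears to use); "video.mp4" and the pts value are placeholders:

import av

container = av.open("video.mp4")
stream = container.streams.video[0]
start_pts = 90000  # placeholder presentation timestamp
container.seek(start_pts, any_frame=False, backward=True, stream=stream)
for frame in container.decode(video=0):
    if frame.pts < start_pts:
        continue  # frames between the keyframe and start_pts are discarded
    print("first frame at/after start_pts:", frame.pts)
    break
container.close()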
Example #9
    def resize(self, image, landmarks, img_size):
        image = tf.resize(image, img_size)
        return image, landmarks
Example #10
    def transform_triplets(self, img, gt1, gt2):

        # resize images and convert to tensors
        img = TF.to_pil_image(img)
        img = TF.resize(img, [self.img_size, self.img_size])

        gt1 = TF.to_pil_image(gt1)
        gt1 = TF.resize(gt1, [self.img_size, self.img_size])

        gt2 = TF.to_pil_image(gt2)
        gt2 = TF.resize(gt2, [self.img_size, self.img_size])

        if self.with_random_hflip and random.random() > 0.5:
            img = TF.hflip(img)
            gt1 = TF.hflip(gt1)
            gt2 = TF.hflip(gt2)

        if self.with_random_vflip and random.random() > 0.5:
            img = TF.vflip(img)
            gt1 = TF.vflip(gt1)
            gt2 = TF.vflip(gt2)

        if self.with_random_rot90 and random.random() > 0.5:
            img = TF.rotate(img, 90)
            gt1 = TF.rotate(gt1, 90)
            gt2 = TF.rotate(gt2, 90)

        if self.with_random_rot180 and random.random() > 0.5:
            img = TF.rotate(img, 180)
            gt1 = TF.rotate(gt1, 180)
            gt2 = TF.rotate(gt2, 180)

        if self.with_random_rot270 and random.random() > 0.5:
            img = TF.rotate(img, 270)
            gt1 = TF.rotate(gt1, 270)
            gt2 = TF.rotate(gt2, 270)

        if self.with_color_jittering and random.random() > 0.5:
            img = TF.adjust_hue(img, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            img = TF.adjust_saturation(
                img,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6
            gt1 = TF.adjust_hue(gt1, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            gt1 = TF.adjust_saturation(
                gt1,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6
            gt2 = TF.adjust_hue(gt2, hue_factor=random.random() * 0.5 -
                                0.25)  # -0.25 ~ +0.25
            gt2 = TF.adjust_saturation(
                gt2,
                saturation_factor=random.random() * 0.8 + 0.8)  # 0.8 ~ +1.6

        if self.with_random_crop and random.random() > 0.5:
            i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \
                get_params(img=img, scale=(0.5, 1.0), ratio=self.crop_ratio)
            img = TF.resized_crop(img,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))
            gt1 = TF.resized_crop(gt1,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))
            gt2 = TF.resized_crop(gt2,
                                  i,
                                  j,
                                  h,
                                  w,
                                  size=(self.img_size, self.img_size))

        # to tensor
        img = TF.to_tensor(img)
        gt1 = TF.to_tensor(gt1)
        gt2 = TF.to_tensor(gt2)

        return img, gt1, gt2
Example #11
    def test_resize(self):
        script_fn = torch.jit.script(F.resize)
        tensor, pil_img = self._create_data(26, 36, device=self.device)
        batch_tensors = self._create_data_batch(16,
                                                18,
                                                num_samples=4,
                                                device=self.device)

        for dt in [None, torch.float32, torch.float64, torch.float16]:

            if dt == torch.float16 and torch.device(self.device).type == "cpu":
                # skip float16 on CPU case
                continue

            if dt is not None:
                # a trivial cast of the uint8 data so that all dtype cases are exercised
                tensor = tensor.to(dt)
                batch_tensors = batch_tensors.to(dt)

            for size in [32, 26, [32], [32, 32], (32, 32), [26, 35]]:
                for interpolation in [BILINEAR, BICUBIC, NEAREST]:
                    resized_tensor = F.resize(tensor,
                                              size=size,
                                              interpolation=interpolation)
                    resized_pil_img = F.resize(pil_img,
                                               size=size,
                                               interpolation=interpolation)

                    self.assertEqual(resized_tensor.size()[1:],
                                     resized_pil_img.size[::-1],
                                     msg="{}, {}".format(size, interpolation))

                    if interpolation != NEAREST:
                        # We can not check values if mode = NEAREST, as results are different
                        # E.g. resized_tensor  = [[a, a, b, c, d, d, e, ...]]
                        # E.g. resized_pil_img = [[a, b, c, c, d, e, f, ...]]
                        resized_tensor_f = resized_tensor
                        # cast uint8 data to float before comparing with the PIL result
                        if resized_tensor_f.dtype == torch.uint8:
                            resized_tensor_f = resized_tensor_f.to(torch.float)

                        # Pay attention to high tolerance for MAE
                        self.approxEqualTensorToPIL(resized_tensor_f,
                                                    resized_pil_img,
                                                    tol=8.0,
                                                    msg="{}, {}".format(
                                                        size, interpolation))

                    if isinstance(size, int):
                        script_size = [size]
                    else:
                        script_size = size
                    else:
                        script_size = size

                    resize_result = script_fn(tensor,
                                              size=script_size,
                                              interpolation=interpolation)
                    self.assertTrue(resized_tensor.equal(resize_result),
                                    msg="{}, {}".format(size, interpolation))

                    self._test_fn_on_batch(batch_tensors,
                                           F.resize,
                                           size=script_size,
                                           interpolation=interpolation)

        # assert changed type warning
        with self.assertWarnsRegex(
                UserWarning,
                r"Argument interpolation should be of type InterpolationMode"):
            res1 = F.resize(tensor, size=32, interpolation=2)
            res2 = F.resize(tensor, size=32, interpolation=BILINEAR)
            self.assertTrue(res1.equal(res2))
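The warning asserted at the end of this test reflects torchvision's API change: since v0.9, resize expects an InterpolationMode enum rather than the old integer codes. A short sketch of the modern call, reusing `tensor` and `F` from the test above:

from torchvision.transforms import InterpolationMode

out = F.resize(tensor, size=[32, 32], interpolation=InterpolationMode.BILINEAR)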
Example #12
    def __call__(self, img):

        targetSz = int(round(random.uniform(self.minSize, self.maxSize)))

        return F.resize(img, targetSz)
Example #13
    def __getitem__(self, index):

        # first load the RGB image
        image = Image.open(self.images[index]).convert('RGB')

        # next load the target
        sem_target = Image.open(self.targets_semantic[index]).convert('L')
        dep_target = Image.open(self.targets_depth[index]).convert('L')

        # If augmenting, apply random transforms
        # Else we should just resize the image down to the correct size
        if self.augment:
            # Resize
            image = TF.resize(image,
                              size=(128 + 10, 256 + 10),
                              interpolation=Image.BILINEAR)
            sem_target = TF.resize(sem_target,
                                   size=(128 + 10, 256 + 10),
                                   interpolation=Image.NEAREST)
            dep_target = TF.resize(dep_target,
                                   size=(128 + 10, 256 + 10),
                                   interpolation=Image.NEAREST)

            # Random crop
            i, j, h, w = transforms.RandomCrop.get_params(image,
                                                          output_size=(128,
                                                                       256))
            image = TF.crop(image, i, j, h, w)
            sem_target = TF.crop(sem_target, i, j, h, w)
            dep_target = TF.crop(dep_target, i, j, h, w)

            # Random horizontal flipping
            if random.random() > 0.5:
                image = TF.hflip(image)
                sem_target = TF.hflip(sem_target)
                dep_target = TF.hflip(dep_target)
            # Random vertical flipping
            # (I found this caused the sky to be predicted as road)
            # if random.random() > 0.5:
            #    image = TF.vflip(image)
            #    target = TF.vflip(target)
        else:
            # Resize
            image = TF.resize(image,
                              size=(128, 256),
                              interpolation=Image.BILINEAR)
            sem_target = TF.resize(sem_target,
                                   size=(128, 256),
                                   interpolation=Image.NEAREST)
            dep_target = TF.resize(dep_target,
                                   size=(128, 256),
                                   interpolation=Image.BILINEAR)

        # convert to pytorch tensors
        # target = TF.to_tensor(target)

        sem_target = torch.from_numpy(np.array(sem_target, dtype=np.uint8))
        dep_target = torch.from_numpy(np.array(dep_target, dtype=np.uint8))
        image = TF.to_tensor(image)

        # convert the labels into a mask
        targetrgb = self.mask_to_rgb(sem_target)
        targetmask = self.mask_to_class(dep_target)
        targetmask = targetmask.long()
        targetrgb = targetrgb.long()

        # finally return the image pair
        return image, targetmask, targetrgb, dep_target
Example #14
    def torchvision_transform(self, img):
        # `torchvision` is presumably torchvision.transforms.functional, as in Example #1
        return torchvision.resize(img, (512, 512))
Example #15
    def __getitem__(self, idx):
        rgb, depth, gt, confidence, K = self._load_data(idx)

        if self.augment and self.mode == 'train':
            # Top crop if needed
            if self.args.top_crop > 0:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                confidence = TF.crop(confidence, self.args.top_crop, 0,
                                     height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            width, height = rgb.size

            _scale = np.random.uniform(1.0, 1.5)
            scale = int(height * _scale)
            degree = np.random.uniform(-5.0, 5.0)
            flip = np.random.uniform(0.0, 1.0)

            # Horizontal flip
            if flip > 0.5:
                rgb = TF.hflip(rgb)
                depth = TF.hflip(depth)
                confidence = TF.hflip(confidence)
                gt = TF.hflip(gt)
                K[2] = width - K[2]

            # Rotation
            rgb = TF.rotate(rgb, angle=degree, resample=Image.BICUBIC)
            depth = TF.rotate(depth, angle=degree, resample=Image.NEAREST)
            confidence = TF.rotate(confidence,
                                   angle=degree,
                                   resample=Image.NEAREST)
            gt = TF.rotate(gt, angle=degree, resample=Image.NEAREST)

            # Color jitter
            brightness = np.random.uniform(0.6, 1.4)
            contrast = np.random.uniform(0.6, 1.4)
            saturation = np.random.uniform(0.6, 1.4)

            rgb = TF.adjust_brightness(rgb, brightness)
            rgb = TF.adjust_contrast(rgb, contrast)
            rgb = TF.adjust_saturation(rgb, saturation)

            # Resize
            rgb = TF.resize(rgb, scale, Image.BICUBIC)
            depth = TF.resize(depth, scale, Image.NEAREST)
            confidence = TF.resize(confidence, scale, Image.NEAREST)
            gt = TF.resize(gt, scale, Image.NEAREST)

            K[0] = K[0] * _scale
            K[1] = K[1] * _scale
            K[2] = K[2] * _scale
            K[3] = K[3] * _scale

            # Crop
            width, height = rgb.size

            assert self.height <= height and self.width <= width, \
                "patch size is larger than the input size"

            h_start = random.randint(0, height - self.height)
            w_start = random.randint(0, width - self.width)

            rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
            depth = TF.crop(depth, h_start, w_start, self.height, self.width)
            confidence = TF.crop(confidence, h_start, w_start, self.height,
                                 self.width)
            gt = TF.crop(gt, h_start, w_start, self.height, self.width)

            K[2] = K[2] - w_start
            K[3] = K[3] - h_start

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))
            depth = depth / _scale

            confidence = TF.to_tensor(np.array(confidence))
            confidence = confidence / _scale

            gt = TF.to_tensor(np.array(gt))
            gt = gt / _scale
        elif self.mode in ['train', 'val']:
            # Top crop if needed
            if self.args.top_crop > 0:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                confidence = TF.crop(confidence, self.args.top_crop, 0,
                                     height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            # Crop
            width, height = rgb.size

            assert self.height <= height and self.width <= width, \
                "patch size is larger than the input size"

            h_start = random.randint(0, height - self.height)
            w_start = random.randint(0, width - self.width)

            rgb = TF.crop(rgb, h_start, w_start, self.height, self.width)
            depth = TF.crop(depth, h_start, w_start, self.height, self.width)
            confidence = TF.crop(confidence, h_start, w_start, self.height,
                                 self.width)
            gt = TF.crop(gt, h_start, w_start, self.height, self.width)

            K[2] = K[2] - w_start
            K[3] = K[3] - h_start

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))
            confidence = TF.to_tensor(np.array(confidence))

            gt = TF.to_tensor(np.array(gt))
        else:
            if self.args.top_crop > 0 and self.args.test_crop:
                width, height = rgb.size
                rgb = TF.crop(rgb, self.args.top_crop, 0,
                              height - self.args.top_crop, width)
                depth = TF.crop(depth, self.args.top_crop, 0,
                                height - self.args.top_crop, width)
                confidence = TF.crop(confidence, self.args.top_crop, 0,
                                     height - self.args.top_crop, width)
                gt = TF.crop(gt, self.args.top_crop, 0,
                             height - self.args.top_crop, width)
                K[3] = K[3] - self.args.top_crop

            rgb = TF.to_tensor(rgb)
            rgb = TF.normalize(rgb, (0.485, 0.456, 0.406),
                               (0.229, 0.224, 0.225),
                               inplace=True)

            depth = TF.to_tensor(np.array(depth))
            confidence = TF.to_tensor(np.array(confidence))
            gt = TF.to_tensor(np.array(gt))

        if self.args.num_sample > 0:
            depth, confidence = self.get_sparse_depth(depth, confidence,
                                                      self.args.num_sample)

        output = {
            'rgb': rgb,
            'dep': depth,
            'confidence': confidence,
            'gt': gt,
            'K': torch.Tensor(K)
        }

        return output
Example #16
    def process_image(self, img, is_train, multi_crop=False, bboxes=None, no_crop=False):
        '''
        Pre-processing of the images
        Arguments:
            img:                    single input image (PIL)
            is_train:               True for training mode, False for validation/testing mode
            multi_crop (optional):  If True, uses 12 crops in validation
            bboxes (optional):      Bounding boxes of the foreground object
            no_crop (optional):     If True, skips cropping in both training and validation
        '''
        if bboxes is None:
            bboxes = []
        # In training, random scaling, flipping, and color augmentation
        if is_train:
            if no_crop:
                img = self.resize(img)
            else:
                img = self.scale_aug(img)
            img = self.flip_aug(img)
            img = self.color_aug(img)
            img = self.tensor_aug(img)
            img = self.norm_aug(img)
            return img
        # In validation
        else:
            # We will collect all crops of the image in *imgs*
            if no_crop:
                imgs = [self.resize(img)]
            else:
                min_size = min(img.size)
                scale_ratio = min(self.im_size) / min_size * 1.3
                resized_img = F.resize(img, (int(img.size[1]*scale_ratio), int(img.size[0]*scale_ratio)))
                imgs = [self.center_crop(resized_img)]

            # Add all bboxes and their flip
            for bbox in bboxes:
                bbox_shape = np.array([bbox[2] - bbox[0], bbox[3] - bbox[1]])
                padding = bbox_shape.max() * 0.1
                # Add offset to the shorter side to crop a square patch
                offset = (bbox_shape - np.min(bbox_shape))[::-1] // 2
                bbox_crop = img.crop((bbox[1] - padding - offset[1], 
                           bbox[0] - padding - offset[0],
                           bbox[3] + padding + offset[1], 
                           bbox[2] + padding + offset[0])) # (w - crop_w, h - crop_h, w, h))
                #img.save('crop{}.jpg'.format(np.random.randint(0,10)))
                bbox_crop = self.resize(bbox_crop)
                imgs.append(bbox_crop)
                imgs.append(self.flip(bbox_crop))

            # Add all crops 
            if multi_crop:
                imgs.append(self.flip(self.center_crop(resized_img)))
                imgs.extend(self.multi_crop(self.resize_for_crop(img)))

            # Convert everything to normalized tensor
            tensor_imgs = []
            for img in imgs:
                img = self.tensor_aug(img)
                img = self.norm_aug(img)
                tensor_imgs.append(img)
            return tensor_imgs
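A hypothetical usage sketch for the method above (`preprocessor` and the bbox values are placeholders; bboxes follow the (top, left, bottom, right) order implied by the crop call):

train_tensor = preprocessor.process_image(img, is_train=True)
val_crops = preprocessor.process_image(img, is_train=False, multi_crop=False,
                                       bboxes=[(10, 10, 120, 140)])  # returns a list of tensors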
Example #17
    def __call__(self, img):
        size = self.size
        w, h = img.size
        target_size = self.target_size(w, h, size, self.largest)
        return F.resize(img, target_size, self.interpolation)
Example #18
    def __call__(self, img):

        for i, im in enumerate(img):
            img[i] = F.resize(im, self.size, self.interpolation)

        return img
Example #19
    def single_image_loss(self, x):
        # x: Batch, H, W, 3
        x = TVF.resize(x.permute(0, 3, 1, 2), size=(self.in_size, self.in_size))

        # out: Batch, 1
        return self.last(self.main(x))
Example #20
    def __call__(self, image, target) -> Tuple[torch.Tensor, dict]:
        image = resize(image, size=self.output_shape, interpolation=self.interpolation)
        target['boxes'] = self.resize_boxes(target['boxes'])
        return image, target
Example #21
def resize(image, target, size, max_size=None):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(
                    round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    size = get_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if target is None:
        return rescaled_image, None

    ratios = tuple(
        float(s) / float(s_orig)
        for s, s_orig in zip(rescaled_image.size, image.size))
    ratio_width, ratio_height = ratios

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor(
            [ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes

    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area

    h, w = size
    target["size"] = torch.tensor([h, w])

    if "masks" in target:
        target['masks'] = interpolate(
            target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5

    return rescaled_image, target
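A hypothetical call sketch for the function above; the image path, box coordinates (x0, y0, x1, y1), and the 800/1333 size pair are placeholders, and the module-level imports of the original file are assumed:

from PIL import Image
import torch

img = Image.open("sample.jpg")
target = {"boxes": torch.tensor([[10.0, 20.0, 110.0, 220.0]]),
          "area": torch.tensor([20000.0])}
img_resized, target_resized = resize(img, target, size=800, max_size=1333)
print(img_resized.size, target_resized["boxes"])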
Example #22
    def get_resized_bytes(self, img):
        img = trans_fn.resize(img, self.size)
        buf = BytesIO()
        img.save(buf, format='jpeg', quality=self.quality)
        img_bytes = buf.getvalue()
        return img_bytes
Example #23
    def transform(self, img1, img2):

        # resize images and convert to tensors
        img1 = TF.to_pil_image(img1)
        img1 = TF.resize(img1, [self.img_size, self.img_size], interpolation=3)
        img2 = TF.to_pil_image(img2)
        img2 = TF.resize(img2, [self.img_size, self.img_size], interpolation=3)

        if self.with_random_hflip and random.random() > 0.5:
            img1 = TF.hflip(img1)
            img2 = TF.hflip(img2)

        if self.with_random_vflip and random.random() > 0.5:
            img1 = TF.vflip(img1)
            img2 = TF.vflip(img2)

        if self.with_random_rot90 and random.random() > 0.5:
            img1 = TF.rotate(img1, 90)
            img2 = TF.rotate(img2, 90)

        if self.with_random_rot180 and random.random() > 0.5:
            img1 = TF.rotate(img1, 180)
            img2 = TF.rotate(img2, 180)

        if self.with_random_rot270 and random.random() > 0.5:
            img1 = TF.rotate(img1, 270)
            img2 = TF.rotate(img2, 270)

        if self.with_random_crop and random.random() > 0.5:
            i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \
                get_params(img=img1, scale=(0.5, 1.0), ratio=(0.9, 1.1))
            img1 = TF.resized_crop(img1,
                                   i,
                                   j,
                                   h,
                                   w,
                                   size=(self.img_size, self.img_size))
            img2 = TF.resized_crop(img2,
                                   i,
                                   j,
                                   h,
                                   w,
                                   size=(self.img_size, self.img_size))

        if self.with_random_patch:
            i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \
                get_params(img=img1, scale=(1/16.0, 1/9.0), ratio=(0.9, 1.1))
            img1 = TF.resized_crop(img1,
                                   i,
                                   j,
                                   h,
                                   w,
                                   size=(self.img_size, self.img_size))
            img2 = TF.resized_crop(img2,
                                   i,
                                   j,
                                   h,
                                   w,
                                   size=(self.img_size, self.img_size))

        # to tensor
        img1 = TF.to_tensor(img1)
        img2 = TF.to_tensor(img2)

        return img1, img2
Example #24
    def resize(image_or_guide):
        return F.resize(image_or_guide,
                        [length * 2 for length in image_or_guide.shape[-2:]])
Example #25
    def __call__(self, image):
        size = self.get_size(image.size)
        image = F.resize(image, size)
        return image
Example #26
    def __call__(self, img, mask):
        if self.do_mask:
            return F.resize(img, self.size, self.interpolation), F.resize(
                mask, self.size, Image.NEAREST)
        else:
            return F.resize(img, self.size, self.interpolation), mask
Example #27
    def __call__(self, image, target):
        size = random.randint(self.min_size, self.max_size)
        image = F.resize(image, size)
        target = F.resize(target, size, interpolation=Image.NEAREST)
        return image, target
Example #28
    def __call__(self, x):
        return TF.resize(x, random.randint(self.min_size, self.max_size))
Example #29
    def apply_image(self, image: torch.Tensor, params) -> torch.Tensor:
        return VF.resize(image, params)
Example #30
    def __call__(self, clip):
        size = self.get_size(clip[0].size)
        clip = [F.resize(image, size) for image in clip]
        return clip