Example #1
    def test_rotate(self):
        x = np.zeros((100, 100, 3), dtype=np.uint8)
        x[40, 40] = [255, 255, 255]

        with self.assertRaises(TypeError):
            F.rotate(x, 10)

        img = F.to_pil_image(x)

        result = F.rotate(img, 45)
        assert result.size == (100, 100)
        r, c, ch = np.where(result)
        assert all(x in r for x in [49, 50])
        assert all(x in c for x in [36])
        assert all(x in ch for x in [0, 1, 2])

        result = F.rotate(img, 45, expand=True)
        assert result.size == (142, 142)
        r, c, ch = np.where(result)
        assert all(x in r for x in [70, 71])
        assert all(x in c for x in [57])
        assert all(x in ch for x in [0, 1, 2])

        result = F.rotate(img, 45, center=(40, 40))
        assert result.size == (100, 100)
        r, c, ch = np.where(result)
        assert all(x in r for x in [40])
        assert all(x in c for x in [40])
        assert all(x in ch for x in [0, 1, 2])

        result_a = F.rotate(img, 90)
        result_b = F.rotate(img, -270)

        assert np.all(np.array(result_a) == np.array(result_b))
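A minimal usage sketch of the API the test exercises (assumes only torchvision's functional interface, imported as F as in the test):

import numpy as np
import torchvision.transforms.functional as F

img = F.to_pil_image(np.zeros((100, 100, 3), dtype=np.uint8))
F.rotate(img, 45)                    # same 100x100 canvas, corners clipped
F.rotate(img, 45, expand=True)       # canvas grows (about 142x142) to fit the rotation
F.rotate(img, 45, center=(40, 40))   # rotate about a custom point instead of the center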
Example #2
    def test_random_affine(self):

        # assertRaises stops at the first statement that raises, so each
        # invalid construction must be wrapped separately to actually be
        # exercised (some cases raise AssertionError rather than ValueError
        # in older torchvision).
        invalid_kwargs = [
            dict(degrees=-0.7),
            dict(degrees=[-0.7]),
            dict(degrees=[-0.7, 0, 0.7]),
            dict(degrees=[-90, 90], translate=2.0),
            dict(degrees=[-90, 90], translate=[-1.0, 1.0]),
            dict(degrees=[-90, 90], translate=[-1.0, 0.0, 1.0]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.0]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10]),
            dict(degrees=[-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10]),
        ]
        for kwargs in invalid_kwargs:
            with self.assertRaises((ValueError, AssertionError)):
                transforms.RandomAffine(**kwargs)

        x = np.zeros((100, 100, 3), dtype=np.uint8)
        img = F.to_pil_image(x)

        t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10])
        for _ in range(100):
            angle, translations, scale, shear = t.get_params(t.degrees, t.translate, t.scale, t.shear,
                                                             img_size=img.size)
            assert -10 < angle < 10
            assert -img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5, \
                "{} vs {}".format(translations[0], img.size[0] * 0.5)
            assert -img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5, \
                "{} vs {}".format(translations[1], img.size[1] * 0.5)
            assert 0.7 < scale < 1.3
            assert -10 < shear < 10

        # Checking if RandomAffine can be printed as string
        t.__repr__()

        t = transforms.RandomAffine(10, resample=Image.BILINEAR)
        assert "Image.BILINEAR" in t.__repr__()
Example #3
    def test_affine(self):
        input_img = np.zeros((200, 200, 3), dtype=np.uint8)
        pts = []
        cnt = [100, 100]
        for pt in [(80, 80), (100, 80), (100, 100)]:
            for i in range(-5, 5):
                for j in range(-5, 5):
                    input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
                    pts.append((pt[0] + i, pt[1] + j))
        pts = list(set(pts))

        with self.assertRaises(TypeError):
            F.affine(input_img, 10)

        pil_img = F.to_pil_image(input_img)

        def _to_3x3_inv(inv_result_matrix):
            result_matrix = np.zeros((3, 3))
            result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
            result_matrix[2, 2] = 1
            return np.linalg.inv(result_matrix)

        def _test_transformation(a, t, s, sh):
            a_rad = math.radians(a)
            s_rad = math.radians(sh)
            # 1) Check transformation matrix:
            c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]],
                                 [0.0, 0.0, 1.0]])
            c_inv_matrix = np.linalg.inv(c_matrix)
            t_matrix = np.array([[1.0, 0.0, t[0]], [0.0, 1.0, t[1]],
                                 [0.0, 0.0, 1.0]])
            r_matrix = np.array(
                [[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                 [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                 [0.0, 0.0, 1.0]])
            true_matrix = np.dot(
                t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
            result_matrix = _to_3x3_inv(
                F._get_inverse_affine_matrix(center=cnt,
                                             angle=a,
                                             translate=t,
                                             scale=s,
                                             shear=sh))
            assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
            # 2) Perform inverse mapping:
            true_result = np.zeros((200, 200, 3), dtype=np.uint8)
            inv_true_matrix = np.linalg.inv(true_matrix)
            for y in range(true_result.shape[0]):
                for x in range(true_result.shape[1]):
                    res = np.dot(inv_true_matrix, [x, y, 1])
                    _x = int(res[0] + 0.5)
                    _y = int(res[1] + 0.5)
                    if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                        true_result[y, x, :] = input_img[_y, _x, :]

            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
            assert result.size == pil_img.size
            # Compute number of different pixels:
            np_result = np.array(result)
            n_diff_pixels = np.sum(np_result != true_result) / 3
            # Accept 3 wrong pixels
            assert n_diff_pixels < 3, \
                "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +\
                "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))

        # Test rotation
        a = 45
        _test_transformation(a=a, t=(0, 0), s=1.0, sh=0.0)

        # Test translation
        t = [10, 15]
        _test_transformation(a=0.0, t=t, s=1.0, sh=0.0)

        # Test scale
        s = 1.2
        _test_transformation(a=0.0, t=(0.0, 0.0), s=s, sh=0.0)

        # Test shear
        sh = 45.0
        _test_transformation(a=0.0, t=(0.0, 0.0), s=1.0, sh=sh)

        # Test rotation, scale, translation, shear
        for a in range(-90, 90, 25):
            for t1 in range(-10, 10, 5):
                for s in [0.75, 0.98, 1.0, 1.1, 1.2]:
                    for sh in range(-15, 15, 5):
                        _test_transformation(a=a, t=(t1, t1), s=s, sh=sh)
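For reference, the forward matrix the test verifies factors as a translation composed with a rotation/shear about the image center, M = T · C · R · C⁻¹. A standalone numpy sketch of the same composition (it mirrors the test's helper; not torchvision code):

import math
import numpy as np

def forward_affine(angle, translate, scale, shear, center):
    a, sh = math.radians(angle), math.radians(shear)
    C = np.array([[1.0, 0.0, center[0]], [0.0, 1.0, center[1]], [0.0, 0.0, 1.0]])
    T = np.array([[1.0, 0.0, translate[0]], [0.0, 1.0, translate[1]], [0.0, 0.0, 1.0]])
    R = np.array([[scale * math.cos(a), -scale * math.sin(a + sh), 0.0],
                  [scale * math.sin(a),  scale * math.cos(a + sh), 0.0],
                  [0.0, 0.0, 1.0]])
    return T @ C @ R @ np.linalg.inv(C)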
Example #4
    def __getitem__(self, idx):
        # processing img
        img_name = self.img_names[idx]
        # image path
        imgA = cv2.imread(img_name)
        imgA = cv2.resize(imgA, (352, 352))

        img_filename = os.path.basename(img_name).split('.')[0]
        # processing pseudo
        imgC = cv2.imread(self.pseudo_path + img_filename + '.png')
        imgC = cv2.resize(imgC, (352, 352))

        # processing label
        imgB = cv2.imread(self.label_path + img_filename + '.png', 0)
        if not self.is_test:
            imgB = cv2.resize(imgB, (352, 352))
        img_label = imgB
        # print(np.unique(img_label))
        # make data augmentation here
        if self.is_data_augment:
            # convert to pil format so we can data augment them
            img_label = np.expand_dims(img_label, -1)
            pil_imgA = TF.to_pil_image(imgA)
            pil_img_label = TF.to_pil_image(img_label)
            pil_imgC = TF.to_pil_image(imgC)

            if random.random() > 0.5:
                # random cropping
                crop_size = int(min(imgA.shape[:2]) * 0.8)
                # get_params returns (top, left, height, width)
                i, j, h, w = transforms.RandomCrop.get_params(
                    pil_imgA, output_size=(crop_size, crop_size))
                pil_imgA = TF.crop(pil_imgA, i, j, h, w)
                pil_img_label = TF.crop(pil_img_label, i, j, h, w)
                pil_imgC = TF.crop(pil_imgC, i, j, h, w)

            # -- data augmentation --
            # Random horizontal flipping
            if random.random() > 0.5:
                pil_imgA = TF.hflip(pil_imgA)
                pil_img_label = TF.hflip(pil_img_label)
                pil_imgC = TF.hflip(pil_imgC)

            # Random vertical flipping
            if random.random() > 0.5:
                pil_imgA = TF.vflip(pil_imgA)
                pil_img_label = TF.vflip(pil_img_label)
                pil_imgC = TF.vflip(pil_imgC)

            # random cutout
            if self.random_cutout:
                if random.random() > 0.5:
                    cutout_size = int(min(imgA.shape[:2]) * self.random_cutout)
                    i, j, h, w = transforms.RandomCrop.get_params(
                        pil_imgA,
                        output_size=(random.randint(0, cutout_size),
                                     random.randint(0, cutout_size)))
                    color_code = random.randint(0, 255)
                    rect = Image.new('RGB', (w, h),
                                     (color_code, color_code, color_code))
                    # paste expects an (x, y) = (left, top) box
                    pil_imgA.paste(rect, (j, i))

            pil_imgA = pil_imgA.resize((352, 352))
            pil_img_label = pil_img_label.resize((352, 352))
            pil_imgC = pil_imgC.resize((352, 352))

            # convert pil back to numpy
            imgA = np.array(pil_imgA)
            img_label = np.array(pil_img_label)
            imgC = np.array(pil_imgC)

        # only need to process the original dataset, tr and rp already processed
        if 'tr' not in img_filename and 'rp' not in img_filename:
            img_label[img_label < 19] = 0
            img_label[(img_label <= 38) & (img_label >= 19)] = 1
            img_label[img_label > 38] = 2

        img_label_onehot = (np.arange(self.num_class) == img_label[..., None]).astype(float)
        img_label_onehot = img_label_onehot.transpose(2, 0, 1)  # n_class x H x W

        # label smoothing: the background class (index 0) dominates,
        # so soften its one-hot weight
        if self.is_label_smooth:
            img_label_onehot[0] = img_label_onehot[0] * 0.9

        onehot_label = torch.FloatTensor(img_label_onehot)
        if self.transform:
            imgA = self.transform(imgA)
            imgC = self.transform(imgC)

        return imgA, imgC, onehot_label, img_name
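The pattern worth copying here: sample crop/flip parameters once, then apply them through the functional API so the image, pseudo-label and label get identical geometry. Minimal sketch (pil_img and pil_mask are illustrative names; get_params returns top, left, height, width):

i, j, h, w = transforms.RandomCrop.get_params(pil_img, output_size=(256, 256))
img = TF.crop(pil_img, i, j, h, w)
mask = TF.crop(pil_mask, i, j, h, w)  # identical parameters keep image and mask aligned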
Example #5
def tensor2img(t, padding=16):
    std = torch.Tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    mu = torch.Tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
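    # undo ImageNet normalization (x * std + mean); 1-channel tensors are left as-is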
    img = to_pil_image(t * std + mu if t.shape[0] > 1 else t)
    w, h = img.size
    return img.crop((padding, padding, w - padding, h - padding))
def run_eval(args):

    print('running evaluation...')

    if args.save_output:
        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

    running_psnr = []
    running_ssim = []

    if args.dataset == 'rain100h':
        datadir = r'./datasets/Rain100H/val'
        val_dirs = glob.glob(os.path.join(datadir, 'norain-*.png'))
    elif args.dataset == 'rain100l':
        datadir = r'./datasets/Rain100L/val'
        val_dirs = glob.glob(os.path.join(datadir, '*x2.png'))
    elif args.dataset == 'rain800':
        datadir = r'./datasets/Rain800/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain800-real':
        datadir = r'./datasets/Rain800/test_nature'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test1':
        datadir = r'./datasets/DID-MDN/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test2':
        datadir = r'./datasets/DID-MDN/testing_fu'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain1400':
        datadir = r'./datasets/Rain1400/val/rainy_image'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))

    for idx in range(len(val_dirs)):

        this_dir = val_dirs[idx]

        if args.dataset == 'rain100h':
            gt = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
            img_mix = cv2.imread(val_dirs[idx].replace('norain', 'rain'),
                                 cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
        elif args.dataset == 'rain100l':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            gt = cv2.imread(val_dirs[idx].replace('x2.png', '.png'),
                            cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
        elif args.dataset == 'rain800':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'rain800-real':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'did-mdn-test1':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            img_mix = img[:, 0:int(w / 2), :]
            gt = img[:, int(w / 2):, :]
        elif args.dataset == 'did-mdn-test2':
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'rain1400':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            suff = '_' + this_dir.split('_')[-1]
            this_gt_dir = this_dir.replace('rainy_image',
                                           'ground_truth').replace(
                                               suff, '.jpg')
            gt = cv2.imread(this_gt_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)

        # we recommend using TF.resize since it was also used during training;
        # cv2.resize works too but produces slightly different results
        img_mix = TF.resize(TF.to_pil_image(img_mix),
                            [args.in_size, args.in_size])
        img_mix = TF.to_tensor(img_mix).unsqueeze(0)

        gt = TF.resize(TF.to_pil_image(gt), [args.in_size, args.in_size])
        gt = TF.to_tensor(gt).unsqueeze(0)

        with torch.no_grad():
            # one forward pass; the 6-channel output holds both predictions
            G_pred = net_G(img_mix.to(device))
            G_pred1 = G_pred[:, 0:3, :, :]
            G_pred2 = G_pred[:, 3:6, :, :]

        G_pred1 = np.array(G_pred1.cpu().detach())
        G_pred1 = G_pred1[0, :].transpose([1, 2, 0])
        G_pred2 = np.array(G_pred2.cpu().detach())
        G_pred2 = G_pred2[0, :].transpose([1, 2, 0])
        gt = np.array(gt.cpu().detach())
        gt = gt[0, :].transpose([1, 2, 0])
        img_mix = np.array(img_mix.cpu().detach())
        img_mix = img_mix[0, :].transpose([1, 2, 0])

        G_pred1 = np.clip(G_pred1, 0, 1)
        G_pred2 = np.clip(G_pred2, 0, 1)

        psnr = utils.cpt_rgb_psnr(G_pred1, gt, PIXEL_MAX=1.0)
        ssim = utils.cpt_rgb_ssim(G_pred1, gt)
        running_psnr.append(psnr)
        running_ssim.append(ssim)

        if args.save_output:
            fname = this_dir.split('/')[-1]
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_input.png'),
                img_mix)
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_gt1.png'),
                       gt)
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_output1.png'),
                G_pred1)
            plt.imsave(
                os.path.join(args.output_dir, fname[:-4] + '_output2.png'),
                G_pred2)

        print('id: %d, running psnr: %.4f, running ssim: %.4f' %
              (idx, np.mean(running_psnr), np.mean(running_ssim)))

    print('Dataset: %s, average psnr: %.4f, average ssim: %.4f' %
          (args.dataset, np.mean(running_psnr), np.mean(running_ssim)))
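cpt_rgb_psnr and cpt_rgb_ssim are project-specific helpers; for reference, a minimal PSNR over [0, 1] RGB arrays looks like this (a sketch, not the project's exact implementation):

def psnr(pred, gt, pixel_max=1.0):
    mse = np.mean((pred - gt) ** 2)  # assumes pred differs from gt, otherwise mse == 0
    return 20 * np.log10(pixel_max) - 10 * np.log10(mse)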
Example #7
    def __call__(self, image, target):
        original_w, original_h = image.size
        image = F.to_tensor(image)
        boxes = target['boxes']
        labels = target['labels']
        masks = target['masks']

        # Keep choosing a minimum overlap until a successful crop is made
        min_overlap = 0.75

        # Try up to 50 times for this choice of minimum overlap
        max_trials = 50
        for _ in range(max_trials):
            min_scale = 0.75
            scale_h = random.uniform(min_scale, 1)
            scale_w = random.uniform(min_scale, 1)
            new_h = int(scale_h * original_h)
            new_w = int(scale_w * original_w)

            # Aspect ratio has to be in [0.5, 2]
            aspect_ratio = new_h / new_w
            if not 0.5 < aspect_ratio < 2:
                continue

            # Crop coordinates
            left = random.randint(0, original_w - new_w)
            right = left + new_w
            top = random.randint(0, original_h - new_h)
            bottom = top + new_h
            crop = torch.LongTensor([left, top, right, bottom])

            # Calculate Jaccard overlap between the crop and the bounding boxes
            overlap = find_jaccard_overlap(crop.unsqueeze(0), boxes)
            # (1, n_objects), n_objects is the no. of objects in this image
            overlap = overlap.squeeze(0)  # (n_objects)

            # If not a single bounding box has a Jaccard overlap of greater than the minimum, try again
            if overlap.max().item() < min_overlap:
                continue

            # Crop image
            new_image = image[:, top:bottom, left:right]  # (3, new_h, new_w)

            # Find boxes in cropped region
            boxes_in_crop = (boxes[:, 0] < right) * (boxes[:, 2] > left) * (boxes[:, 1] < bottom) * (boxes[:, 3] > top)
            if not boxes_in_crop.any():
                continue

            # Discard bounding boxes that don't meet this criterion
            new_boxes = boxes[boxes_in_crop, :]
            new_masks = masks[boxes_in_crop, :]
            new_labels = labels[boxes_in_crop]

            # Calculate bounding boxes' new coordinates in the crop
            new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])  # crop[:2] is [left, top]
            new_boxes[:, :2] -= crop[:2]
            new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:])  # crop[2:] is [right, bottom]
            new_boxes[:, 2:] -= crop[:2]

            # Crop masks
            new_masks = new_masks[:, top:bottom, left:right]

            new_target = {}
            new_target['boxes'] = new_boxes
            new_target['labels'] = new_labels
            new_target['masks'] = new_masks
            new_target['image_name'] = target['image_name']

            new_image = F.to_pil_image(new_image)
            return new_image, new_target

        image = F.to_pil_image(image)
        return image, target
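find_jaccard_overlap is a project helper; the same overlap check can be written with torchvision's built-in op (a hedged equivalent, not necessarily what this repo uses):

from torchvision.ops import box_iou

overlap = box_iou(crop.unsqueeze(0).float(), boxes.float()).squeeze(0)  # (n_objects,)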
Example #8
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_s,
                             shuffle=False,
                             num_workers=0)
    model = Unet(3, 21)
    model = model.cuda()

    model.load_state_dict(
        torch.load(args.path + '/Unet_results' + '/training_results.pt'))
    inputs, labels, predictions = test_model(model, test_loader)
    inputs = inputs.cpu()
    labels = labels.cpu()
    predictions = predictions.cpu()
    fig = plt.figure(figsize=(10, 10))
    plt.clf()
    columns = 3
    rows = batch_s
    for i in range(0, columns * rows):
        if i % 3 == 0:
            fig.add_subplot(rows, columns, i + 1)
            plt.imshow(to_pil_image(re_normalize(inputs[i // 3])))
        if i % 3 == 1:
            fig.add_subplot(rows, columns, i + 1)
            plt.imshow(labels[i // 3])
        if i % 3 == 2:
            fig.add_subplot(rows, columns, i + 1)
            plt.imshow(predictions[i // 3])
    # save before show(): show() can leave an empty canvas for a later savefig
    plt.savefig(args.path + '/Unet_results' + '/test_pics.png',
                bbox_inches='tight')
    plt.show()
Example #9
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)


class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.out = nn.Conv2d(3, 3, 1)

    def forward(self, x):
        return self.out(x)


model = Network().eval()

with torch.no_grad():
    image = next(iter(data_loader))

    model.out.weight = torch.nn.Parameter(
        torch.tensor([[[[1]], [[0]], [[0]]],
                      [[[0]], [[1]], [[0]]],
                      [[[0]], [[0]], [[1]]]]).float(),
        requires_grad=False)
    model.out.bias = torch.nn.Parameter(torch.tensor([0, 0, 0]).float(),
                                        requires_grad=False)
    image_filled = model(image)
    # save_image(denormalize(image), 'image.png')
    # save_image(denormalize(image_filled), 'manual.png')
    to_pil_image(make_grid(denormalize(image))).show()
    to_pil_image(make_grid(denormalize(image_filled))).show()
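The hand-written weights above form a per-channel identity; an equivalent, more compact construction (sketch):

model.out.weight = torch.nn.Parameter(torch.eye(3).reshape(3, 3, 1, 1),
                                      requires_grad=False)
model.out.bias = torch.nn.Parameter(torch.zeros(3), requires_grad=False)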
Example #10
               Dict['TL_x']/W,Dict['TL_y']/H,Dict['TR_x']/W,Dict['TR_y']/H]
        norm_img = torch.FloatTensor(norm_img)
        kpt = torch.FloatTensor(kpt)
        return norm_img, kpt


# Do some checking and visualization
data = TrainData(ROOT_DIR + '/train.csv', ROOT_DIR + '/train_images')
print(len(data))    # should be 3000

img, kpt = data[0] # get a sample
print(img.size())   # should be [3, H, W]
print(img.max())    # should be <= 1.0
print(kpt.size())   # should be [8]

img = tf.to_pil_image(img) # convert tensor of shape (3, H, W) to PIL.Image
vis = draw_kpts(img, kpt, c='orange')
plt.imshow(vis)
plt.show()
#%%
class ConvBlock(nn.Module):
    def __init__(self, cin, cout):
        super().__init__() # necessary
        self.conv = nn.Conv2d(cin, cout, (3, 3), padding=1)
        self.bn = nn.BatchNorm2d(cout)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
Example #11
def PIL_ShowTensor3(tensor1, tensor2, tensor3):
    pil_img1 = tvF.to_pil_image(tensor1)
    pil_img2 = tvF.to_pil_image(tensor2)
    pil_img3 = tvF.to_pil_image(tensor3)
    PIL_ShowPILImage3(pil_img1, pil_img2, pil_img3)
Example #12
def PIL_ShowTensor2(tensor1, tensor2):
    pil_img1 = tvF.to_pil_image(tensor1)
    pil_img2 = tvF.to_pil_image(tensor2)
    PIL_ShowPILImage2(pil_img1, pil_img2)
Example #13
def PIL_ShowTensor(tensor):
    pil_img = tvF.to_pil_image(tensor)
    fig = plt.figure()
    plt.imshow(pil_img)
    plt.show()
with torch.no_grad():
    for file in tqdm(source_files, desc='Generating images from source'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Resize HR image to clean it up and make sure it can be resized again
        resize2_img = utils.imresize(input_img, 1.0 / opt.cleanup_factor, True)
        _, h, w = resize2_img.size()  # tensor layout is (C, H, W)
        h = h - h % opt.upscale_factor
        w = w - w % opt.upscale_factor
        resize2_cut_img = resize2_img[:, :h, :w]

        # Save resize2_cut_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        resize2_cut_img = TF.to_pil_image(resize2_cut_img)
        resize2_cut_img.save(path, 'PNG')

        # Generate resize3_cut_img and apply model
        kernel_path = kernel_paths[np.random.randint(0, kernel_num)]
        mat = loadmat(kernel_path)
        k = np.array([mat['Kernel']]).squeeze()
        resize3_cut_img = imresize(np.array(resize2_cut_img),
                                   scale_factor=1.0 / opt.upscale_factor,
                                   kernel=k)

        # Save resize3_cut_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        TF.to_pil_image(resize3_cut_img).save(path, 'PNG')

    for file in tqdm(target_files, desc='Generating images from target'):
Example #15
 def replace_original(self, img: torch.Tensor, mask: torch.Tensor,
                      inpainted_resized: torch.Tensor) -> torch.Tensor:
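     # resize the inpainted patch back to the original image resolution, then
     # composite: take masked pixels from the inpainted image, the rest from img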
     inpainted = F.to_pil_image(inpainted_resized).resize(
         (img.shape[-1], img.shape[-2]))
     return torch.where(mask == 1, F.to_tensor(inpainted), img)
Example #16
    def __getitem__(self, idx):
        # pylint: disable=too-many-locals

        # Sample a random transformation
        rotation = np.random.uniform(-self._max_rotation_jitter,
                                     self._max_rotation_jitter)
        scale = np.exp(
            np.random.uniform(-self._max_scale_jitter, self._max_scale_jitter))
        shear = np.random.uniform(-self._max_shear_jitter,
                                  self._max_shear_jitter,
                                  size=2)

        # Compute the "extended" patch size. This is the size of the patch that
        # we will first transform and then center crop to the final size.
        extpatch_w, extpatch_h = self._compute_extended_patch_size(
            w=self._patch_w,
            h=self._patch_h,
            rotation=rotation,
            scale=scale,
            shear=shear,
        )

        # The slide may not be large enough for the extended patch size. In
        # this case, we will downscale the target patch size until the extended
        # patch size fits.
        adjmul = min(1.0, self._slide.W / extpatch_w,
                     self._slide.H / extpatch_h)
        extpatch_w = min(int(np.ceil(extpatch_w * adjmul)), self._slide.W)
        extpatch_h = min(int(np.ceil(extpatch_h * adjmul)), self._slide.H)
        patch_w = int(self._patch_w * adjmul)
        patch_h = int(self._patch_h * adjmul)

        # Extract the extended patch by sampling uniformly from the size of the
        # slide
        x, y = [
            np.random.randint(a - b + 1)
            for a, b in zip((self._slide.W, self._slide.H), (extpatch_w,
                                                             extpatch_h))
        ]
        image = self._slide.image[y:y + extpatch_h, x:x + extpatch_w]
        image = (255 * (image + 1) / 2).astype(np.uint8)
        image = to_pil_image(image)
        label = to_pil_image(self._slide.label[y:y + extpatch_h,
                                               x:x + extpatch_w])

        # Apply augmentations
        output_size = (max(extpatch_w, patch_w), max(extpatch_h, patch_h))
        transformation = _get_inverse_affine_matrix(
            center=(image.size[0] * 0.5, image.size[1] * 0.5),
            angle=rotation,
            translate=[(a - b) / 2 for a, b in zip(output_size, image.size)],
            scale=scale,
            shear=shear,
        )
        image = self.image_augmentation(image)
        image = np.array(
            image.transform(
                output_size,
                Image.AFFINE,
                transformation,
                resample=Image.BILINEAR,
            ))
        image = center_crop(image, (patch_h, patch_w))
        label = np.array(
            label.transform(
                output_size,
                Image.AFFINE,
                transformation,
                resample=Image.NEAREST,
            ))
        label = center_crop(label, (patch_h, patch_w))
        if np.random.rand() < 0.5:
            image = np.flip(image, 0).copy()
            label = np.flip(label, 0).copy()

        # Convert image to the correct data format (float32 in [-1, 1] and in
        # CHW order)
        image = 2 * image.astype(np.float32) / 255 - 1
        image = image.transpose(2, 0, 1)

        return self._slide.prepare_data(image, label)
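A patch of size (w, h) rotated by θ needs an axis-aligned canvas of w·|cos θ| + h·|sin θ| by w·|sin θ| + h·|cos θ|, which is presumably what _compute_extended_patch_size accounts for (plus scale and shear). A sketch of the rotation-only bound (extended_size is an illustrative name, not the project's helper):

import math

def extended_size(w, h, rotation_deg):
    # axis-aligned bounding size of a w x h patch rotated by rotation_deg
    th = math.radians(rotation_deg)
    return (w * abs(math.cos(th)) + h * abs(math.sin(th)),
            w * abs(math.sin(th)) + h * abs(math.cos(th)))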
Example #17
def ycbcr_to_rgb(image: torch.Tensor) -> torch.Tensor:
    ycbcr_image = F.to_pil_image(image, mode='YCbCr')
    rgb_image = ycbcr_image.convert('RGB')
    rgb_tensor = F.to_tensor(rgb_image)
    return rgb_tensor
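Usage sketch ('photo.jpg' is a placeholder path; assumes PIL.Image imported as Image): the input must already be a 3xHxW YCbCr tensor with values in [0, 1]:

ycbcr = F.to_tensor(Image.open('photo.jpg').convert('YCbCr'))
rgb = ycbcr_to_rgb(ycbcr)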
Example #18
 def tensor2img(self, ts):
     img = np.asarray(F.to_pil_image(ts))
     return img
Example #19
loss_func = roi_loss_func(roi_mask=None, towards_target=True)

gen_images = []
fig, axs = plt.subplots(len(alphas),
                        len(decays),
                        squeeze=False,
                        figsize=(len(decays) * 10, len(alphas) * 5))
for i, alpha in tqdm(enumerate(alphas)):
    for j, decay in enumerate(decays):
        gen_image, _, loss, losses = optimize(generator,
                                              encoder,
                                              target,
                                              loss_func,
                                              alpha=alpha,
                                              decay=decay)
        gen_images.append(to_pil_image(gen_image))

        axs[i, j].plot(range(len(losses)), losses)
        axs[i, j].set_title(
            'alpha: {:.3g}, decay: {:.3g}, min_loss: {:.0f}'.format(
                alpha, decay, loss))
        axs[i, j].set_xlabel('Iteration')
        axs[i, j].set_ylabel('Loss')


def make_grid(imgs, n_rows, pad):
    assert len(imgs) > 0
    n_cols = math.ceil(len(imgs) / n_rows)
    w, h = imgs[0].width, imgs[0].height
    grid = Image.new(imgs[0].mode,
                     (w * n_cols + pad * (n_cols - 1),
                      h * n_rows + pad * (n_rows - 1)))
Example #20
def eval_OSVOSNetNet():

    # Paths
    cfg = configparser.ConfigParser()
    cfg.read('settings.conf')

    if sys.platform == 'darwin':
        cfg_dataset = 'dataset_mac'
    elif sys.platform == 'linux':
        cfg_dataset = 'dataset_ubuntu'
    else:
        raise RuntimeError('Unsupported platform: ' + sys.platform)

    # Hyper parameters
    parser = argparse.ArgumentParser(description='PyTorch OSVOSNet Testing')
    parser.add_argument('-c',
                        '--checkpoint',
                        default=None,
                        type=str,
                        metavar='PATH',
                        help='Path to latest checkpoint (default: none).')
    parser.add_argument('-v',
                        '--video-name',
                        default=None,
                        type=str,
                        help='Test video name (default: none).')
    parser.add_argument(
        '-m',
        '--model-name',
        default='OSVOSNet',
        type=str,
        help='Model name for the output segmentation; a subfolder is '
             'created under out_folder.')
    parser.add_argument('-o',
                        '--out-folder',
                        default=os.path.join(cfg['paths'][cfg_dataset],
                                             'results/'),
                        type=str,
                        metavar='PATH',
                        help='Folder for the output segmentations.')
    parser.add_argument('-b',
                        '--benchmark',
                        action='store_true',
                        help='Evaluate the video with groundtruth.')
    parser.add_argument('--sample',
                        action='store_true',
                        help='The video sequence has been sampled.')
    args = parser.parse_args()

    print('Args:', args)

    if args.checkpoint is None:
        raise ValueError('Must input checkpoint path.')
    if args.video_name is None:
        raise ValueError('Must input video name.')

    water_thres = 0.5

    device = torch.device('cpu')
    if torch.cuda.is_available():
        device = torch.device('cuda')

    # Dataset
    dataset_args = {}
    if torch.cuda.is_available():
        dataset_args = {
            'num_workers': int(cfg['params_OSVOS']['num_workers']),
            'pin_memory': bool(cfg['params_OSVOS']['pin_memory'])
        }

    dataset = WaterDataset_RGB(mode='eval',
                               dataset_path=cfg['paths'][cfg_dataset],
                               test_case=args.video_name,
                               eval_size=(int(cfg['params_OSVOS']['eval_w']),
                                          int(cfg['params_OSVOS']['eval_h'])))
    eval_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **dataset_args)

    # Model
    OSVOS_net = OSVOSNet()

    # Load pretrained model
    if os.path.isfile(args.checkpoint):
        print('Load checkpoint \'{}\''.format(args.checkpoint))
        if torch.cuda.is_available():
            checkpoint = torch.load(args.checkpoint)
        else:
            checkpoint = torch.load(args.checkpoint, map_location='cpu')
        args.start_epoch = checkpoint['epoch'] + 1
        OSVOS_net.load_state_dict(checkpoint['model'])
        print('Loaded checkpoint \'{}\' (epoch {})'.format(
            args.checkpoint, checkpoint['epoch']))
    else:
        raise ValueError('No checkpoint found at \'{}\''.format(
            args.checkpoint))

    # Set output path
    out_path = os.path.join(args.out_folder, args.model_name + '_segs',
                            args.video_name)
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    if args.sample:
        out_full_path = out_path + '_full'
        if not os.path.exists(out_full_path):
            os.makedirs(out_full_path)

    # Start testing
    OSVOS_net.to(device).eval()
    running_time = AverageMeter()
    running_endtime = time.time()

    # First frame annotation
    pre_frame_mask = dataset.get_first_frame_label()
    eval_size = pre_frame_mask.shape[-2:]
    first_frame_seg = TF.to_pil_image(pre_frame_mask)
    first_frame_seg.save(os.path.join(out_path, '0.png'))
    if args.sample:
        first_frame_seg.save(os.path.join(out_full_path, '0.png'))
    pre_frame_mask = pre_frame_mask.unsqueeze(0).to(device)

    if args.benchmark:
        gt_folder = os.path.join(cfg['paths'][cfg_dataset], 'test_annots',
                                 args.video_name)
        gt_list = os.listdir(gt_folder)
        gt_list.sort(key=lambda x: (len(x), x))
        gt_list.pop(0)
    avg_iou = 0

    with torch.no_grad():
        for i, sample in enumerate(tqdm(eval_loader)):

            img = sample['img'].to(device)

            outputs = OSVOS_net(img)

            output = outputs[-1].detach()
            output = torch.sigmoid(output)
            # seg_raw = TF.to_pil_image(output.squeeze(0).cpu())
            # seg_raw.save(os.path.join(out_path, 'raw_%d.png' % (i + 1)))

            zero_tensor = torch.zeros(output.shape).to(device)
            one_tensor = torch.ones(output.shape).to(device)
            seg_tf = torch.where(output > water_thres, one_tensor, zero_tensor)
            seg = TF.to_pil_image(seg_tf.squeeze(0).cpu())

            if args.sample:
                seg.save(os.path.join(out_full_path, f'{i + 1}.png'))

                if i + 1 in [1, 50, 100, 150, 199]:
                    seg.save(os.path.join(out_path, f'{i + 1}.png'))

            else:
                seg.save(os.path.join(out_path, f'{i + 1}.png'))

            running_time.update(time.time() - running_endtime)
            running_endtime = time.time()

            # if args.benchmark:
            #     gt_seg = load_image_in_PIL(os.path.join(gt_folder, gt_list[i])).convert('L')
            #     gt_tf = TF.to_tensor(gt_seg).to(device).type(torch.int)

            #     iou = iou_tensor(seg_tf.squeeze(0).type(torch.int), gt_tf)
            #     avg_iou += iou.item()
            #     print('iou:', iou.item())

            # print('Segment: [{0:4}/{1:4}]\t'
            #     'Time: {running_time.val:.3f}s ({running_time.sum:.3f}s)\t'.format(
            #     i + 1, len(eval_loader), running_time=running_time))

    # if args.benchmark:
    #     print('total_iou:', avg_iou)
    #     avg_iou /= len(eval_loader)
    #     print('avg_iou:', avg_iou, 'frame_num:', len(eval_loader))

    if args.sample:
        mask_folder = args.video_name + '_full'
    else:
        mask_folder = args.video_name
    run_cvt_images_to_overlays(args.video_name, mask_folder,
                               cfg['paths'][cfg_dataset], args.model_name,
                               eval_size)
Example #21
                                  fill_value=self.input_length,
                                  dtype=torch.long)
        target_length = torch.full(size=(1, ),
                                   fill_value=self.label_length,
                                   dtype=torch.long)
        return image, target, input_length, target_length
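The two length tensors are what torch.nn.CTCLoss consumes on the training side (a sketch; log_probs and targets are illustrative names):

criterion = torch.nn.CTCLoss()
loss = criterion(log_probs, targets, input_lengths, target_lengths)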


# Sanity-check the dataset output
dataset = CaptchaDataset(characters, width, height, n_input_length, n_len,
                         TRAIN_DATASET_PATH)
print('dataset.length', dataset.length, 'dataset.label_length',
      dataset.label_length)
image, target, input_length, label_length = dataset[0]
print(''.join([characters[x] for x in target]), input_length, label_length)
to_pil_image(image)

batch_size = 128
# tweak train_set's length here if needed
train_set = CaptchaDataset(characters=characters,
                           width=width,
                           height=height,
                           input_length=n_input_length,
                           label_length=n_len,
                           folder=TRAIN_DATASET_PATH)
# train_set = CaptchaDataset(characters = characters,length=100*batch_size, width=width, height=height, input_length=n_input_length, label_length=n_len,folder=TRAIN_DATASET_PATH)
# valid_set = CaptchaDataset(characters, 100 * batch_size, width, height, n_input_length, n_len)
# shuffle=True,drop_last=True
train_loader = DataLoader(train_set,
                          batch_size=batch_size,
                          num_workers=0,
                          shuffle=True,
                          drop_last=True)
Example #22
        new_image = photometric_distort(new_image)

        # Convert PIL image to Torch tensor
        new_image = FT.to_tensor(new_image)

        # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
        # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
        if random.random() < 0.5:
            new_image, new_boxes = expand(new_image, boxes, filler=mean)

        # Randomly crop image (zoom in)
        new_image, new_boxes, new_labels, new_difficulties = random_crop(new_image, new_boxes, new_labels,
                                                                         new_difficulties)

        # Convert Torch tensor to PIL image
        new_image = FT.to_pil_image(new_image)

        # Flip image with a 50% chance
        if random.random() < 0.5:
            new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels, new_difficulties
Example #23
def make_pil_images(*args, **kwargs):
    for image in make_vanilla_tensor_images(*args, **kwargs):
        yield to_pil_image(image)
Example #24
def save_test_preds(dir_, preds):
	dir_ = Path(dir_)
	for i, im in enumerate(preds):
		im = transforms_f.to_pil_image(im.data, mode="L")
		im.save(dir_ / f"{i}.jpg")
Example #25
 def _postprocess_img(self, img):
     x = self._post_proc_op(img=img)
     x = torch.clamp(x, 0, 1)  # NST might kick values into illegal areas
     return FT.to_pil_image(x)
Example #26
 def unconvert(self, tensor):
     return tr.to_pil_image(
         denormalize_pixels(tensor.clone(), self.mean, self.stddev), 'RGB')
Example #28
    def __getitem__(self, idx):
        imgA = self.imgA[idx]
        imgB = self.imgB[idx]
        imgC = self.imgC[idx]

        if not self.is_test:
            imgB = cv2.resize(imgB, (352, 352))
        img_label = imgB
        # print(np.unique(img_label))
        # make data augmentation here
        if self.is_data_augment:
            # convert to pil format so we can data augment them
            img_label = np.expand_dims(img_label, -1)
            pil_imgA = TF.to_pil_image(imgA)
            pil_img_label = TF.to_pil_image(img_label)
            pil_imgC = TF.to_pil_image(imgC)

            if random.random() > 0.5:
                # random cropping
                crop_size = int(min(imgA.shape[:2]) * 0.8)
                # get_params returns (top, left, height, width)
                i, j, h, w = transforms.RandomCrop.get_params(
                    pil_imgA, output_size=(crop_size, crop_size))
                pil_imgA = TF.crop(pil_imgA, i, j, h, w)
                pil_img_label = TF.crop(pil_img_label, i, j, h, w)
                pil_imgC = TF.crop(pil_imgC, i, j, h, w)

            # -- data augmentation --
            # Random horizontal flipping
            if random.random() > 0.5:
                pil_imgA = TF.hflip(pil_imgA)
                pil_img_label = TF.hflip(pil_img_label)
                pil_imgC = TF.hflip(pil_imgC)

            # Random vertical flipping
            if random.random() > 0.5:
                pil_imgA = TF.vflip(pil_imgA)
                pil_img_label = TF.vflip(pil_img_label)
                pil_imgC = TF.vflip(pil_imgC)

            # random cutout
            if self.random_cutout:
                if random.random() > 0.5:
                    cutout_size = int(min(imgA.shape[:2]) * self.random_cutout)
                    i, j, h, w = transforms.RandomCrop.get_params(
                        pil_imgA,
                        output_size=(random.randint(0, cutout_size),
                                     random.randint(0, cutout_size)))
                    color_code = random.randint(0, 255)
                    rect = Image.new('RGB', (w, h),
                                     (color_code, color_code, color_code))
                    # paste expects an (x, y) = (left, top) box
                    pil_imgA.paste(rect, (j, i))

            pil_imgA = pil_imgA.resize((352, 352))
            pil_img_label = pil_img_label.resize((352, 352))
            pil_imgC = pil_imgC.resize((352, 352))

            # convert pil back to numpy
            imgA = np.array(pil_imgA)
            img_label = np.array(pil_img_label)
            imgC = np.array(pil_imgC)

        img_label_onehot = (np.arange(self.num_class) == img_label[..., None]).astype(float)
        img_label_onehot = img_label_onehot.transpose(2, 0, 1)  # n_class x H x W

        # label smoothing: the background class (index 0) dominates,
        # so soften its one-hot weight
        if self.is_label_smooth:
            img_label_onehot[0] = img_label_onehot[0] * 0.9

        onehot_label = torch.FloatTensor(img_label_onehot)
        if self.transform:
            imgA = self.transform(imgA)
            imgC = self.transform(imgC)

        # return imgA, imgC, onehot_label, img_name
        return imgA, imgC, onehot_label, []
Example #29
def predict(ckpt_root, selected_defects, aug_params, model_kwargs, name_dict):
    # determine training device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load images
    test_loader = get_test_dataloader('sensitivity/test_imgs',
                                      transforms.ToTensor(),
                                      test_batch_size=1)

    # prepare model
    model = restore_model(ckpt_root, 100, selected_defects, 'shufflenet',
                          'eval', device, **model_kwargs)

    results = dict()
    for i, data in enumerate(test_loader):
        # load image
        images = data['image']
        image_ids = data['img_id']
        assert images.size()[0] == 1
        image_pil = TF.to_pil_image(images[0])
        image_id = image_ids[0]

        for name in augs:
            if name not in results:
                results[name] = [[] for _ in range(len(augs[name]['params']))]
            for k, param in enumerate(augs[name]['params']):
                print('image #%02d, %s, %.3f' % (i, name, param))
                # apply augmentation to the image
                image = augs[name]['method'](image_pil, param)

                # convert to tensor and resize
                image = TF.to_tensor(image)
                image = image.unsqueeze_(0).to(device)
                image = F.interpolate(image, size=(224, 224), mode='area')

                # save the augmented image
                save_dir = 'sensitivity/%s' % name
                img_savepath = '%s/%s_%.3f.png' % (
                    save_dir, image_id.split('/')[-1].split('.')[0], param)
                if not os.path.isfile(img_savepath):
                    makedirs_if_not_exists(save_dir)
                    image_ = TF.to_pil_image(image.data.cpu()[0])
                    image_.save(img_savepath)

                # gather model prediction
                outputs = model(image)
                if model_kwargs['use_softmax_classifier']:
                    outputs = torch.cat([
                        outputs[0][:2].flatten(), outputs[1].flatten(),
                        outputs[0][2:].flatten()
                    ], dim=0)
                    outputs = score_convert_softmax_cls(
                        outputs, selected_defects, 2)
                else:
                    outputs = score_convert_reg(outputs, selected_defects, 2)
                outputs = outputs.data.cpu().numpy()
                print(outputs)
                idx = DEFECT_NAMES.index(name_dict[name])
                idx = selected_defects.index(idx)
                results[name][k].append(outputs[idx])

    # plot graph
    for name in augs:
        r = np.array(results[name])
        r = np.mean(r, axis=1)
        fig, ax = plt.subplots()
        ax.plot(augs[name]['params'], r)
        ax.set(xlabel='factor', ylabel='score', title=name)
        ax.grid()
        fig.savefig('sensitivity/%s.png' % name)
        plt.clf()
Example #31
def tensor_to_pil(tensor):
    if len(tensor.shape) == 4:
        return F.to_pil_image(tensor[0])
    else:
        return F.to_pil_image(tensor)
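Usage sketch: the 4-D branch drops the batch dimension by converting only the first image:

pil = tensor_to_pil(torch.rand(4, 3, 32, 32))  # converts batch element 0 only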
Example #32
    def __getitem__(self, idx):
        """Returns a pair of images with the given identifier. This is lazy loading
        of data into memory. Only those image pairs needed for the current batch
        are loaded.

        :param idx: image pair identifier
        :returns: dictionary containing input and output images and their identifier
        :rtype: dictionary

        """
        while True:

            if (self.is_inference) or (self.is_valid):

                input_img = util.ImageProcessing.load_image(
                    self.data_dict[idx]['input_img'], normaliser=self.normaliser)
                output_img = util.ImageProcessing.load_image(
                    self.data_dict[idx]['output_img'], normaliser=self.normaliser)

                if self.normaliser == 1:
                    input_img = input_img.astype(np.uint8)
                    output_img = output_img.astype(np.uint8)

                input_img = TF.to_pil_image(input_img)
                input_img = TF.to_tensor(input_img)
                output_img = TF.to_pil_image(output_img)
                output_img = TF.to_tensor(output_img)

                if input_img.shape[1] == output_img.shape[2]:
                    output_img = output_img.permute(0, 2, 1)

                return {'input_img': input_img, 'output_img': output_img,
                        'name': self.data_dict[idx]['input_img'].split("/")[-1]}

            elif idx in self.data_dict:

                output_img = util.ImageProcessing.load_image(
                    self.data_dict[idx]['output_img'], normaliser=self.normaliser)
                input_img = util.ImageProcessing.load_image(
                    self.data_dict[idx]['input_img'], normaliser=self.normaliser)

                if self.normaliser == 1:
                    input_img = input_img.astype(np.uint8)
                    output_img = output_img.astype(np.uint8)

                input_img = TF.to_pil_image(input_img)
                output_img = TF.to_pil_image(output_img)
                
                if not self.is_valid:

                    if random.random() > 0.5:

                        # Random horizontal flipping
                        if random.random() > 0.5:
                            input_img = TF.hflip(input_img)
                            output_img = TF.hflip(output_img)

                        # Random vertical flipping
                        if random.random() > 0.5:
                            input_img = TF.vflip(input_img)
                            output_img = TF.vflip(output_img)

                # Transform to tensor
                #print(output_img.shape)
                #plt.imsave("./"+self.data_dict[idx]['input_img'].split("/")[-1]+".png", output_img,format='png')
                input_img = TF.to_tensor(input_img)
                output_img = TF.to_tensor(output_img)
                
                return {'input_img': input_img, 'output_img': output_img,
                        'name': self.data_dict[idx]['input_img'].split("/")[-1]}
Example #33
    print(f"Processing ({i + 1}/{len(files)}): {filepath}")

    dirname = path.dirname(filepath)
    filename = path.basename(filepath)
    basename, ext = path.splitext(filename)

    img = I.open(filepath)
    tensor = TF.to_tensor(img)
    C, H, W = tensor.size()
    print(f"Image size is {H} x {W}")

    tensor = tensor.view(C, H * W).permute(1, 0)
    dists = pairwise_distance(centroids, tensor)
    nearest = 1 - F.softmax(dists, dim=0)
    probmap = nearest.view(NC, H, W)

    probmap_save = path.join(dirname, "prob", basename) + ".pth"
    prepare_folder(probmap_save)
    torch.save(probmap, probmap_save)
    print(f"probablity_map: {probmap_save}")

    maxclass = probmap.argmax(dim=0)
    refined_colormap = centroids[maxclass, :].permute(2, 0, 1)

    colormap_save = path.join(dirname, "refined", basename) + ".png"
    prepare_folder(colormap_save)
    TF.to_pil_image(refined_colormap).save(colormap_save)

    print()

# print(refined_colormap.size())