def test_4_channel_tensor_to_pil_image(self):
    """4-channel tensors convert to RGBA by default (or an explicit
    4-channel mode), round-trip per channel, and reject 1/2/3-channel modes.
    """
    def verify_img_data(img_data, expected_output, mode):
        # Convert with or without an explicit mode, then compare each band.
        if mode is None:
            img = transform.ToPILImage()(img_data)
            self.assertEqual(img.mode, 'RGBA')  # default should assume RGBA
        else:
            img = transform.ToPILImage(mode=mode)(img_data)
            self.assertEqual(img.mode, mode)
        split = img.split()
        for i in range(4):
            self.assertTrue(
                np.allclose(expected_output[:, :, i],
                            transform.to_tensor(split[i])[0]))

    img_data = jt.random((4, 4, 4))
    # Quantize to 8-bit levels, since the PIL round trip does the same.
    expected_output = img_data.multiply(255).int().float().divide(255)
    for mode in [None, 'RGBA', 'CMYK', 'RGBX']:
        verify_img_data(img_data, expected_output, mode)

    # BUG FIX: the original put all three calls inside a single
    # assertRaises block, so statements after the first raise never ran.
    # Each incompatible (3-, 1-, or 2-channel) mode gets its own check.
    for bad_mode in ('RGB', 'P', 'LA'):
        with self.assertRaises(ValueError):
            transform.ToPILImage(mode=bad_mode)(img_data)
def test_2d_tensor_to_pil_image(self):
    """2-D tensors of each supported dtype map to the expected PIL mode
    and survive a ToTensor round trip within tolerance.
    """
    to_tensor = transform.ToTensor()

    data_float = jt.array(np.random.rand(4, 4), dtype='float32')
    data_byte = jt.array(np.random.randint(0, 255, (4, 4)), dtype='uint8')
    data_short = jt.array(np.random.randint(0, 32767, (4, 4)), dtype='int16')
    data_int = jt.array(np.random.randint(0, 2147483647, (4, 4)), dtype='int32')

    # (input tensor, value expected after the PIL round trip, PIL mode)
    cases = [
        (data_float, data_float.multiply(255).int().float().divide(255).numpy(), 'F'),
        (data_byte, data_byte.float().divide(255.0).numpy(), 'L'),
        (data_short, data_short.numpy(), 'I;16'),
        (data_int, data_int.numpy(), 'I'),
    ]
    for tensor, expected, mode in cases:
        # The default converter and the explicit-mode converter must agree.
        for converter in (transform.ToPILImage(), transform.ToPILImage(mode=mode)):
            pil_img = converter(tensor)
            self.assertEqual(pil_img.mode, mode)
            self.assertTrue(
                np.allclose(expected, to_tensor(pil_img), atol=0.01, rtol=0.01))
def test_1_channel_tensor_to_pil_image(self):
    """Single-channel (H, W, 1) tensors of each supported dtype convert to
    the expected PIL mode and round-trip through ToTensor.
    """
    to_tensor = transform.ToTensor()
    shape = (4, 4, 1)

    data_float = jt.array(np.random.rand(*shape), dtype='float32')
    data_byte = jt.array(np.random.randint(0, 255, shape), dtype='uint8')
    data_short = jt.array(np.random.randint(0, 32767, shape), dtype='int16')
    data_int = jt.array(np.random.randint(0, 2147483647, shape), dtype='int32')

    # (input tensor, value expected after the PIL round trip, PIL mode)
    cases = [
        (data_float, data_float.multiply(255).int().float().divide(255).numpy(), 'F'),
        (data_byte, data_byte.float().divide(255.0).numpy(), 'L'),
        (data_short, data_short.numpy(), 'I;16'),
        (data_int, data_int.numpy(), 'I'),
    ]
    for tensor, expected, mode in cases:
        # Both the default and the explicit-mode converters must agree.
        for converter in (transform.ToPILImage(), transform.ToPILImage(mode=mode)):
            pil_img = converter(tensor)
            self.assertEqual(pil_img.mode, mode)
            np.testing.assert_allclose(expected[:, :, 0],
                                       to_tensor(pil_img)[0], atol=0.01)

    # 'F' mode for torch.FloatTensor
    img_F_mode = transform.ToPILImage(mode='F')(data_float)
    self.assertEqual(img_F_mode.mode, 'F')
def test_ndarray_bad_types_to_pil_image(self):
    """ToPILImage rejects unsupported ndarray dtypes (TypeError) and
    4-D input (ValueError).
    """
    trans = transform.ToPILImage()
    # BUG FIX: the original placed all four calls inside one assertRaises
    # block, so only the first call was ever tested — the context manager
    # exits as soon as the first exception is raised.
    for bad_dtype in (np.int64, np.uint16, np.uint32, np.float64):
        with self.assertRaises(TypeError):
            trans(np.ones([4, 4, 1], bad_dtype))
    # A 4-D array has no PIL image interpretation at all.
    with self.assertRaises(ValueError):
        transform.ToPILImage()(np.ones([1, 4, 4, 3]))
def verify_img_data(img_data, mode):
    """Convert a 3-channel array to PIL (default or explicit mode) and
    check every channel survives unchanged.

    NOTE(review): closes over ``self`` from the enclosing test method.
    """
    if mode is None:
        img = transform.ToPILImage()(img_data)
        self.assertEqual(img.mode, 'RGB')  # default should assume RGB
    else:
        img = transform.ToPILImage(mode=mode)(img_data)
        self.assertEqual(img.mode, mode)
    bands = img.split()
    for c in range(3):
        self.assertTrue(np.allclose(img_data[:, :, c], bands[c]))
def verify_img_data(img_data, expected_output, mode):
    """Convert a 2-channel array to PIL (default LA or explicit mode) and
    check both channels against the expected values.

    NOTE(review): closes over ``self`` from the enclosing test method.
    """
    if mode is None:
        img = transform.ToPILImage()(img_data)
        self.assertEqual(img.mode, 'LA')  # default should assume LA
    else:
        img = transform.ToPILImage(mode=mode)(img_data)
        self.assertEqual(img.mode, mode)
    bands = img.split()
    for c in range(2):
        self.assertTrue(
            np.allclose(expected_output[:, :, c],
                        transform.to_tensor(bands[c])))
def test_2d_ndarray_to_pil_image(self):
    """2-D numpy arrays of each supported dtype map to the expected PIL
    mode and keep their raw values through the conversion.
    """
    arrays = [
        np.random.rand(4, 4).astype(np.float32),
        np.random.randint(0, 255, (4, 4)).astype(np.uint8),
        np.random.randint(0, 32767, (4, 4)).astype(np.int16),
        np.random.randint(0, 2147483647, (4, 4)).astype(np.int32),
    ]
    modes = ['F', 'L', 'I;16', 'I']
    for arr, mode in zip(arrays, modes):
        # Both the default and the explicit-mode converters must agree.
        for converter in (transform.ToPILImage(), transform.ToPILImage(mode=mode)):
            pil_img = converter(arr)
            self.assertEqual(pil_img.mode, mode)
            self.assertTrue(np.allclose(arr, pil_img))
def test_crop(self):
    """CenterCrop of a zeroed centre region sums to 0; growing the crop
    past the zeroed region brings ones back in.
    """
    height = random.randint(10, 32) * 2
    width = random.randint(10, 32) * 2
    # BUG FIX: use integer division — (height - 2) / 2 is a float, which
    # random.randint rejects (TypeError on Python >= 3.12, deprecated 3.10+).
    oheight = random.randint(5, (height - 2) // 2) * 2
    owidth = random.randint(5, (width - 2) // 2) * 2
    img = np.ones([height, width, 3])
    oh1 = (height - oheight) // 2
    ow1 = (width - owidth) // 2
    # imgnarrow = img[oh1:oh1 + oheight, ow1:ow1 + owidth, :]
    # imgnarrow.fill(0)
    # Zero the centre so a perfectly centred crop contains only zeros.
    img[oh1:oh1 + oheight, ow1:ow1 + owidth, :] = 0
    # img = jt.array(img)
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(
        result.sum(), 0,
        f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}"
    )
    oheight += 1
    owidth += 1
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    sum1 = result.sum()
    # TODO: not pass
    # self.assertGreater(sum1, 1,
    #                    f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}")
    oheight += 1
    owidth += 1
    result = transform.Compose([
        transform.ToPILImage(),
        transform.CenterCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    sum2 = result.sum()
    self.assertGreater(
        sum2, 0,
        f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}"
    )
    self.assertGreaterEqual(
        sum2, sum1,
        f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}"
    )
def test_random_horizontal_flip(self):
    """RandomHorizontalFlip flips with roughly the requested probability
    (checked with a binomial test at p=0.5 and p=0.7).
    """
    saved_state = random.getstate()
    random.seed(42)
    img = transform.ToPILImage()(jt.random((3, 10, 10)))
    flipped = img.transpose(Image.FLIP_LEFT_RIGHT)

    trials = 250
    flips = sum(
        1 for _ in range(trials)
        if transform.RandomHorizontalFlip()(img) == flipped
    )
    p_value = stats.binom_test(flips, trials, p=0.5)
    random.setstate(saved_state)
    self.assertGreater(p_value, 0.0001)

    trials = 250
    flips = sum(
        1 for _ in range(trials)
        if transform.RandomHorizontalFlip(p=0.7)(img) == flipped
    )
    p_value = stats.binom_test(flips, trials, p=0.7)
    random.setstate(saved_state)
    self.assertGreater(p_value, 0.0001)
def build_transform(self):
    """
    Creates a basic transformation that was used to train the models
    """
    cfg = self.cfg

    # we are loading images with OpenCV, so we don't need to convert them
    # to BGR, they are already! So all we need to do is to normalize
    # by 255 if we want to convert to BGR255 format, or flip the channels
    # if we want it to be in RGB in [0-1] range.
    if cfg.INPUT.TO_BGR255:
        to_bgr_transform = T.Lambda(lambda x: x * 255)
    else:
        to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

    normalize_transform = T.ImageNormalize(
        mean=cfg.INPUT.PIXEL_MEAN,
        std=cfg.INPUT.PIXEL_STD,
    )

    # Test-time resize bounds come straight from the config.
    pipeline = [
        T.ToPILImage(),
        Resize(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST),
        T.ToTensor(),
        to_bgr_transform,
        normalize_transform,
    ]
    return T.Compose(pipeline)
def test_TenCrop(self):
    """Smoke test: TenCrop composes with ToPILImage/ToTensor without error."""
    sample = jt.random((30, 40, 3))
    pipeline = transform.Compose([
        transform.ToPILImage(),
        transform.TenCrop(20),
        transform.ToTensor(),
    ])
    result = pipeline(sample)
def test_RandomAffine(self):
    """Smoke test: RandomAffine composes with ToPILImage/ToTensor without error."""
    sample = jt.random((30, 40, 3))
    pipeline = transform.Compose([
        transform.ToPILImage(),
        transform.RandomAffine(20),
        transform.ToTensor(),
    ])
    result = pipeline(sample)
def test_RandomPerspective(self):
    """Smoke test: RandomPerspective (always applied) runs without error."""
    sample = jt.random((30, 40, 3))
    pipeline = transform.Compose([
        transform.ToPILImage(),
        transform.RandomPerspective(p=1),
        transform.ToTensor(),
    ])
    result = pipeline(sample)
def test_random_choice(self):
    """RandomChoice picks each of its three transforms about one third of
    the time (binomial test on observed output sizes).
    """
    saved_state = random.getstate()
    random.seed(42)
    chooser = transform.RandomChoice([
        transform.Resize(15),
        transform.Resize(20),
        transform.CenterCrop(10),
    ])
    img = transform.ToPILImage()(jt.random((25, 25, 3)))

    trials = 250
    # Tally which transform fired, identified by the resulting image size.
    counts = {(15, 15): 0, (20, 20): 0, (10, 10): 0}
    for _ in range(trials):
        out = chooser(img)
        if out.size in counts:
            counts[out.size] += 1

    for size in ((15, 15), (20, 20), (10, 10)):
        p_value = stats.binom_test(counts[size], trials, p=0.33333)
        self.assertGreater(p_value, 0.0001)
    random.setstate(saved_state)
def test_resize(self):
    """Resize handles a scalar (short-edge) size, a square list, and an
    explicit (h, w) pair given as tuple or list.
    """
    height = random.randint(24, 32) * 2
    width = random.randint(24, 32) * 2
    osize = random.randint(5, 12) * 2
    img = jt.ones([height, width, 3])

    def apply_resize(size):
        # Shared pipeline: PIL round trip with a Resize in the middle.
        return transform.Compose([
            transform.ToPILImage(),
            transform.Resize(size),
            transform.ToTensor(),
        ])(img)

    # Scalar size: the short edge becomes osize, aspect ratio preserved.
    result = apply_resize(osize)
    self.assertIn(osize, result.shape)
    if height < width:
        self.assertLessEqual(result.shape[1], result.shape[2])
    elif width < height:
        self.assertGreaterEqual(result.shape[1], result.shape[2])

    # Square list: both output dimensions equal osize.
    result = apply_resize([osize, osize])
    self.assertIn(osize, result.shape)
    self.assertEqual(result.shape[1], osize)
    self.assertEqual(result.shape[2], osize)

    # Explicit (h, w): tuple and list forms behave the same.
    oheight = random.randint(5, 12) * 2
    owidth = random.randint(5, 12) * 2
    for size in ((oheight, owidth), [oheight, owidth]):
        result = apply_resize(size)
        self.assertEqual(result.shape[1], oheight)
        self.assertEqual(result.shape[2], owidth)
def test_to_tensor(self):
    """ToTensor rejects lists and 1-D/4-D arrays, converts PIL images and
    ndarrays to CHW float tensors, and handles mode-'1' images.
    """
    test_channels = [1, 3, 4]
    height, width = 4, 4
    trans = transform.ToTensor()

    with self.assertRaises(TypeError):
        trans(np.random.rand(1, height, width).tolist())
    # BUG FIX: the original put both calls inside a single assertRaises
    # block, so the 4-D case was never reached — the context manager exits
    # at the first raise. Each invalid rank gets its own check.
    with self.assertRaises(ValueError):
        trans(np.random.rand(height))
    with self.assertRaises(ValueError):
        trans(np.random.rand(1, 1, height, width))

    for channels in test_channels:
        # PIL round trip: HWC float input comes back as CHW.
        input_data = np.random.randint(
            low=0, high=255, size=(height, width, channels)).astype(
                np.float32) / np.float32(255.0)
        img = transform.ToPILImage()(input_data)
        output = trans(img)
        expect = input_data.transpose(2, 0, 1)
        self.assertTrue(np.allclose(expect, output),
                        f"{expect.shape}\n{output.shape}")

        # uint8 CHW ndarray: values are scaled into [0, 1].
        ndarray = np.random.randint(low=0, high=255,
                                    size=(channels, height, width)).astype(np.uint8)
        output = trans(ndarray)
        expected_output = ndarray / 255.0
        np.testing.assert_allclose(output, expected_output)

        # float CHW ndarray: values pass through unchanged.
        ndarray = np.random.rand(channels, height, width).astype(np.float32)
        output = trans(ndarray)
        expected_output = ndarray
        self.assertTrue(np.allclose(output, expected_output))

    # separate test for mode '1' PIL images
    input_data = np.random.binomial(1, 0.5, size=(height, width, 1)).astype(np.uint8)
    img = transform.ToPILImage()(input_data * 255).convert('1')
    output = trans(img)
    self.assertTrue(np.allclose(input_data[:, :, 0], output[0]),
                    f"{input_data.shape}\n{output.shape}")
def test_3_channel_ndarray_to_pil_image(self):
    """3-channel uint8 ndarrays convert to RGB by default (or an explicit
    3-channel mode) and reject 4/1/2-channel modes.
    """
    def verify_img_data(img_data, mode):
        if mode is None:
            img = transform.ToPILImage()(img_data)
            self.assertEqual(img.mode, 'RGB')  # default should assume RGB
        else:
            img = transform.ToPILImage(mode=mode)(img_data)
            self.assertEqual(img.mode, mode)
        split = img.split()
        for i in range(3):
            self.assertTrue(np.allclose(img_data[:, :, i], split[i]))

    img_data = np.random.randint(0, 255, (4, 4, 3)).astype(np.uint8)
    for mode in [None, 'RGB', 'HSV', 'YCbCr']:
        verify_img_data(img_data, mode)

    # BUG FIX: the original put all three calls inside one assertRaises
    # block, so statements after the first raise never executed. Each
    # incompatible (4-, 1-, or 2-channel) mode gets its own check.
    for bad_mode in ('RGBA', 'P', 'LA'):
        with self.assertRaises(ValueError):
            transform.ToPILImage(mode=bad_mode)(img_data)
def test_not_pil_image(self):
    """Transforms accept raw tensors directly, and Gray/Resize compose
    after an explicit PIL conversion.
    """
    # Pipeline applied straight to a tensor, no ToPILImage first.
    sample = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.RandomAffine(20),
        transform.ToTensor(),
    ])(sample)

    # Same shape, but routed through PIL with Gray + Resize.
    sample = jt.random((30, 40, 3))
    result = transform.Compose([
        transform.ToPILImage(),
        transform.Gray(),
        transform.Resize(20),
        transform.ToTensor(),
    ])(sample)
def test_random_crop(self):
    """RandomCrop yields the requested output size, is the identity when
    the crop equals the image size, and rejects oversized crops.
    """
    height = random.randint(10, 32) * 2
    width = random.randint(10, 32) * 2
    # BUG FIX: use integer division — (height - 2) / 2 is a float, which
    # random.randint rejects (TypeError on Python >= 3.12, deprecated 3.10+).
    oheight = random.randint(5, (height - 2) // 2) * 2
    owidth = random.randint(5, (width - 2) // 2) * 2
    img = np.ones((height, width, 3))

    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)

    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((oheight, owidth)),
        transform.ToTensor(),
    ])(img)
    self.assertEqual(result.shape[1], oheight)
    self.assertEqual(result.shape[2], owidth)

    # A crop of exactly the image size must return the image unchanged.
    result = transform.Compose([
        transform.ToPILImage(),
        transform.RandomCrop((height, width)),
        transform.ToTensor()
    ])(img)
    self.assertEqual(result.shape[1], height)
    self.assertEqual(result.shape[2], width)
    self.assertTrue(np.allclose(img, result.transpose(1, 2, 0)))

    # Requesting a crop larger than the image is an error.
    with self.assertRaises(AssertionError):
        result = transform.Compose([
            transform.ToPILImage(),
            transform.RandomCrop((height + 1, width + 1)),
            transform.ToTensor(),
        ])(img)
def test_random_order(self):
    """RandomOrder applies its two transforms in the original order about
    half the time (binomial test at p=0.5).
    """
    saved_state = random.getstate()
    random.seed(42)
    shuffled = transform.RandomOrder(
        [transform.Resize(20), transform.CenterCrop(10)])
    img = transform.ToPILImage()(jt.random((3, 25, 25)))

    trials = 250
    # Reference output for the Resize-then-CenterCrop ordering.
    resize_crop_out = transform.CenterCrop(10)(transform.Resize(20)(img))
    in_order = sum(
        1 for _ in range(trials) if shuffled(img) == resize_crop_out
    )
    p_value = stats.binom_test(in_order, trials, p=0.5)
    random.setstate(saved_state)
    self.assertGreater(p_value, 0.0001)
def build_transform():
    """Build the evaluation pipeline: PIL conversion, config-driven resize,
    tensor conversion, BGR/scale adjustment, and normalization.
    """
    # Either scale to BGR255 or flip channels to RGB, per the config.
    if cfg.INPUT.TO_BGR255:
        to_bgr_transform = T.Lambda(lambda x: x * 255)
    else:
        to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

    normalize_transform = T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                                      std=cfg.INPUT.PIXEL_STD)

    pipeline = [
        T.ToPILImage(),
        Resize(cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST),
        T.ToTensor(),
        to_bgr_transform,
        normalize_transform,
    ]
    return T.Compose(pipeline)
def test_random_apply(self):
    """RandomApply fires its transform list with roughly the requested
    probability (binomial test; flips compose to p≈0.3 of changing the image).
    """
    saved_state = random.getstate()
    random.seed(42)
    maybe_flip = transform.RandomApply([
        transform.RandomHorizontalFlip(),
        transform.RandomVerticalFlip(),
    ], p=0.4)
    img = transform.ToPILImage()(jt.random((3, 10, 10)))

    trials = 250
    changed = sum(1 for _ in range(trials) if maybe_flip(img) != img)
    p_value = stats.binom_test(changed, trials, p=0.3)
    random.setstate(saved_state)
    self.assertGreater(p_value, 0.0001)
init_step = int(math.log2(args.init_size) - 2) max_step = int(math.log2(max_size) - 2) nsteps = max_step - init_step + 1 lr = 1e-3 mixing = True code_size = 512 batch_size = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16} batch_default = 32 phase = 150_000 max_iter = 100_000 transform = transform.Compose([ transform.ToPILImage(), transform.RandomHorizontalFlip(), transform.ToTensor(), transform.ImageNormalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) ]) netG = StyledGenerator(code_dim=code_size) netD = Discriminator(from_rgb_activate=True) g_running = StyledGenerator(code_size) g_running.eval() d_optimizer = jt.optim.Adam(netD.parameters(), lr=lr, betas=(0.0, 0.99)) g_optimizer = jt.optim.Adam(netG.generator.parameters(), lr=lr, betas=(0.0, 0.99)) g_optimizer.add_param_group({
def test_tensor_bad_types_to_pil_image(self):
    """A 4-D tensor has no PIL image interpretation and must be rejected."""
    bad_input = jt.ones((1, 3, 4, 4))
    with self.assertRaises(ValueError):
        transform.ToPILImage()(bad_input)