def test_rotate(self):
    x = np.zeros((100, 100, 3), dtype=np.uint8)
    x[40, 40] = [255, 255, 255]

    with self.assertRaises(TypeError):
        F.rotate(x, 10)

    img = F.to_pil_image(x)

    result = F.rotate(img, 45)
    assert result.size == (100, 100)
    r, c, ch = np.where(result)
    assert all(x in r for x in [49, 50])
    assert all(x in c for x in [36])
    assert all(x in ch for x in [0, 1, 2])

    result = F.rotate(img, 45, expand=True)
    assert result.size == (142, 142)
    r, c, ch = np.where(result)
    assert all(x in r for x in [70, 71])
    assert all(x in c for x in [57])
    assert all(x in ch for x in [0, 1, 2])

    result = F.rotate(img, 45, center=(40, 40))
    assert result.size == (100, 100)
    r, c, ch = np.where(result)
    assert all(x in r for x in [40])
    assert all(x in c for x in [40])
    assert all(x in ch for x in [0, 1, 2])

    result_a = F.rotate(img, 90)
    result_b = F.rotate(img, -270)
    assert np.all(np.array(result_a) == np.array(result_b))
def test_random_affine(self):

    with self.assertRaises(ValueError):
        # NOTE: only the first call is actually exercised; assertRaises exits
        # the block at the first exception, so the remaining constructions
        # below it are never reached.
        transforms.RandomAffine(-0.7)
        transforms.RandomAffine([-0.7])
        transforms.RandomAffine([-0.7, 0, 0.7])

        transforms.RandomAffine([-90, 90], translate=2.0)
        transforms.RandomAffine([-90, 90], translate=[-1.0, 1.0])
        transforms.RandomAffine([-90, 90], translate=[-1.0, 0.0, 1.0])

        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.0])
        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[-1.0, 1.0])
        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, -0.5])
        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 3.0, -0.5])

        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=-7)
        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10])
        transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10])

    x = np.zeros((100, 100, 3), dtype=np.uint8)
    img = F.to_pil_image(x)

    t = transforms.RandomAffine(10, translate=[0.5, 0.3], scale=[0.7, 1.3], shear=[-10, 10])
    for _ in range(100):
        angle, translations, scale, shear = t.get_params(
            t.degrees, t.translate, t.scale, t.shear, img_size=img.size)
        assert -10 < angle < 10
        assert -img.size[0] * 0.5 <= translations[0] <= img.size[0] * 0.5, \
            "{} vs {}".format(translations[0], img.size[0] * 0.5)
        assert -img.size[1] * 0.5 <= translations[1] <= img.size[1] * 0.5, \
            "{} vs {}".format(translations[1], img.size[1] * 0.5)
        assert 0.7 < scale < 1.3
        assert -10 < shear < 10

    # Checking if RandomAffine can be printed as string
    t.__repr__()

    t = transforms.RandomAffine(10, resample=Image.BILINEAR)
    assert "Image.BILINEAR" in t.__repr__()
def test_affine(self):
    input_img = np.zeros((200, 200, 3), dtype=np.uint8)
    pts = []
    cnt = [100, 100]
    for pt in [(80, 80), (100, 80), (100, 100)]:
        for i in range(-5, 5):
            for j in range(-5, 5):
                input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55]
                pts.append((pt[0] + i, pt[1] + j))
    pts = list(set(pts))

    with self.assertRaises(TypeError):
        F.affine(input_img, 10)

    pil_img = F.to_pil_image(input_img)

    def _to_3x3_inv(inv_result_matrix):
        result_matrix = np.zeros((3, 3))
        result_matrix[:2, :] = np.array(inv_result_matrix).reshape((2, 3))
        result_matrix[2, 2] = 1
        return np.linalg.inv(result_matrix)

    def _test_transformation(a, t, s, sh):
        a_rad = math.radians(a)
        s_rad = math.radians(sh)
        # 1) Check transformation matrix:
        c_matrix = np.array([[1.0, 0.0, cnt[0]],
                             [0.0, 1.0, cnt[1]],
                             [0.0, 0.0, 1.0]])
        c_inv_matrix = np.linalg.inv(c_matrix)
        t_matrix = np.array([[1.0, 0.0, t[0]],
                             [0.0, 1.0, t[1]],
                             [0.0, 0.0, 1.0]])
        r_matrix = np.array([[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                             [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                             [0.0, 0.0, 1.0]])
        true_matrix = np.dot(t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
        result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(
            center=cnt, angle=a, translate=t, scale=s, shear=sh))
        assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
        # 2) Perform inverse mapping:
        true_result = np.zeros((200, 200, 3), dtype=np.uint8)
        inv_true_matrix = np.linalg.inv(true_matrix)
        for y in range(true_result.shape[0]):
            for x in range(true_result.shape[1]):
                res = np.dot(inv_true_matrix, [x, y, 1])
                _x = int(res[0] + 0.5)
                _y = int(res[1] + 0.5)
                if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                    true_result[y, x, :] = input_img[_y, _x, :]

        result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
        assert result.size == pil_img.size
        # Compute number of different pixels:
        np_result = np.array(result)
        n_diff_pixels = np.sum(np_result != true_result) / 3
        # Accept 3 wrong pixels
        assert n_diff_pixels < 3, \
            "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) + \
            "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))

    # Test rotation
    a = 45
    _test_transformation(a=a, t=(0, 0), s=1.0, sh=0.0)

    # Test translation
    t = [10, 15]
    _test_transformation(a=0.0, t=t, s=1.0, sh=0.0)

    # Test scale
    s = 1.2
    _test_transformation(a=0.0, t=(0.0, 0.0), s=s, sh=0.0)

    # Test shear
    sh = 45.0
    _test_transformation(a=0.0, t=(0.0, 0.0), s=1.0, sh=sh)

    # Test rotation, scale, translation, shear
    for a in range(-90, 90, 25):
        for t1 in range(-10, 10, 5):
            for s in [0.75, 0.98, 1.0, 1.1, 1.2]:
                for sh in range(-15, 15, 5):
                    _test_transformation(a=a, t=(t1, t1), s=s, sh=sh)
def __getitem__(self, idx):
    # processing img
    img_name = self.img_names[idx]  # image path
    imgA = cv2.imread(img_name)
    imgA = cv2.resize(imgA, (352, 352))
    img_filename = os.path.basename(img_name).split('.')[0]

    # processing pseudo label (img_filename already has its extension stripped)
    imgC = cv2.imread(self.pseudo_path + img_filename + '.png')
    imgC = cv2.resize(imgC, (352, 352))

    # processing label
    imgB = cv2.imread(self.label_path + img_filename + '.png', 0)
    if not self.is_test:
        imgB = cv2.resize(imgB, (352, 352))
    img_label = imgB
    # print(np.unique(img_label))

    # make data augmentation here
    if self.is_data_augment:
        # convert to pil format so we can data augment them
        img_label = np.expand_dims(img_label, -1)
        pil_imgA = TF.to_pil_image(imgA)
        pil_img_label = TF.to_pil_image(img_label)
        pil_imgC = TF.to_pil_image(imgC)

        if random.random() > 0.5:
            # random cropping (get_params returns top, left, height, width)
            crop_size = int(min(imgA.shape[:2]) * 0.8)
            i, j, h, w = transforms.RandomCrop.get_params(
                pil_imgA, output_size=(crop_size, crop_size))
            pil_imgA = TF.crop(pil_imgA, i, j, h, w)
            pil_img_label = TF.crop(pil_img_label, i, j, h, w)
            pil_imgC = TF.crop(pil_imgC, i, j, h, w)

        # -- data augmentation --
        # Random horizontal flipping
        if random.random() > 0.5:
            pil_imgA = TF.hflip(pil_imgA)
            pil_img_label = TF.hflip(pil_img_label)
            pil_imgC = TF.hflip(pil_imgC)

        # Random vertical flipping
        if random.random() > 0.5:
            pil_imgA = TF.vflip(pil_imgA)
            pil_img_label = TF.vflip(pil_img_label)
            pil_imgC = TF.vflip(pil_imgC)

        # random cutout
        if self.random_cutout:
            if random.random() > 0.5:
                cutout_size = int(min(imgA.shape[:2]) * self.random_cutout)
                i, j, h, w = transforms.RandomCrop.get_params(
                    pil_imgA,
                    output_size=(random.randint(0, cutout_size),
                                 random.randint(0, cutout_size)))
                color_code = random.randint(0, 255)
                rect = Image.new('RGB', (w, h),
                                 (color_code, color_code, color_code))
                # PIL paste expects an (x, y) = (left, top) position
                pil_imgA.paste(rect, (j, i))

        pil_imgA = pil_imgA.resize((352, 352))
        pil_img_label = pil_img_label.resize((352, 352))
        pil_imgC = pil_imgC.resize((352, 352))

        # convert pil back to numpy
        imgA = np.array(pil_imgA)
        img_label = np.array(pil_img_label)
        imgC = np.array(pil_imgC)

    # only need to process the original dataset; 'tr' and 'rp' are already processed
    if 'tr' not in img_filename and 'rp' not in img_filename:
        img_label[img_label < 19] = 0
        img_label[(img_label <= 38) & (img_label >= 19)] = 1
        img_label[img_label > 38] = 2

    img_label_onehot = (np.arange(self.num_class) == img_label[..., None]).astype(float)
    img_label_onehot = img_label_onehot.transpose(2, 0, 1)  # n_class * W * H

    # label smoothing
    if self.is_label_smooth:
        # since there are so many labels on the first axis, we smooth it
        img_label_onehot[0] = img_label_onehot[0] * 0.9

    onehot_label = torch.FloatTensor(img_label_onehot)

    if self.transform:
        imgA = self.transform(imgA)
        imgC = self.transform(imgC)

    return imgA, imgC, onehot_label, img_name
def tensor2img(t, padding=16):
    # ImageNet channel statistics, used to undo normalization
    std = torch.Tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
    mu = torch.Tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
    # De-normalize multi-channel tensors; single-channel tensors pass through
    img = to_pil_image(t * std + mu if t.shape[0] > 1 else t)
    w, h = img.size
    # Trim `padding` pixels from every border
    return img.crop((padding, padding, w - padding, h - padding))
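# Usage sketch for tensor2img (an assumption, not from the source): the input
# is expected to be a CHW float tensor normalized with the ImageNet mean/std above.
import torch
from torchvision.transforms.functional import to_pil_image

mean = torch.tensor([0.485, 0.456, 0.406]).reshape(-1, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).reshape(-1, 1, 1)
t = (torch.rand(3, 256, 256) - mean) / std   # stand-in for a normalized image
img = tensor2img(t, padding=16)              # PIL image with a 16 px border cropped off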
def run_eval(args):
    print('running evaluation...')

    if args.save_output:
        if os.path.exists(args.output_dir) is False:
            os.mkdir(args.output_dir)

    running_psnr = []
    running_ssim = []

    if args.dataset == 'rain100h':
        datadir = r'./datasets/Rain100H/val'
        val_dirs = glob.glob(os.path.join(datadir, 'norain-*.png'))
    elif args.dataset == 'rain100l':
        datadir = r'./datasets/Rain100L/val'
        val_dirs = glob.glob(os.path.join(datadir, '*x2.png'))
    elif args.dataset == 'rain800':
        datadir = r'./datasets/Rain800/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain800-real':
        datadir = r'./datasets/Rain800/test_nature'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test1':
        datadir = r'./datasets/DID-MDN/val'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'did-mdn-test2':
        datadir = r'./datasets/DID-MDN/testing_fu'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    elif args.dataset == 'rain1400':
        datadir = r'./datasets/Rain1400/val/rainy_image'
        val_dirs = glob.glob(os.path.join(datadir, '*.jpg'))
    else:
        raise ValueError('unknown dataset: %s' % args.dataset)

    for idx in range(len(val_dirs)):
        this_dir = val_dirs[idx]
        if args.dataset == 'rain100h':
            gt = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
            img_mix = cv2.imread(val_dirs[idx].replace('norain', 'rain'), cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
        elif args.dataset == 'rain100l':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            gt = cv2.imread(val_dirs[idx].replace('x2.png', '.png'), cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)
        elif args.dataset in ('rain800', 'rain800-real', 'did-mdn-test2'):
            # these datasets store [gt | rainy] side by side in one image
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            gt = img[:, 0:int(w / 2), :]
            img_mix = img[:, int(w / 2):, :]
        elif args.dataset == 'did-mdn-test1':
            # here the halves are swapped: [rainy | gt]
            img = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            h, w, c = img.shape
            img_mix = img[:, 0:int(w / 2), :]
            gt = img[:, int(w / 2):, :]
        elif args.dataset == 'rain1400':
            img_mix = cv2.imread(this_dir, cv2.IMREAD_COLOR)
            img_mix = cv2.cvtColor(img_mix, cv2.COLOR_BGR2RGB)
            suff = '_' + this_dir.split('_')[-1]
            this_gt_dir = this_dir.replace('rainy_image', 'ground_truth').replace(suff, '.jpg')
            gt = cv2.imread(this_gt_dir, cv2.IMREAD_COLOR)
            gt = cv2.cvtColor(gt, cv2.COLOR_BGR2RGB)

        # We recommend using TF.resize since it was also used during training.
        # You may also try cv2.resize, but it will produce slightly different results.
        img_mix = TF.resize(TF.to_pil_image(img_mix), [args.in_size, args.in_size])
        img_mix = TF.to_tensor(img_mix).unsqueeze(0)
        gt = TF.resize(TF.to_pil_image(gt), [args.in_size, args.in_size])
        gt = TF.to_tensor(gt).unsqueeze(0)

        with torch.no_grad():
            G_pred1 = net_G(img_mix.to(device))[:, 0:3, :, :]
            G_pred2 = net_G(img_mix.to(device))[:, 3:6, :, :]

        G_pred1 = np.array(G_pred1.cpu().detach())
        G_pred1 = G_pred1[0, :].transpose([1, 2, 0])
        G_pred2 = np.array(G_pred2.cpu().detach())
        G_pred2 = G_pred2[0, :].transpose([1, 2, 0])
        gt = np.array(gt.cpu().detach())
        gt = gt[0, :].transpose([1, 2, 0])
        img_mix = np.array(img_mix.cpu().detach())
        img_mix = img_mix[0, :].transpose([1, 2, 0])

        # Clip predictions to the valid [0, 1] range
        G_pred1 = np.clip(G_pred1, 0, 1)
        G_pred2 = np.clip(G_pred2, 0, 1)

        psnr = utils.cpt_rgb_psnr(G_pred1, gt, PIXEL_MAX=1.0)
        ssim = utils.cpt_rgb_ssim(G_pred1, gt)
        running_psnr.append(psnr)
        running_ssim.append(ssim)

        if args.save_output:
            fname = this_dir.split('/')[-1]
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_input.png'), img_mix)
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_gt1.png'), gt)
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_output1.png'), G_pred1)
            plt.imsave(os.path.join(args.output_dir, fname[:-4] + '_output2.png'), G_pred2)

        print('id: %d, running psnr: %.4f, running ssim: %.4f'
              % (idx, np.mean(running_psnr), np.mean(running_ssim)))

    print('Dataset: %s, average psnr: %.4f, average ssim: %.4f'
          % (args.dataset, np.mean(running_psnr), np.mean(running_ssim)))
def __call__(self, image, target):
    original_w, original_h = image.size
    image = F.to_tensor(image)

    boxes = target['boxes']
    labels = target['labels']
    masks = target['masks']

    # Minimum Jaccard overlap required between the crop and at least one box
    min_overlap = 0.75

    # Try up to 50 times for this choice of minimum overlap
    max_trials = 50
    for _ in range(max_trials):
        min_scale = 0.75
        scale_h = random.uniform(min_scale, 1)
        scale_w = random.uniform(min_scale, 1)
        new_h = int(scale_h * original_h)
        new_w = int(scale_w * original_w)

        # Aspect ratio has to be in [0.5, 2]
        aspect_ratio = new_h / new_w
        if not 0.5 < aspect_ratio < 2:
            continue

        # Crop coordinates
        left = random.randint(0, original_w - new_w)
        right = left + new_w
        top = random.randint(0, original_h - new_h)
        bottom = top + new_h
        crop = torch.LongTensor([left, top, right, bottom])

        # Calculate Jaccard overlap between the crop and the bounding boxes
        overlap = find_jaccard_overlap(crop.unsqueeze(0), boxes)  # (1, n_objects)
        overlap = overlap.squeeze(0)  # (n_objects)

        # If not a single bounding box has a Jaccard overlap greater than the minimum, try again
        if overlap.max().item() < min_overlap:
            continue

        # Crop image
        new_image = image[:, top:bottom, left:right]  # (3, new_h, new_w)

        # Find boxes that overlap the cropped region
        boxes_in_crop = (boxes[:, 0] < right) * (boxes[:, 2] > left) * \
                        (boxes[:, 1] < bottom) * (boxes[:, 3] > top)
        if not boxes_in_crop.any():
            continue

        # Discard bounding boxes that don't meet this criterion
        new_boxes = boxes[boxes_in_crop, :]
        new_masks = masks[boxes_in_crop, :]
        new_labels = labels[boxes_in_crop]

        # Calculate bounding boxes' new coordinates in the crop
        new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])  # crop[:2] is [left, top]
        new_boxes[:, :2] -= crop[:2]
        new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:])  # crop[2:] is [right, bottom]
        new_boxes[:, 2:] -= crop[:2]

        # Crop masks
        new_masks = new_masks[:, top:bottom, left:right]

        new_target = {}
        new_target['boxes'] = new_boxes
        new_target['labels'] = new_labels
        new_target['masks'] = new_masks
        new_target['image_name'] = target['image_name']

        new_image = F.to_pil_image(new_image)
        return new_image, new_target

    # No acceptable crop found: return the pair unchanged
    image = F.to_pil_image(image)
    return image, target
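# A minimal sketch of the `find_jaccard_overlap` helper assumed above: pairwise
# IoU between two sets of boxes in (left, top, right, bottom) corner format.
# This is an illustration under that assumption, not necessarily the project's
# own implementation.
import torch

def find_jaccard_overlap_sketch(set_1, set_2):
    # Pairwise intersection corners, broadcast to (n1, n2, 2)
    lower = torch.max(set_1[:, None, :2], set_2[None, :, :2])
    upper = torch.min(set_1[:, None, 2:], set_2[None, :, 2:])
    wh = (upper - lower).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]  # (n1, n2)
    area_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1])
    area_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1])
    union = area_1[:, None] + area_2[None, :] - inter
    return inter / union  # IoU matrix of shape (n1, n2)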
test_loader = DataLoader(test_dataset,
                         batch_size=batch_s,
                         shuffle=False,
                         num_workers=0)

model = Unet(3, 21)
model = model.cuda()
model.load_state_dict(
    torch.load(args.path + '/Unet_results' + '/training_results.pt'))

inputs, labels, predictions = test_model(model, test_loader)
inputs = inputs.cpu()
labels = labels.cpu()
predictions = predictions.cpu()

fig = plt.figure(figsize=(10, 10))
plt.clf()
columns = 3
rows = batch_s
for i in range(0, columns * rows):
    if i % 3 == 0:
        fig.add_subplot(rows, columns, i + 1)
        plt.imshow(to_pil_image(re_normalize(inputs[i // 3])))
    if i % 3 == 1:
        fig.add_subplot(rows, columns, i + 1)
        plt.imshow(labels[i // 3])
    if i % 3 == 2:
        fig.add_subplot(rows, columns, i + 1)
        plt.imshow(predictions[i // 3])

# Save before show(): some backends clear the figure on show, which would
# leave the saved file blank
plt.savefig(args.path + '/Unet_results' + '/test_pics.png', bbox_inches='tight')
plt.show()
data_loader = DataLoader(dataset, batch_size=16, shuffle=True)


class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.out = nn.Conv2d(3, 3, 1)

    def forward(self, x):
        return self.out(x)


model = Network().eval()
with torch.no_grad():
    image = next(iter(data_loader))
    # Set the 1x1 conv to an identity mapping over the RGB channels
    model.out.weight = torch.nn.Parameter(torch.tensor([[[[1]], [[0]], [[0]]],
                                                        [[[0]], [[1]], [[0]]],
                                                        [[[0]], [[0]], [[1]]]]).float(),
                                          requires_grad=False)
    model.out.bias = torch.nn.Parameter(torch.tensor([0, 0, 0]).float(),
                                        requires_grad=False)
    image_filled = model(image)

# save_image(denormalize(image), 'image.png')
# save_image(denormalize(image_filled), 'manual.png')
to_pil_image(make_grid(denormalize(image))).show()
to_pil_image(make_grid(denormalize(image_filled))).show()
                    Dict['TL_x'] / W, Dict['TL_y'] / H,
                    Dict['TR_x'] / W, Dict['TR_y'] / H]
        norm_img = torch.FloatTensor(norm_img)
        kpt = torch.FloatTensor(kpt)
        return norm_img, kpt


# Do some checking and visualization
data = TrainData(ROOT_DIR + '/train.csv', ROOT_DIR + '/train_images')
print(len(data))      # should be 3000
img, kpt = data[0]    # get a sample
print(img.size())     # should be [3, H, W]
print(img.max())      # should be <= 1.0
print(kpt.size())     # should be [8]
img = tf.to_pil_image(img)  # convert tensor of shape (3, H, W) to PIL.Image
vis = draw_kpts(img, kpt, c='orange')
plt.imshow(vis)
plt.show()

#%%
class ConvBlock(nn.Module):
    def __init__(self, cin, cout):
        super().__init__()  # necessary
        self.conv = nn.Conv2d(cin, cout, (3, 3), padding=1)
        self.bn = nn.BatchNorm2d(cout)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
def PIL_ShowTensor3(tensor1, tensor2, tensor3):
    pil_img1 = tvF.to_pil_image(tensor1)
    pil_img2 = tvF.to_pil_image(tensor2)
    pil_img3 = tvF.to_pil_image(tensor3)
    PIL_ShowPILImage3(pil_img1, pil_img2, pil_img3)
def PIL_ShowTensor2(tensor1, tensor2):
    pil_img1 = tvF.to_pil_image(tensor1)
    pil_img2 = tvF.to_pil_image(tensor2)
    PIL_ShowPILImage2(pil_img1, pil_img2)
def PIL_ShowTensor(tensor):
    pil_img = tvF.to_pil_image(tensor)
    fig = plt.figure()
    plt.imshow(pil_img)
    plt.show()
with torch.no_grad():
    for file in tqdm(source_files, desc='Generating images from source'):
        # load HR image
        input_img = Image.open(file)
        input_img = TF.to_tensor(input_img)

        # Resize HR image to clean it up and make sure it can be resized again.
        # Note: size() on a CHW tensor is (C, H, W), so `w` here is actually
        # the height and `h` the width; the names are kept from the original.
        resize2_img = utils.imresize(input_img, 1.0 / opt.cleanup_factor, True)
        _, w, h = resize2_img.size()
        w = w - w % opt.upscale_factor
        h = h - h % opt.upscale_factor
        resize2_cut_img = resize2_img[:, :w, :h]

        # Save resize2_cut_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file))
        resize2_cut_img = TF.to_pil_image(resize2_cut_img)
        resize2_cut_img.save(path, 'PNG')

        # Generate resize3_cut_img and apply model
        kernel_path = kernel_paths[np.random.randint(0, kernel_num)]
        mat = loadmat(kernel_path)
        k = np.array([mat['Kernel']]).squeeze()
        resize3_cut_img = imresize(np.array(resize2_cut_img),
                                   scale_factor=1.0 / opt.upscale_factor,
                                   kernel=k)

        # Save resize3_cut_img as LR image for TDSR
        path = os.path.join(tdsr_lr_dir, os.path.basename(file))
        TF.to_pil_image(resize3_cut_img).save(path, 'PNG')

    for file in tqdm(target_files, desc='Generating images from target'):
def replace_original(self, img: torch.Tensor, mask: torch.Tensor,
                     inpainted_resized: torch.Tensor) -> torch.Tensor:
    # PIL resize takes (width, height), i.e. (img.shape[-1], img.shape[-2])
    inpainted = F.to_pil_image(inpainted_resized).resize(
        (img.shape[-1], img.shape[-2]))
    # Composite: inpainted pixels where the mask is 1, the original elsewhere
    return torch.where(mask == 1, F.to_tensor(inpainted), img)
def __getitem__(self, idx):  # pylint: disable=too-many-locals
    # Sample a random transformation
    rotation = np.random.uniform(-self._max_rotation_jitter,
                                 self._max_rotation_jitter)
    scale = np.exp(
        np.random.uniform(-self._max_scale_jitter, self._max_scale_jitter))
    shear = np.random.uniform(-self._max_shear_jitter,
                              self._max_shear_jitter,
                              size=2)

    # Compute the "extended" patch size. This is the size of the patch that
    # we will first transform and then center crop to the final size.
    extpatch_w, extpatch_h = self._compute_extended_patch_size(
        w=self._patch_w,
        h=self._patch_h,
        rotation=rotation,
        scale=scale,
        shear=shear,
    )

    # The slide may not be large enough for the extended patch size. In
    # this case, we will downscale the target patch size until the extended
    # patch size fits.
    adjmul = min(1.0, self._slide.W / extpatch_w, self._slide.H / extpatch_h)
    extpatch_w = min(int(np.ceil(extpatch_w * adjmul)), self._slide.W)
    extpatch_h = min(int(np.ceil(extpatch_h * adjmul)), self._slide.H)
    patch_w = int(self._patch_w * adjmul)
    patch_h = int(self._patch_h * adjmul)

    # Extract the extended patch by sampling uniformly from the size of the
    # slide
    x, y = [
        np.random.randint(a - b + 1)
        for a, b in zip((self._slide.W, self._slide.H),
                        (extpatch_w, extpatch_h))
    ]
    image = self._slide.image[y:y + extpatch_h, x:x + extpatch_w]
    image = (255 * (image + 1) / 2).astype(np.uint8)
    image = to_pil_image(image)
    label = to_pil_image(self._slide.label[y:y + extpatch_h,
                                           x:x + extpatch_w])

    # Apply augmentations
    output_size = (max(extpatch_w, patch_w), max(extpatch_h, patch_h))
    transformation = _get_inverse_affine_matrix(
        center=(image.size[0] * 0.5, image.size[1] * 0.5),
        angle=rotation,
        translate=[(a - b) / 2 for a, b in zip(output_size, image.size)],
        scale=scale,
        shear=shear,
    )
    image = self.image_augmentation(image)
    image = np.array(
        image.transform(
            output_size,
            Image.AFFINE,
            transformation,
            resample=Image.BILINEAR,
        ))
    image = center_crop(image, (patch_h, patch_w))
    label = np.array(
        label.transform(
            output_size,
            Image.AFFINE,
            transformation,
            resample=Image.NEAREST,
        ))
    label = center_crop(label, (patch_h, patch_w))

    if np.random.rand() < 0.5:
        image = np.flip(image, 0).copy()
        label = np.flip(label, 0).copy()

    # Convert image to the correct data format (float32 in [-1, 1] and in
    # CHW order)
    image = 2 * image.astype(np.float32) / 255 - 1
    image = image.transpose(2, 0, 1)

    return self._slide.prepare_data(image, label)
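# A sketch of what `_compute_extended_patch_size` plausibly computes; the
# helper itself is not shown in the source, so the exact shear handling below
# is an assumption. The idea: take the axis-aligned bounding box of the target
# patch under the inverse rotation/scale, padded for shear, so the final
# center crop is always covered.
import numpy as np

def compute_extended_patch_size_sketch(w, h, rotation, scale, shear):
    a = np.radians(rotation)
    # Bounding box of a (w, h) rectangle rotated by `a`, then inverse-scaled
    ext_w = (abs(w * np.cos(a)) + abs(h * np.sin(a))) / scale
    ext_h = (abs(w * np.sin(a)) + abs(h * np.cos(a))) / scale
    # Crude shear padding: widen each axis by the shear tangent
    sx, sy = np.abs(np.tan(np.radians(shear)))
    ext_w *= 1 + sx
    ext_h *= 1 + sy
    return int(np.ceil(ext_w)), int(np.ceil(ext_h))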
def ycbcr_to_rgb(image: torch.Tensor) -> torch.Tensor:
    ycbcr_image = F.to_pil_image(image, mode='YCbCr')
    rgb_image = ycbcr_image.convert('RGB')
    rgb_tensor = F.to_tensor(rgb_image)
    return rgb_tensor
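# The opposite direction via the same PIL round-trip; a sketch under the
# assumption that `image` is a (3, H, W) RGB tensor with values in [0, 1]:
def rgb_to_ycbcr(image: torch.Tensor) -> torch.Tensor:
    rgb_image = F.to_pil_image(image)
    ycbcr_image = rgb_image.convert('YCbCr')
    return F.to_tensor(ycbcr_image)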
def tensor2img(self, ts):
    img = np.asarray(F.to_pil_image(ts))
    return img
loss_func = roi_loss_func(roi_mask=None, towards_target=True)

gen_images = []
fig, axs = plt.subplots(len(alphas),
                        len(decays),
                        squeeze=False,
                        figsize=(len(decays) * 10, len(alphas) * 5))
for i, alpha in tqdm(enumerate(alphas)):
    for j, decay in enumerate(decays):
        gen_image, _, loss, losses = optimize(generator,
                                              encoder,
                                              target,
                                              loss_func,
                                              alpha=alpha,
                                              decay=decay)
        gen_images.append(to_pil_image(gen_image))
        axs[i, j].plot(range(len(losses)), losses)
        axs[i, j].set_title(
            'alpha: {:.3g}, decay: {:.3g}, min_loss: {:.0f}'.format(
                alpha, decay, loss))
        axs[i, j].set_xlabel('Iteration')
        axs[i, j].set_ylabel('Loss')


def make_grid(imgs, n_rows, pad):
    assert len(imgs) > 0
    n_cols = math.ceil(len(imgs) / n_rows)
    w, h = imgs[0].width, imgs[0].height
    grid = Image.new(imgs[0].mode, (w * n_cols + pad * (n_cols - 1),
                                    h * n_rows + pad * (n_rows - 1)))
def eval_OSVOSNet():
    # Paths
    cfg = configparser.ConfigParser()
    cfg.read('settings.conf')

    if sys.platform == 'darwin':
        cfg_dataset = 'dataset_mac'
    elif sys.platform == 'linux':
        cfg_dataset = 'dataset_ubuntu'

    # Hyper parameters
    parser = argparse.ArgumentParser(description='PyTorch OSVOSNet Testing')
    parser.add_argument('-c', '--checkpoint',
                        default=None, type=str, metavar='PATH',
                        help='Path to latest checkpoint (default: none).')
    parser.add_argument('-v', '--video-name',
                        default=None, type=str,
                        help='Test video name (default: none).')
    parser.add_argument('-m', '--model-name',
                        default='OSVOSNet', type=str,
                        help='Model name for the output segmentation; it will create a '
                             'subfolder under the out_folder.')
    parser.add_argument('-o', '--out-folder',
                        default=os.path.join(cfg['paths'][cfg_dataset], 'results/'),
                        type=str, metavar='PATH',
                        help='Folder for the output segmentations.')
    parser.add_argument('-b', '--benchmark', action='store_true',
                        help='Evaluate the video with groundtruth.')
    parser.add_argument('--sample', action='store_true',
                        help='The video sequence has been sampled.')
    args = parser.parse_args()
    print('Args:', args)

    if args.checkpoint is None:
        raise ValueError('Must input checkpoint path.')
    if args.video_name is None:
        raise ValueError('Must input video name.')

    water_thres = 0.5

    device = torch.device('cpu')
    if torch.cuda.is_available():
        device = torch.device('cuda')

    # Dataset
    dataset_args = {}
    if torch.cuda.is_available():
        dataset_args = {
            'num_workers': int(cfg['params_OSVOS']['num_workers']),
            # getboolean parses the string; bool() on a non-empty string is always True
            'pin_memory': cfg['params_OSVOS'].getboolean('pin_memory')
        }

    dataset = WaterDataset_RGB(mode='eval',
                               dataset_path=cfg['paths'][cfg_dataset],
                               test_case=args.video_name,
                               eval_size=(int(cfg['params_OSVOS']['eval_w']),
                                          int(cfg['params_OSVOS']['eval_h'])))
    eval_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **dataset_args)

    # Model
    OSVOS_net = OSVOSNet()

    # Load pretrained model
    if os.path.isfile(args.checkpoint):
        print('Load checkpoint \'{}\''.format(args.checkpoint))
        if torch.cuda.is_available():
            checkpoint = torch.load(args.checkpoint)
        else:
            checkpoint = torch.load(args.checkpoint, map_location='cpu')
        args.start_epoch = checkpoint['epoch'] + 1
        OSVOS_net.load_state_dict(checkpoint['model'])
        print('Loaded checkpoint \'{}\' (epoch {})'.format(
            args.checkpoint, checkpoint['epoch']))
    else:
        raise ValueError('No checkpoint found at \'{}\''.format(args.checkpoint))

    # Set output path
    out_path = os.path.join(args.out_folder, args.model_name + '_segs', args.video_name)
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    if args.sample:
        out_full_path = out_path + '_full'
        if not os.path.exists(out_full_path):
            os.makedirs(out_full_path)

    # Start testing
    OSVOS_net.to(device).eval()
    running_time = AverageMeter()
    running_endtime = time.time()

    # First frame annotation
    pre_frame_mask = dataset.get_first_frame_label()
    eval_size = pre_frame_mask.shape[-2:]
    first_frame_seg = TF.to_pil_image(pre_frame_mask)
    first_frame_seg.save(os.path.join(out_path, '0.png'))
    if args.sample:
        first_frame_seg.save(os.path.join(out_full_path, '0.png'))
    pre_frame_mask = pre_frame_mask.unsqueeze(0).to(device)

    if args.benchmark:
        gt_folder = os.path.join(cfg['paths'][cfg_dataset], 'test_annots', args.video_name)
        gt_list = os.listdir(gt_folder)
        gt_list.sort(key=lambda x: (len(x), x))
        gt_list.pop(0)
        avg_iou = 0

    with torch.no_grad():
        for i, sample in enumerate(tqdm(eval_loader)):
            img = sample['img'].to(device)
            outputs = OSVOS_net(img)
            output = outputs[-1].detach()
            output = torch.sigmoid(output)

            # seg_raw = TF.to_pil_image(output.squeeze(0).cpu())
            # seg_raw.save(os.path.join(out_path, 'raw_%d.png' % (i + 1)))

            zero_tensor = torch.zeros(output.shape).to(device)
            one_tensor = torch.ones(output.shape).to(device)
            seg_tf = torch.where(output > water_thres, one_tensor, zero_tensor)
            seg = TF.to_pil_image(seg_tf.squeeze(0).cpu())

            if args.sample:
                seg.save(os.path.join(out_full_path, f'{i + 1}.png'))
                if i + 1 in [1, 50, 100, 150, 199]:
                    seg.save(os.path.join(out_path, f'{i + 1}.png'))
            else:
                seg.save(os.path.join(out_path, f'{i + 1}.png'))

            running_time.update(time.time() - running_endtime)
            running_endtime = time.time()

            # if args.benchmark:
            #     gt_seg = load_image_in_PIL(os.path.join(gt_folder, gt_list[i])).convert('L')
            #     gt_tf = TF.to_tensor(gt_seg).to(device).type(torch.int)
            #     iou = iou_tensor(seg_tf.squeeze(0).type(torch.int), gt_tf)
            #     avg_iou += iou.item()
            #     print('iou:', iou.item())

            # print('Segment: [{0:4}/{1:4}]\t'
            #       'Time: {running_time.val:.3f}s ({running_time.sum:.3f}s)\t'.format(
            #           i + 1, len(eval_loader), running_time=running_time))

    # if args.benchmark:
    #     print('total_iou:', avg_iou)
    #     avg_iou /= len(eval_loader)
    #     print('avg_iou:', avg_iou, 'frame_num:', len(eval_loader))

    if args.sample:
        mask_folder = args.video_name + '_full'
    else:
        mask_folder = args.video_name
    run_cvt_images_to_overlays(args.video_name, mask_folder,
                               cfg['paths'][cfg_dataset], args.model_name,
                               eval_size)
                                  fill_value=self.input_length, dtype=torch.long)
        target_length = torch.full(size=(1, ),
                                   fill_value=self.label_length,
                                   dtype=torch.long)
        return image, target, input_length, target_length


# Inspect the dataset output
dataset = CaptchaDataset(characters, width, height, n_input_length, n_len,
                         TRAIN_DATASET_PATH)
print('dataset.length', dataset.length,
      'dataset.label_length', dataset.label_length)
image, target, input_length, label_length = dataset[0]
print(''.join([characters[x] for x in target]), input_length, label_length)
to_pil_image(image)

batch_size = 128
# Tweak the length of train_set here
train_set = CaptchaDataset(characters=characters,
                           width=width,
                           height=height,
                           input_length=n_input_length,
                           label_length=n_len,
                           folder=TRAIN_DATASET_PATH)
# train_set = CaptchaDataset(characters=characters, length=100 * batch_size, width=width, height=height, input_length=n_input_length, label_length=n_len, folder=TRAIN_DATASET_PATH)
# valid_set = CaptchaDataset(characters, 100 * batch_size, width, height, n_input_length, n_len)
# shuffle=True, drop_last=True
train_loader = DataLoader(train_set,
                          batch_size=batch_size,
                          num_workers=0,
    new_image = photometric_distort(new_image)

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects
    # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on
    if random.random() < 0.5:
        new_image, new_boxes = expand(new_image, boxes, filler=mean)

    # Randomly crop image (zoom in)
    new_image, new_boxes, new_labels, new_difficulties = random_crop(
        new_image, new_boxes, new_labels, new_difficulties)

    # Convert Torch tensor to PIL image
    new_image = FT.to_pil_image(new_image)

    # Flip image with a 50% chance
    if random.random() < 0.5:
        new_image, new_boxes = flip(new_image, new_boxes)

    # Resize image to (300, 300) - this also converts absolute boundary coordinates to their fractional form
    new_image, new_boxes = resize(new_image, new_boxes, dims=(300, 300))

    # Convert PIL image to Torch tensor
    new_image = FT.to_tensor(new_image)

    # Normalize by mean and standard deviation of ImageNet data that our base VGG was trained on
    new_image = FT.normalize(new_image, mean=mean, std=std)

    return new_image, new_boxes, new_labels, new_difficulties
def make_pil_images(*args, **kwargs):
    for image in make_vanilla_tensor_images(*args, **kwargs):
        yield to_pil_image(image)
def save_test_preds(dir_, preds):
    dir_ = Path(dir_)
    for i, im in enumerate(preds):
        im = transforms_f.to_pil_image(im.data, mode="L")
        im.save(dir_ / f"{i}.jpg")
def _postprocess_img(self, img):
    x = self._post_proc_op(img=img)
    x = torch.clamp(x, 0, 1)  # NST might kick values into illegal areas
    return FT.to_pil_image(x)
def unconvert(self, tensor):
    return tr.to_pil_image(
        denormalize_pixels(tensor.clone(), self.mean, self.stddev), 'RGB')
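# A minimal sketch of the `denormalize_pixels` helper assumed above (its
# implementation is not shown in the source): undo per-channel normalization
# so values land back in [0, 1] before PIL conversion.
def denormalize_pixels(tensor, mean, stddev):
    for channel, (m, s) in enumerate(zip(mean, stddev)):
        tensor[channel].mul_(s).add_(m)  # x * std + mean, in place
    return tensor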
def __getitem__(self, idx):
    imgA = self.imgA[idx]
    imgB = self.imgB[idx]
    imgC = self.imgC[idx]

    if not self.is_test:
        imgB = cv2.resize(imgB, (352, 352))
    img_label = imgB
    # print(np.unique(img_label))

    # make data augmentation here
    if self.is_data_augment:
        # convert to pil format so we can data augment them
        img_label = np.expand_dims(img_label, -1)
        pil_imgA = TF.to_pil_image(imgA)
        pil_img_label = TF.to_pil_image(img_label)
        pil_imgC = TF.to_pil_image(imgC)

        if random.random() > 0.5:
            # random cropping (get_params returns top, left, height, width)
            crop_size = int(min(imgA.shape[:2]) * 0.8)
            i, j, h, w = transforms.RandomCrop.get_params(
                pil_imgA, output_size=(crop_size, crop_size))
            pil_imgA = TF.crop(pil_imgA, i, j, h, w)
            pil_img_label = TF.crop(pil_img_label, i, j, h, w)
            pil_imgC = TF.crop(pil_imgC, i, j, h, w)

        # -- data augmentation --
        # Random horizontal flipping
        if random.random() > 0.5:
            pil_imgA = TF.hflip(pil_imgA)
            pil_img_label = TF.hflip(pil_img_label)
            pil_imgC = TF.hflip(pil_imgC)

        # Random vertical flipping
        if random.random() > 0.5:
            pil_imgA = TF.vflip(pil_imgA)
            pil_img_label = TF.vflip(pil_img_label)
            pil_imgC = TF.vflip(pil_imgC)

        # random cutout
        if self.random_cutout:
            if random.random() > 0.5:
                cutout_size = int(min(imgA.shape[:2]) * self.random_cutout)
                i, j, h, w = transforms.RandomCrop.get_params(
                    pil_imgA,
                    output_size=(random.randint(0, cutout_size),
                                 random.randint(0, cutout_size)))
                color_code = random.randint(0, 255)
                rect = Image.new('RGB', (w, h),
                                 (color_code, color_code, color_code))
                # PIL paste expects an (x, y) = (left, top) position
                pil_imgA.paste(rect, (j, i))

        pil_imgA = pil_imgA.resize((352, 352))
        pil_img_label = pil_img_label.resize((352, 352))
        pil_imgC = pil_imgC.resize((352, 352))

        # convert pil back to numpy
        imgA = np.array(pil_imgA)
        img_label = np.array(pil_img_label)
        imgC = np.array(pil_imgC)

    img_label_onehot = (np.arange(self.num_class) == img_label[..., None]).astype(float)
    img_label_onehot = img_label_onehot.transpose(2, 0, 1)  # n_class * W * H

    # label smoothing
    if self.is_label_smooth:
        # since there are so many labels on the first axis, we smooth it
        img_label_onehot[0] = img_label_onehot[0] * 0.9

    onehot_label = torch.FloatTensor(img_label_onehot)

    if self.transform:
        imgA = self.transform(imgA)
        imgC = self.transform(imgC)

    # return imgA, imgC, onehot_label, img_name
    return imgA, imgC, onehot_label, []
def predict(ckpt_root, selected_defects, aug_params, model_kwargs, name_dict):
    # determine training device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load images
    test_loader = get_test_dataloader('sensitivity/test_imgs',
                                      transforms.ToTensor(),
                                      test_batch_size=1)

    # prepare model
    model = restore_model(ckpt_root, 100, selected_defects, 'shufflenet',
                          'eval', device, **model_kwargs)

    results = dict()
    for i, data in enumerate(test_loader):
        # load image
        images = data['image']
        image_ids = data['img_id']
        assert images.size()[0] == 1
        image_pil = TF.to_pil_image(images[0])
        image_id = image_ids[0]

        for name in augs:
            if name not in results:
                results[name] = [[] for _ in range(len(augs[name]['params']))]
            for k, param in enumerate(augs[name]['params']):
                print('image #%02d, %s, %.3f' % (i, name, param))

                # apply augmentation to the image
                image = augs[name]['method'](image_pil, param)

                # convert to tensor and resize
                image = TF.to_tensor(image)
                image = image.unsqueeze_(0).to(device)
                image = F.interpolate(image, size=(224, 224), mode='area')

                # save the augmented image
                save_dir = 'sensitivity/%s' % name
                img_savepath = '%s/%s_%.3f.png' % (
                    save_dir, image_id.split('/')[-1].split('.')[0], param)
                if not os.path.isfile(img_savepath):
                    makedirs_if_not_exists(save_dir)
                    image_ = TF.to_pil_image(image.data.cpu()[0])
                    image_.save(img_savepath)

                # gather model prediction
                outputs = model(image)
                if model_kwargs['use_softmax_classifier']:
                    outputs = torch.cat([
                        outputs[0][:2].flatten(),
                        outputs[1].flatten(),
                        outputs[0][2:].flatten()
                    ], dim=0)
                    outputs = score_convert_softmax_cls(outputs, selected_defects, 2)
                else:
                    outputs = score_convert_reg(outputs, selected_defects, 2)
                outputs = outputs.data.cpu().numpy()
                print(outputs)

                idx = DEFECT_NAMES.index(name_dict[name])
                idx = selected_defects.index(idx)
                results[name][k].append(outputs[idx])

    # plot graph
    for name in augs:
        r = np.array(results[name])
        r = np.mean(r, axis=1)
        fig, ax = plt.subplots()
        ax.plot(augs[name]['params'], r)
        ax.set(xlabel='factor', ylabel='score', title=name)
        ax.grid()
        fig.savefig('sensitivity/%s.png' % name)
        plt.clf()
def tensor_to_pil(tensor):
    if len(tensor.shape) == 4:
        return F.to_pil_image(tensor[0])
    else:
        return F.to_pil_image(tensor)
def __getitem__(self, idx):
    """Returns a pair of images with the given identifier. This is lazy
    loading of data into memory. Only those image pairs needed for the
    current batch are loaded.

    :param idx: image pair identifier
    :returns: dictionary containing input and output images and their identifier
    :rtype: dictionary
    """
    while True:
        if self.is_inference or self.is_valid:
            input_img = util.ImageProcessing.load_image(
                self.data_dict[idx]['input_img'], normaliser=self.normaliser)
            output_img = util.ImageProcessing.load_image(
                self.data_dict[idx]['output_img'], normaliser=self.normaliser)

            if self.normaliser == 1:
                input_img = input_img.astype(np.uint8)
                output_img = output_img.astype(np.uint8)

            input_img = TF.to_pil_image(input_img)
            input_img = TF.to_tensor(input_img)
            output_img = TF.to_pil_image(output_img)
            output_img = TF.to_tensor(output_img)

            if input_img.shape[1] == output_img.shape[2]:
                output_img = output_img.permute(0, 2, 1)

            return {'input_img': input_img,
                    'output_img': output_img,
                    'name': self.data_dict[idx]['input_img'].split("/")[-1]}

        elif idx in self.data_dict:
            output_img = util.ImageProcessing.load_image(
                self.data_dict[idx]['output_img'], normaliser=self.normaliser)
            input_img = util.ImageProcessing.load_image(
                self.data_dict[idx]['input_img'], normaliser=self.normaliser)

            if self.normaliser == 1:
                input_img = input_img.astype(np.uint8)
                output_img = output_img.astype(np.uint8)

            input_img = TF.to_pil_image(input_img)
            output_img = TF.to_pil_image(output_img)

            if not self.is_valid:
                if random.random() > 0.5:
                    # Random horizontal flipping
                    if random.random() > 0.5:
                        input_img = TF.hflip(input_img)
                        output_img = TF.hflip(output_img)
                    # Random vertical flipping
                    if random.random() > 0.5:
                        input_img = TF.vflip(input_img)
                        output_img = TF.vflip(output_img)

            # Transform to tensor
            # print(output_img.shape)
            # plt.imsave("./" + self.data_dict[idx]['input_img'].split("/")[-1] + ".png", output_img, format='png')
            input_img = TF.to_tensor(input_img)
            output_img = TF.to_tensor(output_img)

            return {'input_img': input_img,
                    'output_img': output_img,
                    'name': self.data_dict[idx]['input_img'].split("/")[-1]}
print(f"Processing ({i + 1}/{len(files)}): {filepath}") dirname = path.dirname(filepath) filename = path.basename(filepath) basename, ext = path.splitext(filename) img = I.open(filepath) tensor = TF.to_tensor(img) C, H, W = tensor.size() print(f"Image size is {H} x {W}") tensor = tensor.view(C, H * W).permute(1, 0) dists = pairwise_distance(centroids, tensor) nearest = 1 - F.softmax(dists, dim=0) probmap = nearest.view(NC, H, W) probmap_save = path.join(dirname, "prob", basename) + ".pth" prepare_folder(probmap_save) torch.save(probmap, probmap_save) print(f"probablity_map: {probmap_save}") maxclass = probmap.argmax(dim=0) refined_colormap = centroids[maxclass, :].permute(2, 0, 1) colormap_save = path.join(dirname, "refined", basename) + ".png" prepare_folder(colormap_save) TF.to_pil_image(refined_colormap).save(colormap_save) print() # print(refined_colormap.size())