def readRGBData(self, folderName):
    """Load ``self.nfra + 1`` RGB frames of one clip into a single tensor.

    Frames are read from ``<self.rootDir>/<folderName>/image-XXXX.jpeg``,
    starting at frame number 1 and taking every second frame (1, 3, 5, ...).

    Args:
        folderName: Name of the clip folder inside ``self.rootDir``.

    Returns:
        A float tensor of shape ``(3, self.nfra + 1, 240, 320)``; slot ``i``
        along dimension 1 holds the ``i``-th frame that was read.

    Raises:
        FileNotFoundError: If one of the expected frame files is missing.
    """
    path = os.path.join(self.rootDir, folderName)
    offset = 1  # number of the first frame file that is read
    stride = 2  # every second frame on disk is used

    # N + 1 frames are needed when starting from raw frames.
    num_frames = self.nfra + 1
    frames = torch.zeros(3, num_frames, 240, 320)

    to_tensor = ToTensor()
    for i in range(num_frames):
        framenum = offset + stride * i
        img_path = os.path.join(path, 'image-%04d.jpeg' % framenum)
        # Image.open keeps the file handle open until the image is closed;
        # the context manager releases it as soon as the pixels are decoded.
        with Image.open(img_path) as image:
            # assumes every frame decodes to 3 x 240 x 320 -- TODO confirm
            frames[:, i, :] = to_tensor(image).float()
    return frames
def predict(self, filepath):
    """Run the detector on one image file and draw the detections on a copy.

    The image is resized to 416x416 for the model; the predicted boxes are
    scaled back to the original image size before drawing.

    Args:
        filepath: Path of the image to run detection on.

    Returns:
        A tuple ``(result_img, final_bboxes)`` where ``result_img`` is a copy
        of the input image that was passed to ``self.draw_bb`` for every
        detection, and ``final_bboxes`` is a list of
        ``[label, confidence, (xmin, ymin, xmax, ymax)]`` entries in pixel
        coordinates of the original image.
    """
    to_tensor = ToTensor()
    # The context manager closes the underlying file once the copy to draw on
    # and the network input have both been produced.
    with Image.open(filepath) as orig_img:
        result_img = deepcopy(orig_img)
        img_w, img_h = orig_img.size
        resized = orig_img.resize(size=(416, 416),
                                  resample=PIL.Image.BILINEAR)
        # NOTE: the previous code built a channel-swapped ([2, 1, 0]) copy and
        # overwrote it on the very next statement, so the model has always
        # been fed the channels in file order. That dead statement is dropped.
        inp = to_tensor(resized).float()

    if inp.dim() == 3:
        inp = inp.unsqueeze(0)  # add the batch dimension

    pred = self.model(inp)
    pred = self.post_process(pred)
    result_bboxes = self.get_bboxes(predictions=pred, S=[13, 26, 52], B=3,
                                    device=inp.device)

    final_bboxes = []
    for bbox in result_bboxes:
        label = self.output_labels[int(bbox[0])]
        conf = bbox[1]
        # The last four entries are treated as centre x/y and width/height and
        # multiplied by the image size, i.e. presumably normalised to [0, 1].
        xcenter, ycenter, bb_w, bb_h = bbox[-4:]
        xmin = int(np.round((xcenter - bb_w / 2) * img_w)) + 1
        ymin = int(np.round((ycenter - bb_h / 2) * img_h)) + 1
        xmax = int(np.round((xcenter + bb_w / 2) * img_w)) + 1
        ymax = int(np.round((ycenter + bb_h / 2) * img_h)) + 1
        corners = (xmin, ymin, xmax, ymax)
        self.draw_bb(result_img, label + " " + str(conf)[:5], corners)
        final_bboxes.append([label, conf, corners])
    return result_img, final_bboxes
def readData(self, folderName):
    """Load the optical-flow fields and the RGB frames of one clip.

    Flow fields come from ``<self.rootDirPGOF>/<folderName>/<k>.npy`` for
    ``k = 0 .. self.nfra - 1``; RGB frames come from
    ``<self.rootDirRGB>/<folderName>/image-XXXX.jpeg``.

    Args:
        folderName: Name of the clip folder (same name under both roots).

    Returns:
        A tuple ``(OF, frames)``:
        ``OF`` is a float tensor of shape ``(2, self.nfra, self.numpixels)``
        and ``frames`` a float tensor of shape ``(3, self.nfra, 240 * 320)``.
    """
    flow_dir = os.path.join(self.rootDirPGOF, folderName)
    # Every slot is written by the loop below, so no zero-fill is needed.
    OF = torch.empty(2, self.nfra, self.numpixels, dtype=torch.float32)
    for framenum in range(self.nfra):
        flow = np.load(os.path.join(flow_dir, str(framenum) + '.npy'))
        # Stored layout has the two flow components last; move them first
        # and flatten the spatial dimensions.
        flow = np.transpose(flow, (2, 0, 1)).reshape(2, self.numpixels)
        OF[:, framenum] = torch.from_numpy(flow).float()

    rgb_dir = os.path.join(self.rootDirRGB, folderName)
    frames = torch.zeros(3, self.nfra, 240 * 320)
    to_tensor = ToTensor()
    # NOTE(review): frame files are numbered from 1 and are stored at the
    # index equal to their number, so slot 0 stays all-zero and only
    # nfra - 1 frames are read. Kept as-is because the intended alignment
    # with OF is not visible here -- confirm whether this is an off-by-one.
    for framenum in range(1, self.nfra):
        img_path = os.path.join(rgb_dir, 'image-%04d.jpeg' % framenum)
        # Close each image file as soon as its pixels have been decoded.
        with Image.open(img_path) as image:
            pixels = to_tensor(image).float()
        frames[:, framenum, :] = pixels.view(-1, 240 * 320)
    return OF, frames
def main_worker(args, use_gpu=True):
    """Run the inpainting generator over a validation list and visualise it.

    Image paths are read from ``<dir_image>/<data_test>/val.txt``; masks are
    every ``*.png`` under ``<dir_mask>/<mask_type>``. Images and masks are
    paired in iteration order and at most ``args.num_test`` pairs are
    processed.

    Args:
        args: Namespace providing ``model``, ``pre_train``, ``dir_image``,
            ``data_test``, ``dir_mask``, ``mask_type``, ``outputs``,
            ``image_size`` and ``num_test``.
        use_gpu: Run on CUDA when true, on the CPU otherwise.
    """
    device = torch.device('cuda') if use_gpu else torch.device('cpu')

    # Model and version. The model and the checkpoint are placed on `device`
    # so that `use_gpu=False` is actually honoured.
    net = importlib.import_module('model.' + args.model)
    model = net.InpaintGenerator(args).to(device)
    model.load_state_dict(torch.load(args.pre_train, map_location=device))
    model.eval()

    # Prepare dataset.
    dataset_dir = os.path.join(args.dir_image, args.data_test)
    with open(os.path.join(dataset_dir, 'val.txt')) as f:
        image_paths = [os.path.join(dataset_dir, name)
                       for name in f.read().splitlines()]
    # NOTE(review): glob order is not sorted, so which mask is paired with
    # which image depends on directory order -- confirm this is intended.
    mask_paths = glob(os.path.join(args.dir_mask, args.mask_type, '*.png'))
    os.makedirs(args.outputs, exist_ok=True)

    trans = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size),
                          interpolation=transforms.InterpolationMode.NEAREST),
    ])
    to_tensor = ToTensor()

    # Iterate through the dataset.
    for j, (ipath, mpath) in enumerate(tqdm(zip(image_paths, mask_paths))):
        if j >= args.num_test:
            # Stop iterating instead of calling exit(), which would tear
            # down the whole interpreter from inside a library function.
            break

        with Image.open(ipath) as img:
            image = to_tensor(img.convert('RGB'))
        # Rescale from [0, 1] to [-1, 1] and add the batch dimension.
        image = (trans(image) * 2.0 - 1.0).unsqueeze(0)

        with Image.open(mpath) as img:
            mask = to_tensor(img.convert('L'))
        mask = trans(mask).unsqueeze(0)

        image, mask = image.to(device), mask.to(device)
        # Masked-out pixels are replaced by the mask value itself.
        image_masked = image * (1 - mask.float()) + mask

        with torch.no_grad():
            pred_img = model(image_masked, mask)

        # Keep known pixels from the input, take predictions inside the mask.
        comp_imgs = (1 - mask) * image + mask * pred_img
        visualize_test(j, image, image * (1 - mask), pred_img.detach(),
                       comp_imgs.detach())
def readRGBData(self, folderName):
    """Sample ``self.perVideo`` frame stacks from one video folder.

    For each stack a random start frame is drawn, then ``self.nfra`` frames
    are read with a stride of two from ``<self.rootDir>/<folderName>``
    (files named ``XXXXXXX.jpg``) and resized to 240x320.

    Args:
        folderName: Name of the video folder inside ``self.rootDir``.

    Returns:
        A list of ``self.perVideo`` float tensors, each of shape
        ``(3, self.nfra + 1, 240, 320)``.

    Raises:
        IndexError: If the folder contains no ``.jpg`` file.
        ValueError: If there are too few frames for ``random.randint`` to
            have a non-empty range.
    """
    path = os.path.join(self.rootDir, folderName)
    print(path)
    if self.perVideo <= 0:
        # Nothing to sample; the directory is not touched in this case.
        return []

    # The directory listing does not depend on the loop, so scan it once
    # instead of twice per sampled stack.
    jpg_names = [name for name in sorted(os.listdir(path)) if ".jpg" in name]
    end = len(jpg_names)
    first_frame = int(jpg_names[0].replace('.jpg', ''))

    to_tensor = ToTensor()
    bunch_of_frames = []
    for _ in range(self.perVideo):
        # NOTE(review): the upper bound ignores the stride of two, so the
        # last frame read (offset + 2 * nfra - 2) can lie past the end of
        # the video -- confirm against the data layout.
        offset = random.randint(first_frame, (end - self.nfra) - 2)

        # NOTE(review): nfra + 1 slots are allocated but only nfra frames are
        # read below, so the last slot stays zero -- confirm intent.
        frames = torch.zeros(3, self.nfra + 1, 240, 320)
        for i in range(self.nfra):
            framenum = offset + 2 * i
            img_path = os.path.join(path, str('%07d' % framenum) + '.jpg')
            # Close each image file as soon as its pixels have been decoded.
            with Image.open(img_path) as image:
                chw = to_tensor(image).float().numpy()
            # skimage expects height and width first; move channels last,
            # resize, then move them back to the front.
            hwc = skimage.transform.resize(np.transpose(chw, (1, 2, 0)),
                                           (240, 320))
            frames[:, i, :] = torch.from_numpy(np.transpose(hwc, (2, 0, 1)))
        bunch_of_frames.append(frames)
    return bunch_of_frames
def expand_multires(img_tensor, keep=3):
    """Return the first ``keep`` levels of a Gaussian pyramid of an image.

    Each pyramid level is converted to a tensor, passed through a PIL image
    and converted back to a tensor. Levels keep their own (shrinking)
    resolution; they are not resized to a common size.

    Args:
        img_tensor: Image tensor with exactly three dimensions, treated as
            (channels, height, width).
        keep: Maximum number of pyramid levels to return.

    Returns:
        A list of at most ``keep`` tensors, one per pyramid level, in the
        order produced by ``pyramid_gaussian``.
    """
    # Unpacking doubles as a check that the input has exactly three dimensions.
    _channels, _rows, _cols = img_tensor.size()

    # pyramid_gaussian is given the image with channels last.
    hwc_image = img_tensor.permute(1, 2, 0)
    levels_iter = pyramid_gaussian(hwc_image, multichannel=True)

    levels = []
    for level_idx, level in enumerate(levels_iter):
        if level_idx >= keep:
            break
        as_pil = ToPILImage()(ToTensor()(level).float())
        levels.append(ToTensor()(as_pil))
    return levels
def main_worker(args, use_gpu=True):
    """Inpaint every image in a folder and save masked/predicted/composite.

    Images are all ``*.jpg`` and ``*.png`` files in ``args.dir_image`` and
    masks all ``*.png`` files in ``args.dir_mask``; both lists are sorted and
    paired by position. For each pair three PNG files are written to
    ``args.outputs``: ``<name>_masked.png``, ``<name>_pred.png`` and
    ``<name>_comp.png``.

    Args:
        args: Namespace providing ``model``, ``pre_train``, ``dir_image``,
            ``dir_mask`` and ``outputs``.
        use_gpu: Run on CUDA when true, on the CPU otherwise.
    """
    device = torch.device('cuda') if use_gpu else torch.device('cpu')

    # Model and version. The model and the checkpoint are placed on `device`
    # so that `use_gpu=False` is actually honoured.
    net = importlib.import_module('model.' + args.model)
    model = net.InpaintGenerator(args).to(device)
    model.load_state_dict(torch.load(args.pre_train, map_location=device))
    model.eval()

    # Prepare dataset.
    image_paths = sorted(
        found
        for ext in ('.jpg', '.png')
        for found in glob(os.path.join(args.dir_image, '*' + ext))
    )
    mask_paths = sorted(glob(os.path.join(args.dir_mask, '*.png')))
    os.makedirs(args.outputs, exist_ok=True)

    to_tensor = ToTensor()

    # Iterate through the dataset. zip() stops at the shorter list, so
    # unpaired images or masks are silently ignored.
    for ipath, mpath in zip(image_paths, mask_paths):
        with Image.open(ipath) as img:
            image = to_tensor(img.convert('RGB'))
        # Rescale from [0, 1] to [-1, 1] and add the batch dimension.
        image = (image * 2.0 - 1.0).unsqueeze(0)

        with Image.open(mpath) as img:
            mask = to_tensor(img.convert('L'))
        mask = mask.unsqueeze(0)

        image, mask = image.to(device), mask.to(device)
        # Masked-out pixels are replaced by the mask value itself.
        image_masked = image * (1 - mask.float()) + mask

        with torch.no_grad():
            pred_img = model(image_masked, mask)

        # Keep known pixels from the input, take predictions inside the mask.
        comp_imgs = (1 - mask) * image + mask * pred_img

        # splitext strips only the final extension, so names that contain
        # dots ("a.b.jpg") are not truncated to "a" and cannot collide.
        image_name = os.path.splitext(os.path.basename(ipath))[0]
        out_prefix = os.path.join(args.outputs, image_name)
        postprocess(image_masked[0]).save(
            os.path.join(args.outputs, f'{image_name}_masked.png'))
        postprocess(pred_img[0]).save(
            os.path.join(args.outputs, f'{image_name}_pred.png'))
        postprocess(comp_imgs[0]).save(
            os.path.join(args.outputs, f'{image_name}_comp.png'))
        print(f'saving to {out_prefix}')
elif opt.model_type == 'rbf' or opt.model_type == 'nerf': model = modules.SingleBVPNet(type='relu', mode=opt.model_type, out_features=img_dataset.img_channels, sidelength=image_resolution, downsample=opt.downsample) else: raise NotImplementedError model.cuda() root_path = os.path.join(opt.logging_root, opt.experiment_name) if opt.mask_path: mask = Image.open(opt.mask_path) mask = ToTensor()(mask) mask = mask.float().cuda() percentage = torch.sum(mask).cpu().numpy() / np.prod(mask.shape) print("mask sparsity %f" % (percentage)) else: mask = torch.rand(image_resolution) < opt.sparsity mask = mask.float().cuda() # Define the loss if opt.prior is None: loss_fn = partial(loss_functions.image_mse, mask.view(-1, 1)) elif opt.prior == 'TV': loss_fn = partial(loss_functions.image_mse_TV_prior, mask.view(-1, 1), opt.k1, model) elif opt.prior == 'FH': loss_fn = partial(loss_functions.image_mse_FH_prior, mask.view(-1, 1), opt.k1, model)
def main_worker(args, use_gpu=True):
    """Inpaint every masked region of each panorama and save the patches.

    For each (image, mask) pair returned by ``read_paths(args)`` the mask is
    binarised, bounding boxes of the masked regions are obtained from
    ``get_bboxes``, and for every box a padded crop is cut out of the
    full-size image, resized to ``INPUT_SIZE`` x ``INPUT_SIZE``, inpainted by
    the generator, resized back to ``crop_size`` and blended with the
    original crop. Each blended crop is written to
    ``<args.outputs>/<image stem>_inpainted-<idx>.jpg``.

    NOTE: several fields of ``args`` (``dir_image``, ``dir_mask``,
    ``dataset``, ``image_size``, ``pre_train``, ``outputs``) are overwritten
    with hard-coded values at the top of the function, so whatever the
    caller passed for them is ignored.

    Args:
        args: Namespace; ``args.model`` selects the ``model.<name>`` module
            to import. It is mutated in place (see note above).
        use_gpu: Only selects the ``map_location`` used when loading the
            checkpoint; the model and all tensors are never moved to the
            GPU in this function (the ``.cuda()`` calls are commented out).
    """
    # Hard-coded overrides of the caller-supplied configuration.
    args.dir_image = '/home/rudolfs/Desktop/camera-removal/pano'
    args.dir_mask = '/home/rudolfs/Desktop/camera-removal/pano'
    args.dataset = '/home/rudolfs/Desktop/camera-removal/pano'
    args.image_size = 512
    args.pre_train = '/home/rudolfs/Desktop/AOT-GAN-for-Inpainting/places2/G0000000.pt'
    args.outputs = '/home/rudolfs/Desktop/AOT-GAN-for-Inpainting/aot-pano-pad-0p5'
    device = torch.device('cuda') if use_gpu else torch.device('cpu')

    # Model and version
    net = importlib.import_module('model.' + args.model)
    model = net.InpaintGenerator(args)  # .cuda()
    model.load_state_dict(torch.load(args.pre_train, map_location=device))
    model.eval()

    paths_image, paths_mask = read_paths(args)
    if not os.path.exists(args.outputs):
        os.makedirs(args.outputs)
    # if not os.path.exists(args.output_dir):
    #     os.makedirs(args.output_dir)
    # if not os.path.exists(args.comparison_dir):
    #     os.makedirs(args.comparison_dir)

    for path_image, path_mask in zip(paths_image, paths_mask):
        print(path_image, path_mask)

        # raw mask: background 0, foreground 1
        raw_mask = cv2.imread(path_mask)
        # convert mask to grayscale and threshold: values above 64 become 255
        mask = cv2.cvtColor(raw_mask, cv2.COLOR_BGR2GRAY)
        _, mask = cv2.threshold(mask, 64, 255, cv2.THRESH_BINARY)
        if cv2.findNonZero(mask) is None:
            print("image doesn't have non-zero pixels")
            continue
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            print("image doesn't have any contours")
            continue

        # get bounding boxes and erase small masks
        # (get_bboxes is defined elsewhere; it returns the boxes and a
        # replacement mask)
        bboxes, mask = get_bboxes(contours=contours, mask=mask)

        image = cv2.imread(path_image)
        # only for this model: it is fed RGB, OpenCV loads BGR
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_y = image.shape[0]  # image height in pixels
        image_x = image.shape[1]  # image width in pixels

        for idx, bbox in enumerate(bboxes):
            x, y, w, h = bbox

            # ========= CROPPING CAMERA =======================
            # calculate crop size as closest integer of
            # [max(camera_w, camera_h) + padding] -> reshape(512,512)
            # (find_closest_dividend is defined elsewhere; presumably it
            # rounds to a size compatible with the network -- verify)
            padding = int(max(w, h) * 0.5)
            crop_size = find_closest_dividend(max(w, h) + padding)
            if crop_size > 512:
                # Debug marker only: crops larger than 512 are still processed.
                print('bla')

            # since we want the bbox to be in the center of the crop, we add
            # the same crop margin to each side of it; an odd remainder goes
            # to the right / bottom
            crop_add_left = crop_add_right = (crop_size - w) // 2
            if (crop_size - w) % 2 != 0:
                crop_add_right += 1
            crop_add_top = crop_add_bottom = (crop_size - h) // 2
            if (crop_size - h) % 2 != 0:
                crop_add_bottom += 1

            # the bbox may be too close to the image edges; take the missing
            # margin from the opposite side instead
            if x < crop_add_left:
                crop_add_right += crop_add_left - x
                crop_add_left = x
            elif x + w + crop_add_right > image_x:
                crop_add_left += x + w + crop_add_right - image_x
                # The oversized margin makes the slice end run past the image
                # width; NumPy slicing clamps it to the right edge.
                crop_add_right = image_x
            if y < crop_add_top:
                crop_add_bottom += crop_add_top - y
                crop_add_top = y
            elif y + h + crop_add_bottom > image_y:
                crop_add_top += y + h + crop_add_bottom - image_y
                # Same trick as above: the slice end is clamped to the bottom edge.
                crop_add_bottom = image_y

            # since our pano is very big, we crop from it without resizing
            # as in the official source
            mask_large = mask[y - crop_add_top:y + h + crop_add_bottom,
                              x - crop_add_left:x + w + crop_add_right]
            image_large = image[y - crop_add_top:y + h + crop_add_bottom,
                                x - crop_add_left:x + w + crop_add_right, :]

            # use custom dataset written with automatic reshape
            # (INPUT_SIZE is a module-level constant defined elsewhere)
            image_512 = cv2.resize(image_large, (INPUT_SIZE, INPUT_SIZE))
            mask_512 = cv2.resize(mask_large, (INPUT_SIZE, INPUT_SIZE))
            # ===============================================================================================

            # read image as RGB
            image_512 = ToTensor()(image_512)
            # rescale to [-1, 1]; expanded dim axis = 0 (batch)
            image_512 = (image_512 * 2.0 - 1.0).unsqueeze(0)
            mask_512 = ToTensor()(mask_512)
            mask_512 = mask_512.unsqueeze(0)
            # image, mask = image.cuda(), mask.cuda()

            # masked-out pixels are replaced by the mask value itself
            image_masked = image_512 * (1 - mask_512.float()) + mask_512
            with torch.no_grad():
                inpainted_512 = model(image_masked, mask_512)

            # omit batch dimension
            inpainted_512 = inpainted_512[0]
            # convert image after tanh [-1..1], to [0..255]
            inpainted_512 = torch.clamp(inpainted_512, -1., 1.)
            inpainted_512 = (inpainted_512 + 1) / 2.0 * 255
            # convert from Tensor (C,H,W) -> NumPy (H,W,C) with integer values
            inpainted_512 = inpainted_512.permute(1, 2, 0)
            inpainted_512 = inpainted_512.cpu().numpy().astype(np.uint8)
            # convert back to BGR
            inpainted_512 = cv2.cvtColor(inpainted_512, cv2.COLOR_RGB2BGR)
            # resize back to large size
            inpainted_512_large = cv2.resize(inpainted_512, (crop_size, crop_size))

            # for calcs: mask as float in [0, 1] with a trailing channel axis
            mask_large = np.expand_dims(mask_large, axis=2)
            mask_large = mask_large.astype(np.float32) / 255.
            image_large = cv2.cvtColor(image_large, cv2.COLOR_RGB2BGR)

            # inpainted pixels inside the mask, original pixels outside it
            # NOTE(review): this needs the crop to be exactly
            # crop_size x crop_size; confirm that holds for boxes whose
            # crop would extend past two opposite image edges.
            output_large = inpainted_512_large * mask_large + image_large * (
                1. - mask_large)

            filename = args.outputs + '/' + os.path.splitext(
                os.path.basename(path_image))[0] + f'_inpainted-{idx}.jpg'
            cv2.imwrite(filename, output_large)