def __getitem__(self, index):
    """Return one (image, depth) pair for training.

    Looks up the sample index via ``self.lists``, reorders the raw arrays'
    axes, then rescales both and converts them to tensors.

    Args:
        index: position into ``self.lists`` (the split's index list).

    Returns:
        Tuple ``(img, dpt)`` of transformed image and depth tensors.
    """
    img_idx = self.lists[index]
    # Reverse the stored axis order; presumably the .mat file stores images
    # as (W, H, C) and depths as (W, H) -- TODO confirm against the dataset.
    img = self.imgs[img_idx].transpose(2, 1, 0)
    dpt = self.dpts[img_idx].transpose(1, 0)

    # NOTE(review): these Compose pipelines are rebuilt on every call; they
    # could be hoisted to __init__ if flow_transforms objects are reusable.
    input_transform = transforms.Compose(
        [flow_transforms.Scale(228),
         flow_transforms.ArrayToTensor()])
    # Scale_Single handles the single-channel depth map, unlike Scale.
    target_depth_transform = transforms.Compose([
        flow_transforms.Scale_Single(228),
        flow_transforms.ArrayToTensor()
    ])

    img = input_transform(img)
    dpt = target_depth_transform(dpt)
    return img, dpt
def load_data(is_training=True):
    """Build the NYU-Depth-v2 dataset for the requested split.

    Reads the train/test index files, constructs the per-modality
    transforms, and wraps everything in a ``ListDataset``. Data
    augmentation (``co_transform``) is applied only to the training split.

    Side effects:
        Sets the module-level ``IMAGE_NUM`` to the dataset length.

    Args:
        is_training: if True, return the augmented training split;
            otherwise the un-augmented test split.

    Returns:
        A ``ListDataset`` over the selected split.
    """
    global IMAGE_NUM
    train_idx_path = "data/trainNdxs.txt"
    test_idx_path = "data/testNdxs.txt"
    # NOTE(review): these three directory paths are never used inside this
    # function -- confirm whether they belong elsewhere before removing.
    input_rgb_images_dir = 'data/nyu_datasets_changed/input/'
    target_depth_images_dir = 'data/nyu_datasets_changed/target_depths/'
    target_labels_images_dir = 'data/nyu_datasets_changed/labels_38/'
    data_path = "data/nyu_depth_v2_labeled.mat"

    train_idx = np.loadtxt(train_idx_path, dtype='int')
    test_idx = np.loadtxt(test_idx_path, dtype='int')

    # Inputs are downscaled to 120, target depths to 60 (half resolution).
    input_transform = flow_transforms.Compose([flow_transforms.Scale(120)])
    target_depth_transform = flow_transforms.Compose(
        [flow_transforms.Scale_Single(60)])
    target_labels_transform = flow_transforms.Compose([])
    # Joint augmentation applied to input and targets together (train only).
    co_transform = flow_transforms.Compose([
        flow_transforms.RandomRotate(4),
        flow_transforms.RandomCrop((480, 640)),
        flow_transforms.RandomVerticalFlip()
    ])

    if is_training:
        data = ListDataset(data_path, train_idx, input_transform,
                           target_depth_transform, target_labels_transform,
                           co_transform)
    else:
        data = ListDataset(data_path, test_idx, input_transform,
                           target_depth_transform, target_labels_transform)
    IMAGE_NUM = len(data)
    return data
# Split sizes for the 1449-image NYU dataset: 1000 train / 300 val / 149 test.
NUM_TRAIN = 1000
NUM_VAL = 300
NUM_TEST = 149

# Shuffle the full file listing once so the three splits are disjoint random
# subsets. NOTE(review): `input_rgb_images_dir`, `train_on`, `val_on`,
# `test_on` are not defined in this chunk -- presumably module-level
# configuration elsewhere in the file; verify before running standalone.
listing = random.sample(os.listdir(input_rgb_images_dir), 1449)
# Each split is capped both by its fixed size and by the requested
# `*_on` count (allows training on a smaller subset).
train_listing = listing[:min(NUM_TRAIN, train_on)]
val_listing = listing[NUM_TRAIN:min(NUM_VAL + NUM_TRAIN, val_on + NUM_TRAIN)]
test_listing = listing[NUM_VAL + NUM_TRAIN:min(NUM_VAL + NUM_TRAIN + NUM_TEST, test_on + NUM_VAL + NUM_TRAIN)]
data_dir = (input_rgb_images_dir, target_depth_images_dir, target_labels_images_dir)

# Per-modality transforms: inputs scaled to 228 and tensorized; depths use
# Scale_Single (single-channel variant); labels are only tensorized.
input_transform = transforms.Compose(
    [flow_transforms.Scale(228),
     flow_transforms.ArrayToTensor()])
target_depth_transform = transforms.Compose(
    [flow_transforms.Scale_Single(228),
     flow_transforms.ArrayToTensor()])
target_labels_transform = transforms.Compose([flow_transforms.ArrayToTensor()])
## Joint augmentation applied to input, ground-truth depth, and label images
## together [no data augmentation on val and test, only on train].
co_transform = flow_transforms.Compose([
    flow_transforms.RandomCrop((480, 640)),
    flow_transforms.RandomHorizontalFlip()
])
def __getitem__(self, index):
    """Return one augmented stereo training sample.

    Loads a left/right image pair and the left-view disparity map, applies
    asymmetric photometric augmentation, optional left/right swap (when a
    right disparity is available), joint geometric augmentation, random
    occlusion of the right image, then pads everything to a fixed size.

    Args:
        index: sample index into the left/right/disparity path lists.

    Returns:
        Tuple ``(left_img, right_img, dataL)`` of the two processed image
        tensors and the float32 left-disparity array.
    """
    left = self.left[index]
    right = self.right[index]
    left_img = self.loader(left)
    right_img = self.loader(right)
    disp_L = self.disp_L[index]
    dataL = self.dploader(disp_L)
    # Invalid (infinite) disparities are zeroed so later losses can mask them.
    dataL[dataL == np.inf] = 0
    if not (self.disp_R is None):
        disp_R = self.disp_R[index]
        dataR = self.dploader(disp_R)
        dataR[dataR == np.inf] = 0
    # Fixed output size after crop/pad: 512 x 768.
    max_h = 2048 // 4
    max_w = 3072 // 4

    # Photometric unsymmetric-augmentation: left and right views each get
    # independent brightness/gamma/contrast draws (index 0 vs index 1).
    random_brightness = np.random.uniform(0.5, 2., 2)
    random_gamma = np.random.uniform(0.8, 1.2, 2)
    random_contrast = np.random.uniform(0.8, 1.2, 2)
    left_img = torchvision.transforms.functional.adjust_brightness(
        left_img, random_brightness[0])
    left_img = torchvision.transforms.functional.adjust_gamma(
        left_img, random_gamma[0])
    left_img = torchvision.transforms.functional.adjust_contrast(
        left_img, random_contrast[0])
    right_img = torchvision.transforms.functional.adjust_brightness(
        right_img, random_brightness[1])
    right_img = torchvision.transforms.functional.adjust_gamma(
        right_img, random_gamma[1])
    right_img = torchvision.transforms.functional.adjust_contrast(
        right_img, random_contrast[1])
    right_img = np.asarray(right_img)
    left_img = np.asarray(left_img)

    # Horizontal flip: swapping and mirroring both views keeps the stereo
    # geometry valid, but requires the right-view disparity (dataR) so the
    # flipped left disparity can be taken from it.
    if not (self.disp_R is None):
        if np.random.binomial(1, 0.5):
            tmp = right_img
            right_img = left_img[:, ::-1]
            left_img = tmp[:, ::-1]
            tmp = dataR
            dataR = dataL[:, ::-1]
            dataL = tmp[:, ::-1]

    # Geometric unsymmetric-augmentation: with probability 0.5, perturb the
    # right view by a small rotation (0.1) and vertical shift (2 px).
    angle = 0
    px = 0
    if np.random.binomial(1, 0.5):
        angle = 0.1
        px = 2
    co_transform = flow_transforms.Compose([
        flow_transforms.RandomVdisp(angle, px),
        flow_transforms.Scale(np.random.uniform(self.rand_scale[0],
                                                self.rand_scale[1]),
                              order=self.order),
        flow_transforms.RandomCrop((max_h, max_w)),
    ])
    augmented, dataL = co_transform([left_img, right_img], dataL)
    left_img = augmented[0]
    right_img = augmented[1]

    # Randomly occlude a rectangular region of the right image (half-size
    # 50-150 px per axis), filling it with the image's mean color.
    if np.random.binomial(1, 0.5):
        sx = int(np.random.uniform(50, 150))
        sy = int(np.random.uniform(50, 150))
        cx = int(np.random.uniform(sx, right_img.shape[0] - sx))
        cy = int(np.random.uniform(sy, right_img.shape[1] - sy))
        right_img[cx - sx:cx + sx, cy - sy:cy + sy] = np.mean(
            np.mean(right_img, 0), 0)[np.newaxis, np.newaxis]

    # Zero-pad on the top and right so every sample is exactly max_h x max_w.
    h, w, _ = left_img.shape
    top_pad = max_h - h
    left_pad = max_w - w
    left_img = np.lib.pad(left_img, ((top_pad, 0), (0, left_pad), (0, 0)),
                          mode='constant', constant_values=0)
    right_img = np.lib.pad(right_img, ((top_pad, 0), (0, left_pad), (0, 0)),
                           mode='constant', constant_values=0)
    # Pad the disparity through a temporary 4-D view, then squeeze back.
    dataL = np.expand_dims(np.expand_dims(dataL, 0), 0)
    dataL = np.lib.pad(dataL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                       mode='constant', constant_values=0)[0, 0]
    dataL = np.ascontiguousarray(dataL, dtype=np.float32)

    # Final normalization/tensor conversion from the project's preprocess
    # module -- NOTE(review): exact ops not visible here.
    processed = preprocess.get_transform()
    left_img = processed(left_img)
    right_img = processed(right_img)
    return (left_img, right_img, dataL)