def train_transform(self, rgb, depth):
    """Randomly augment an (rgb, depth) pair for training.

    Draws a random zoom, rotation and horizontal-flip decision, applies the
    same geometric pipeline to both modalities, color-jitters only the RGB
    image, and rescales RGB to [0, 1].

    Args:
        rgb: RGB image accepted by the project `transforms` pipeline.
        depth: depth map aligned with `rgb`.

    Returns:
        tuple: (augmented rgb scaled to [0, 1], augmented depth).
    """
    scale = np.random.uniform(low=1, high=1.5)
    # Dividing depth by the zoom factor keeps it metrically consistent with
    # the Resize(scale) applied below.
    depth = depth / scale
    angle = np.random.uniform(-5.0, 5.0)
    should_flip = np.random.uniform(0.0, 1.0) < 0.5
    # NOTE(review): the original code also drew h_offset/v_offset random crop
    # offsets here but never used them (dead code); removed. This changes the
    # RNG consumption per sample, which only affects which augmentations are
    # drawn, not their distribution.
    base_transform = transforms.Compose([
        transforms.Resize(250 / iheight),  # shrink first: rotation is slow on full-size images
        transforms.Rotate(angle),
        transforms.Resize(scale),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(should_flip),
    ])
    rgb = base_transform(rgb)
    rgb = self.color_jitter(rgb)  # photometric jitter on RGB only
    rgb = rgb / 255.0
    depth = base_transform(depth)
    return (rgb, depth)
def train_transform(self, im, gt):
    """Jointly augment an image / ground-truth depth pair for training.

    Applies a shared geometric pipeline (crop, resize, rotate, zoom, center
    crop, flip) to both inputs, color-jitters only the image, rescales the
    image to [0, 1] and the depth by 100 * zoom, and returns tensors.
    """
    im = np.array(im).astype(np.float32)
    gt = np.array(gt).astype(np.float32)

    # Random augmentation parameters (draw order preserved for the RNG stream).
    zoom = np.random.uniform(1.0, 1.5)
    rotation_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)
    geometric = my_transforms.Compose([
        my_transforms.Crop(130, 10, 240, 1200),
        my_transforms.Resize(460 / 240, interpolation='bilinear'),
        my_transforms.Rotate(rotation_deg),
        my_transforms.Resize(zoom),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(flip)
    ])

    im_aug = jitter(geometric(im))
    gt_aug = geometric(gt)

    im_aug = np.array(im_aug).astype(np.float32) / 255.0
    # Dividing by zoom keeps the ground truth metrically consistent with
    # Resize(zoom); 100.0 is the dataset's depth unit divider.
    gt_aug = np.array(gt_aug).astype(np.float32) / (100.0 * zoom)

    im_tensor = to_tensor(im_aug)
    gt_tensor = to_tensor(gt_aug).unsqueeze(0)  # add channel dim: [1, H, W]
    return im_tensor, gt_tensor
def train_transform(self, rgb, depth):
    """Augment an (rgb, depth) pair with crop/rotate/zoom/flip.

    Supports two simulation modes via self.augArgs: variable focal length
    (depth left unscaled) and variable global scale (depth multiplied by a
    group factor from getDepthGroup()).
    """
    s = self.getFocalScale()
    # Under variable-focal-length simulation the raw depth is kept as-is;
    # otherwise divide by s so depth stays consistent with Resize(s) below.
    depth_aug = depth if self.augArgs.varFocus else depth / s
    if self.augArgs.varScale:
        # Variable global-scale simulation.
        depth_aug = depth_aug * self.getDepthGroup()

    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    geometric = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(rot_deg),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_aug = geometric(rgb)
    rgb_aug = self.color_jitter(rgb_aug)  # photometric jitter on RGB only
    rgb_aug = np.asfarray(rgb_aug, dtype='float') / 255

    # Scipy's affine_transform raised RuntimeError when the depth map was
    # passed as a plain ndarray, hence the float32 conversion first.
    depth_aug = np.asfarray(depth_aug, dtype='float32')
    return rgb_aug, geometric(depth_aug)
def train_transform(self, rgb, depth):
    """Augment an (rgb, depth) pair with resize/rotate/zoom/flip.

    Supports two simulation modes via self.augArgs: variable focal length
    (depth left unscaled) and variable global scale (depth multiplied by a
    group factor from getDepthGroup()).
    """
    s = self.getFocalScale()
    # Under variable-focal-length simulation the raw depth is kept as-is;
    # otherwise divide by s so depth stays consistent with Resize(s) below.
    depth_aug = depth if self.augArgs.varFocus else depth / s
    if self.augArgs.varScale:
        # Variable global-scale simulation.
        depth_aug = depth_aug * self.getDepthGroup()

    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    geometric = transforms.Compose([
        # Shrink first for computational efficiency: rotation is slow.
        transforms.Resize(250.0 / iheight),
        transforms.Rotate(rot_deg),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_aug = self.color_jitter(geometric(rgb))  # photometric jitter on RGB only
    rgb_aug = np.asfarray(rgb_aug, dtype='float') / 255
    return rgb_aug, geometric(depth_aug)
def train_transform(self, rgb, depth):
    """Randomly augment an (rgb, depth) pair for training.

    Applies resize/rotate/zoom/center-crop/flip to both modalities and
    rescales RGB to [0, 1]. Color jitter is intentionally not applied here
    (it was disabled in the original code).

    Returns:
        tuple: (augmented rgb in [0, 1], augmented depth).
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # Dividing depth by the zoom keeps it metrically consistent with Resize(s).
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # NOTE(review): removed an unused local (`random_size`) and a dead
    # commented-out torchvision pipeline; behavior is unchanged.
    transform2 = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # shrink first: rotation is slow
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform2(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform2(depth_np)
    return rgb_np, depth_np
def train_transform(self, rgb, depth, rgb_near):
    """Augment rgb/depth plus an optional nearby RGB frame.

    Side effect: self.K is replaced with intrinsics rescaled by the total
    resize factor of the geometric pipeline.
    """
    zoom = np.random.uniform(1.0, 1.5)
    # Keep depth metrically consistent with the Resize(zoom) below.
    depth_scaled = depth / zoom
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    geometric = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # shrink first: rotation is slow
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_out = self.color_jitter(geometric(rgb))  # jitter only the main frame
    rgb_out = np.asfarray(rgb_out, dtype='float') / 255

    rgb_near_out = None
    if rgb_near is not None:
        rgb_near_out = np.asfarray(geometric(rgb_near), dtype='float') / 255

    depth_out = geometric(depth_scaled)
    # Keep the camera intrinsics in sync with the combined resize factor.
    self.K = TransfromIntrinsics(self.K, (250.0 / iheight) * zoom,
                                 self.output_size)
    return rgb_out, depth_out, rgb_near_out
def train_transform(self, im, gt, mask):
    """Jointly augment an image, ground-truth depth and validity mask.

    All three are first resized to 512x256, then pass through a shared
    rotate/zoom/center-crop/flip pipeline; only the image is color-jittered.
    Returns tensors, with gt and mask given a leading channel dimension.
    """
    target_wh = (512, 256)
    im = np.array(im).astype(np.float32)
    im = cv2.resize(im, target_wh, interpolation=cv2.INTER_AREA)
    gt = cv2.resize(gt, target_wh, interpolation=cv2.INTER_AREA)
    mask = cv2.resize(mask, target_wh, interpolation=cv2.INTER_AREA)

    # Random augmentation parameters (draw order preserved for the RNG stream).
    zoom = np.random.uniform(1.0, 1.5)
    rotation_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)
    geometric = my_transforms.Compose([
        my_transforms.Rotate(rotation_deg),
        my_transforms.Resize(zoom),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(flip)
    ])

    im_aug = jitter(geometric(im))
    gt_aug = geometric(gt)
    mask_aug = geometric(mask)

    im_aug = np.array(im_aug).astype(np.float32) / 255.0
    # Dividing by zoom keeps gt metrically consistent with Resize(zoom).
    gt_aug = np.array(gt_aug).astype(np.float32) / zoom
    mask_aug = np.array(mask_aug).astype(np.float32)

    im_tensor = to_tensor(im_aug)
    gt_tensor = to_tensor(gt_aug).unsqueeze(0)      # [1, H, W]
    mask_tensor = to_tensor(mask_aug).unsqueeze(0)  # [1, H, W]
    return im_tensor, gt_tensor, mask_tensor
def train_transform(self, rgb, depth):
    """Apply sparse-to-dense style training augmentation.

    [Reference]
    https://github.com/fangchangma/sparse-to-dense.pytorch/blob/master/dataloaders/nyu_dataloader.py

    Args:
        rgb (np.array): RGB image (shape=[H,W,3])
        depth (np.array): Depth image (shape=[H,W])

    Returns:
        np.array: transformed, color-jittered RGB scaled to [0, 1]
        np.array: transformed depth
        np.array: transformed RGB without color jitter (for 2D mesh creation)
    """
    # Random augmentation parameters.
    zoom = np.random.uniform(1.0, 1.5)
    depth_zoomed = depth / zoom  # keep depth consistent with Resize(zoom)
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    pipeline = transforms.Compose([
        transforms.Resize(250.0 / RAW_HEIGHT),  # shrink first: rotation is slow
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop(self.img_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_transformed = pipeline(rgb)
    # Un-jittered copy kept aside for canny edge detection downstream.
    rgb_for_edge = np.asfarray(rgb_transformed)
    rgb_jittered = np.asfarray(color_jitter(rgb_transformed)) / 255
    return rgb_jittered, pipeline(depth_zoomed), rgb_for_edge
def train_transform(self, rgb, depth):
    """KITTI-style training augmentation with dataset depth normalisation."""
    zoom = np.random.uniform(1.0, 1.5)
    # Divide by the zoom factor (metric consistency with Resize) and by the
    # dataset-level depth divider in a single step.
    depth_scaled = depth / (zoom * self.depth_divider)
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    pipeline = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_out = self.color_jitter(pipeline(rgb))  # photometric jitter on RGB only
    rgb_out = np.asfarray(rgb_out, dtype='float') / 255

    # Scipy's affine_transform raised RuntimeError when the depth map was
    # passed as a plain ndarray, hence the float32 conversion first.
    depth_out = pipeline(np.asfarray(depth_scaled, dtype='float32'))
    return rgb_out, depth_out
def train_transform(self, rgb, depth):
    """NYU-style training augmentation: resize, rotate, zoom, crop, flip."""
    zoom = np.random.uniform(1.0, 1.5)
    depth_zoomed = depth / zoom  # keep depth consistent with Resize(zoom)
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # shrink first: rotation is slow
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_out = self.color_jitter(pipeline(rgb))  # photometric jitter on RGB only
    rgb_out = np.asfarray(rgb_out, dtype='float') / 255
    return rgb_out, pipeline(depth_zoomed)
def _train_transform(self, rgb, sparse_depth, depth_gt):
    """Jointly augment rgb, sparse depth input and dense ground truth.

    Both depth channels are divided by the zoom factor so that Resize(zoom)
    keeps them metrically consistent; rgb is color-jittered and rescaled
    to [0, 1] floats.
    """
    zoom = np.random.uniform(1.0, 1.5)
    depth_gt = depth_gt / zoom
    sparse_depth = sparse_depth / zoom
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    pipeline = transforms.Compose([
        transforms.Crop(*self._road_crop),
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb = pipeline(rgb)
    sparse_depth = pipeline(sparse_depth)
    # Scipy's affine_transform raised RuntimeError when the depth map was
    # passed as a plain ndarray, hence the float32 conversion first.
    depth_gt = pipeline(np.asfarray(depth_gt, dtype='float32'))
    rgb = self._color_jitter(rgb)  # photometric jitter on RGB only
    rgb = np.asfarray(rgb, dtype='float') / 255  # [0,255] -> [0.0, 1.0]
    return rgb, sparse_depth, depth_gt
def train_transform(self, rgb, depth):
    """NYU-style augmentation using a random (rather than center) crop."""
    zoom = np.random.uniform(1.0, 1.5)
    depth_zoomed = depth / zoom  # keep depth consistent with Resize(zoom)
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5

    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # shrink first: rotation is slow
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.RandomCrop(self.output_size),
        transforms.HorizontalFlip(flip)
    ])

    rgb_out = self.color_jitter(pipeline(rgb))  # photometric jitter on RGB only
    rgb_out = np.asfarray(rgb_out, dtype='float') / 255
    return rgb_out, pipeline(depth_zoomed)
def train_transform(self, rgb, depth):
    """Augment an (rgb, depth) pair and resize both to self.output_size."""
    zoom = np.random.uniform(1.0, 1.5)
    depth_aug = depth / zoom  # keep depth consistent with Resize(zoom)
    rot_deg = np.random.uniform(-5.0, 5.0)
    flip = np.random.uniform(0.0, 1.0) < 0.5
    iheight = rgb.shape[0]  # native height drives the pre-rotation shrink

    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # shrink first: rotation is slow
        transforms.Rotate(rot_deg),
        transforms.Resize(zoom),
        transforms.CenterCrop((228, 304)),
        transforms.HorizontalFlip(flip),
        transforms.Resize(self.output_size),
    ])

    rgb_out = self.color_jitter(pipeline(rgb))  # photometric jitter on RGB only
    rgb_out = np.asfarray(rgb_out, dtype='float') / 255

    if depth_aug.ndim != 2:
        # Diagnostic for unexpectedly-shaped depth maps.
        print("Wrong Depth ", depth_aug)
    return rgb_out, pipeline(depth_aug)
def train_transform(self, attrib_list):
    """Augment every modality in `attrib_list` with one shared pipeline.

    Applies resize/rotate/zoom/center-crop/h-flip/v-flip to every entry,
    rescales depth-like modalities by a common factor, normalises 2D-weight
    modalities by image width, jitters 'rgb', and converts 'rgb'/'grey' to
    CHW float arrays in [0, 1].

    Args:
        attrib_list (dict): modality name -> image; must contain 'gt_depth'.

    Returns:
        dict: transformed modalities plus 'scale' (the inverse of the factor
        applied to depth-like modalities).
    """
    iheight = attrib_list['gt_depth'].shape[0]
    iwidth = attrib_list['gt_depth'].shape[1]
    s = np.random.uniform(1.0, 1.5)  # random scaling
    angle = np.random.uniform(-15.0, 15.0)  # random rotation degrees
    hdo_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    vdo_flip = np.random.uniform(0.0, 1.0) < 0.5  # random vertical flip
    # perform 1st step of data augmentation
    transform = transforms.Compose([
        transforms.Resize(
            270.0 / iheight
        ),  # this is for computational efficiency, since rotation can be slow
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(hdo_flip),
        transforms.VerticalFlip(vdo_flip)
    ])
    attrib_np = dict()
    if self.depth_divider == 0:
        # depth_divider == 0 means: derive the normalisation from the data
        # itself ('fd' or 'kor' modality), falling back to 50.
        if 'fd' in attrib_list:
            minmax_image = transform(attrib_list['fd'])
            max_depth = max(minmax_image.max(), 1.0)
        if 'kor' in attrib_list:
            minmax_image = transform(attrib_list['kor'])
            max_depth = max(minmax_image.max(), 1.0)
        else:
            # NOTE(review): this `else` pairs with the 'kor' check, so when
            # 'fd' is present but 'kor' is absent the fd-derived max_depth is
            # overwritten with 50 — looks unintended; confirm against the
            # original (un-collapsed) source.
            max_depth = 50
        scale = 10.0 / max_depth  # 10 is arbitrary: the network only converges in a specific range
    else:
        scale = 1.0 / self.depth_divider
    attrib_np['scale'] = 1.0 / scale
    for key, value in attrib_list.items():
        attrib_np[key] = transform(value)
        if key in Modality.need_divider:  # depth-like modalities, e.g. ['gt_depth','fd','kor','kde','kgt','dor','dde','d3dwde','d3dwor','dvor','dvde','dvgt']
            attrib_np[key] = scale * attrib_np[
                key]  # normalise depth into the network's working range
        elif key in Modality.image_size_weight_names:  # e.g. ['d2dwor', 'd2dwde', 'd2dwgt']
            attrib_np[key] = attrib_np[key] / (
                iwidth * 1.5)  # 1.5 approximates sqrt(2) - the square's diagonal
    if 'rgb' in attrib_np:
        attrib_np['rgb'] = self.color_jitter(
            attrib_np['rgb'])  # random color jittering
        attrib_np['rgb'] = (np.asfarray(attrib_np['rgb'], dtype='float') /
                            255).transpose(
                                (2, 0, 1))  # all channels need to be C x H x W
    if 'grey' in attrib_np:
        attrib_np['grey'] = np.expand_dims(
            np.asfarray(attrib_np['grey'], dtype='float') / 255, axis=0)
    return attrib_np
def __getitem__(self, idx):
    """Load, augment and return one (rgbdm, depth) training/eval sample.

    Reads the RGB image with PIL and the depth map with OpenCV, applies a
    split-dependent transform pipeline, masks out depths > 500, rescales
    depth into [0, 10] by its per-sample max, and returns a tensor built by
    self.create_rgbdm plus the depth tensor.
    """
    # read input image
    filename = self.filenames[idx]
    filenameGt = self.filenamesGt[idx]
    rgb_image = Image.open(filename).convert('RGB')
    # ANYDEPTH keeps the original (e.g. 16-bit) depth precision.
    depth_image = cv2.imread(filenameGt,
                             flags=(cv2.IMREAD_GRAYSCALE
                                    | cv2.IMREAD_ANYDEPTH))
    if depth_image.ndim < 2:
        # Diagnostic for malformed depth files.
        print(depth_image.shape)
        print(filenameGt)
    # Random zoom; depth is divided by the zoom to stay metrically consistent.
    # NOTE(review): these random draws also run for val/test samples, where
    # only the nearest-neighbour resize below uses `s` — confirm intended.
    _s = np.random.uniform(1.0, 1.5)
    depth_image = depth_image / _s
    s = (np.int(365 * _s), np.int(547 * _s))  # (height, width) after zoom
    depth_image = np.asarray(cv2.resize(depth_image,
                                        dsize=(s[1], s[0]),
                                        interpolation=cv2.INTER_NEAREST),
                             dtype=np.float32)
    # s = (912,608)
    degree = np.random.uniform(-5.0, 5.0)
    do_flip = np.random.uniform(0.0, 1.0)  # NOTE(review): drawn but never used
    if self.split == 'train':
        tRgb = data_transform.Compose(
            [  # transforms.functional.crop(130,10,1368,912),
                transforms.Resize(s),
                data_transform.Rotation(degree),
                transforms.ColorJitter(brightness=0.4,
                                       contrast=0.4,
                                       saturation=0.4),
                transforms.CenterCrop((352, 512)),
                transforms.ToTensor(),
            ])
        tDepth = data_transform.Compose(
            [  # transforms.functional.crop(130,10,1368,912),
                cfctransforms.Rotate(degree),
                cfctransforms.CenterCrop((352, 512)),
            ])
        rgb_image = tRgb(rgb_image)
        depth_image = tDepth(depth_image)
        depth_image = np.asarray(depth_image, dtype=np.float32)
        ### exclude points with depth > 500m ####
        sparse_depth = np.zeros(depth_image.shape)
        mask_l = depth_image > 0
        mask_keep = np.bitwise_and(mask_l, depth_image <= 500)
        sparse_depth[mask_keep] = depth_image[mask_keep]
        depth_image = sparse_depth
        # depth_image = scale(depth_image, out_range=(0.01, 1)) #scaling of depth maps
        # Normalise depth into [0, 10] using the per-sample maximum.
        max_depth = max(depth_image.max(), 1.0)
        depth_image = (10 / max_depth) * depth_image
    if self.split == 'val' or self.split == 'test':
        s = (365, 547)
        depth_image = np.asarray(cv2.resize(depth_image,
                                            dsize=(s[1], s[0]),
                                            interpolation=cv2.INTER_NEAREST),
                                 dtype=np.float32)
        tRgb = data_transform.Compose(
            [  # transforms.functional.crop(130,10,1368,912),
                # data_transform.Rotation(degree),
                transforms.Resize(s),
                # transforms.CenterCrop((228*1, 304*1)),
                transforms.CenterCrop((352, 512)),
                #transforms.CenterCrop((365,547)),
                transforms.ToTensor(),
                # transforms.Normalize((0.0115, 0.0124, 0.0111), (0.0085, 0.0086, 0.0084)),
                # transforms.ToPILImage()
            ])
        tDepth = data_transform.Compose(
            [  # transforms.functional.crop(130,10,1368,912),
                # data_transform.Rotation(degree),
                cfctransforms.Resize(1.0),
                cfctransforms.CenterCrop((352, 512)),
                # transforms.CenterCrop((228*1, 304*1)),
                #cfctransforms.CenterCrop((365,547)),
                # transforms.ToTensor()
            ])
        rgb_image = tRgb(rgb_image)
        # depth_image = transforms.functional.crop(depth_image, 130, 10, 548, 821)
        depth_image = tDepth(depth_image)
        # print(depth_image.shape)
        depth_image = np.asarray(depth_image, dtype=np.float32)
        ### exclude points with depth > 500m ####
        sparse_depth = np.zeros(depth_image.shape)
        mask_l = depth_image > 0
        mask_keep = np.bitwise_and(mask_l, depth_image <= 500)
        sparse_depth[mask_keep] = depth_image[mask_keep]
        depth_image = sparse_depth
        #print("max", depth_image.max())
        # depth_image = scale(depth_image, out_range=(0.01, 1))
        max_depth = max(depth_image.max(), 1.0)
        depth_image = (10 / max_depth) * depth_image
        scale = max_depth / 10  # NOTE(review): computed but never used/returned
    # NOTE(review): `rgb_np` and `depth_np` are undefined in this method —
    # any of these branches would raise NameError, and `input_np` is never
    # used afterwards. Looks like leftover code from another dataloader;
    # confirm `self.modality` never takes these values here.
    if self.modality == 'rgb':
        input_np = rgb_np
    elif self.modality == 'rgbd':
        input_np = self.create_rgbd(rgb_np, depth_np)
    elif self.modality == 'd':
        input_np = self.create_sparse_depth(rgb_np, depth_np)
    depth_image = transforms.ToTensor()(depth_image)
    # create_rgbdm fuses the CHW rgb tensor (back to HWC numpy) and depth
    # into the network input; both outputs are returned as tensors.
    return transforms.ToTensor()(self.create_rgbdm(
        rgb_image.squeeze(0).numpy().transpose(1, 2, 0),
        depth_image.squeeze(0).numpy())), depth_image