def train_transform(rgb, sparse, target, rgb_near, args):
    """Jointly augment RGB, sparse depth, target depth, and a nearby RGB frame.

    A single random horizontal-flip decision is shared across all modalities
    so they stay pixel-aligned; RGB frames additionally receive a photometric
    jitter controlled by ``args.jitter``.  Rotation/scaling augmentation is
    currently disabled (commented out).
    """
    # s = np.random.uniform(1.0, 1.5) # random scaling
    # angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    transform_geometric = transforms.Compose([
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        transforms.BottomCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    if sparse is not None:
        sparse = transform_geometric(sparse)
    target = transform_geometric(target)
    if rgb is not None:
        # Jitter factors are sampled once and shared by rgb and rgb_near so
        # both frames receive the same photometric change.
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)
        # NOTE(review): rgb_near is only transformed when rgb is present —
        # confirm callers never pass rgb=None together with a non-None rgb_near.
        if rgb_near is not None:
            rgb_near = transform_rgb(rgb_near)
    # sparse = drop_depth_measurements(sparse, 0.9)
    return rgb, sparse, target, rgb_near
def val_transform(self, rgb, depth):
    """Validation-time transform: resize to 240 rows and center-crop.

    When ``augArgs.varScale`` is set, the depth map is multiplied by a
    per-sample scale group; when ``augArgs.varFocus`` is set, an extra resize
    by the focal scale is inserted without correcting depth values.
    """
    focal_scale = self.getFocalScale()
    # Optionally simulate a global depth-scale change.
    depth_np = depth * self.getDepthGroup() if self.augArgs.varScale else depth
    steps = [transforms.Resize(240.0 / iheight)]
    if self.augArgs.varFocus:
        # Resize both images without correcting the depth values.
        steps.append(transforms.Resize(focal_scale))
    steps.append(transforms.CenterCrop(self.output_size))
    transform = transforms.Compose(steps)
    rgb_np = np.asfarray(transform(rgb), dtype='float') / 255
    return rgb_np, transform(depth_np)
def __init__(self, root, isTrain=True):
    """Segmentation dataset rooted at ``root`` (expects img/, gt/, list/).

    Args:
        root (str): Dataset root directory.
        isTrain (bool): Selects train_aug.txt with augmentation transforms,
            otherwise val.txt with deterministic transforms.
    """
    self.images_root = os.path.join(root, 'img')
    self.labels_root = os.path.join(root, 'gt')
    self.list_root = os.path.join(root, 'list')
    # print('image root = ', self.images_root)
    # print('labels root = ', self.labels_root)
    if isTrain:
        list_path = os.path.join(self.list_root, 'train_aug.txt')
        self.input_transform = transforms.Compose([
            transforms.RandomRotation(10),  # random rotation
            transforms.CenterCrop(256),
            transforms.RandomHorizontalFlip(),  # random horizontal flip
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        self.target_transform = transforms.Compose(
            [transforms.CenterCrop(256), transform.ToLabel()])
    else:
        list_path = os.path.join(self.list_root, 'val.txt')
        self.input_transform = transforms.Compose([
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        self.target_transform = transforms.Compose(
            [transforms.CenterCrop(256), transform.ToLabel()])
    # Fix: close the list file instead of leaking the handle (the original
    # iterated over a bare open() whose descriptor was never released).
    with open(list_path) as list_file:
        self.filenames = [i_id.strip() for i_id in list_file]
def val_transform(self, rgb, depth):
    """Validation transform for KITTI-style frames: crop, optional resizes,
    center-crop, and RGB normalization to [0, 1].
    """
    focal_scale = self.getFocalScale()
    # Cast the depth map to float32 up front.
    # (This used to be the last step, not sure if it goes here?)
    depth = np.asfarray(depth, dtype='float32')
    # Optionally simulate a global depth-scale change.
    depth_np = depth * self.getDepthGroup() if self.augArgs.varScale else depth
    steps = [transforms.Crop(130, 10, 240, 1200)]
    if self.augArgs.varFocus:
        # Resize both images without correcting the depth values.
        steps.append(transforms.Resize(focal_scale))
    steps.append(transforms.CenterCrop(self.output_size))
    transform = transforms.Compose(steps)
    rgb_np = np.asfarray(transform(rgb), dtype='float') / 255
    return rgb_np, transform(depth_np)
def train_transform_label(self, rgb, depth, label): s = np.random.uniform(1.0, 1.5) # random scaling depth_np = depth # / s angle = np.random.uniform(-5.0, 5.0) # random rotation degrees do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip shift_x = np.random.uniform(-50.0, 50.0) # perform 1st step of data augmentation transform = transforms.Compose([ # transforms.Translate(shift_x, 0.0), transforms.Resize( 150.0 / iheight ), # this is for computational efficiency, since rotation can be slow # transforms.Rotate(angle), # transforms.Resize(s), # transforms.CenterCrop(self.output_size), transforms.HorizontalFlip(do_flip) ]) label_transform = transforms.Compose([ # transforms.Translate(shift_x / 2.0, 0.0), # transforms.CenterCrop(self.output_size), transforms.HorizontalFlip(do_flip) ]) rgb_np = transform(rgb) rgb_np = self.color_jitter(rgb_np) # random color jittering rgb_np = np.asfarray(rgb_np, dtype='float') / 255 depth_np = transform(depth_np) label_np = label_transform(label) return rgb_np, depth_np, label_np
def train_transform(self, rgb, depth):
    """Random train-time augmentation: resize, rotate, scale, crop, flip.

    Depth is divided by the random scale factor so metric depth stays
    consistent with the rescaled image.
    """
    scale = np.random.uniform(low=1, high=1.5)
    depth = depth / scale  # keep depth metrically consistent with the rescale
    angle = np.random.uniform(-5.0, 5.0)
    should_flip = np.random.uniform(0.0, 1.0) < 0.5
    # NOTE(review): h_offset/v_offset are never used (presumably leftovers
    # from a random crop replaced by CenterCrop).  Kept as-is because they
    # consume draws from the global np.random stream — confirm before removing.
    h_offset = int((768 - 228) * np.random.uniform(0.0, 1.0))
    v_offset = int((1024 - 304) * np.random.uniform(0.0, 1.0))
    base_transform = transforms.Compose([
        transforms.Resize(250 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(scale),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(should_flip),
    ])
    rgb = base_transform(rgb)
    rgb = self.color_jitter(rgb)  # random color jittering
    rgb = rgb / 255.0  # normalize RGB to [0, 1]
    depth = base_transform(depth)
    return (rgb, depth)
def train_transform(self, rgb, depth): s = np.random.uniform(1.0, 1.5) # random scaling random_size = (int(s * 224), int(s * 224)) depth_np = depth / s angle = np.random.uniform(-5.0, 5.0) # random rotation degrees do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip # perform 1st step of data augmentation # transform = torchvision.transforms.Compose([ # torchvision.transforms.Resize(self.output_size), # this is for computational efficiency, since rotation can be slow # torchvision.transforms.RandomRotation(angle), # torchvision.transforms.Resize(random_size), # torchvision.transforms.CenterCrop(self.output_size), # torchvision.transforms.RandomHorizontalFlip(do_flip) #]) transform2 = transforms.Compose([ transforms.Resize( 250.0 / iheight ), # this is for computational efficiency, since rotation can be slow transforms.Rotate(angle), transforms.Resize(s), transforms.CenterCrop(self.output_size), transforms.HorizontalFlip(do_flip) ]) rgb_np = transform2(rgb) #rgb_n = Image.fromarray(np.uint8(rgb_np * 255)) #rgb_np = self.color_jitter(rgb_n) # random color jittering rgb_np = np.asfarray(rgb_np, dtype='float') / 255 depth_np = transform2(depth_np) #depth_np = np.asfarray(depth_np, dtype='float') / 255 return rgb_np, depth_np
def train_transform(self, rgb, depth):
    """Train-time augmentation with focal-length / global-scale simulation.

    With varFocus the depth is left uncorrected for the focal resize;
    otherwise depth is divided by the focal scale.  varScale additionally
    multiplies depth by a per-sample scale group.
    """
    s = self.getFocalScale()
    if (self.augArgs.varFocus):  # Variable focal length simulation
        depth_np = depth
    else:
        depth_np = depth / s  # Correct for focal length
    if (self.augArgs.varScale):  # Variable global scale simulation
        scale = self.getDepthGroup()
        depth_np = depth_np * scale
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    # perform 1st step of data augmentation
    transform = transforms.Compose([
        transforms.Resize(
            250.0 / iheight
        ),  # this is for computational efficiency, since rotation can be slow
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    rgb_np = self.color_jitter(rgb_np)  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)
    return rgb_np, depth_np
def train_transform(self, rgb, depth, rgb_near): s = np.random.uniform(1.0, 1.5) # random scaling depth_np = depth / s angle = np.random.uniform(-5.0, 5.0) # random rotation degrees do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip # perform 1st step of data augmentation transform = transforms.Compose([ transforms.Resize( 250.0 / iheight ), # this is for computational efficiency, since rotation can be slow transforms.Rotate(angle), transforms.Resize(s), transforms.CenterCrop(self.output_size), transforms.HorizontalFlip(do_flip) ]) rgb_np = transform(rgb) rgb_np = self.color_jitter(rgb_np) # random color jittering rgb_np = np.asfarray(rgb_np, dtype='float') / 255 rgb_near_np = None if rgb_near is not None: rgb_near_np = transform(rgb_near) rgb_near_np = np.asfarray(rgb_near_np, dtype='float') / 255 depth_np = transform(depth_np) self.K = TransfromIntrinsics(self.K, (250.0 / iheight) * s, self.output_size) return rgb_np, depth_np, rgb_near_np
def train_transform(self, rgb, depth):
    """Train-time augmentation that first synthesizes an underwater-styled RGB.

    NOTE(review): ``rgb /= ...`` mutates the input array in place when it is
    a float ndarray — confirm callers do not reuse the original rgb.
    """
    # for create fake underwater images
    rgb = uw_style(rgb, depth)
    rgb /= rgb.max() / 255  # rescale so the brightest value maps to 255
    rgb = rgb.astype(np.uint8)
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees (unused)
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    # perform 1st step of data augmentation
    transform = transforms.Compose([
        # transforms.Rotate(angle),
        # transforms.Resize(s),
        # transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip),
        transforms.Resize(size=self.output_size)
    ])
    rgb_np = transform(rgb)
    rgb_np = self.color_jitter(rgb_np)  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)
    return rgb_np, depth_np
def val_transform(self, rgb, depth):
    """
    [Reference] https://github.com/fangchangma/sparse-to-dense.pytorch/blob/master/dataloaders/nyu_dataloader.py

    Deterministic validation transform: resize to 240 rows, center-crop.

    Args:
        rgb (np.array): RGB image (shape=[H,W,3])
        depth (np.array): Depth image (shape=[H,W])

    Returns:
        torch.Tensor: Tranformed RGB image
        torch.Tensor: Transformed Depth image
        np.array: Transformed RGB image without color jitter (for 2D mesh creation)
    """
    resize_and_crop = transforms.Compose([
        transforms.Resize(240.0 / RAW_HEIGHT),
        transforms.CenterCrop(self.img_size),
    ])
    rgb_resized = resize_and_crop(rgb)
    # Keep an un-normalized copy for mesh/edge extraction.
    rgb_np_for_edge = np.asfarray(rgb_resized)
    rgb_np = np.asfarray(rgb_resized) / 255
    return rgb_np, resize_and_crop(depth), rgb_np_for_edge
def train_transform(self, im, gt):
    """Train-time augmentation returning torch tensors.

    The ground truth is divided by ``100.0 * s`` — the scale factor keeps gt
    consistent with the random resize, and the 100 is presumably a unit
    conversion (cm -> m); TODO confirm against the dataset.
    """
    im = np.array(im).astype(np.float32)
    gt = np.array(gt).astype(np.float32)
    s = np.random.uniform(1.0, 1.5)  # random scaling
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)
    transform = my_transforms.Compose([
        my_transforms.Crop(130, 10, 240, 1200),
        my_transforms.Resize(460 / 240, interpolation='bilinear'),
        my_transforms.Rotate(angle),
        my_transforms.Resize(s),
        my_transforms.CenterCrop(self.size),
        my_transforms.HorizontalFlip(do_flip)
    ])
    im_ = transform(im)
    im_ = color_jitter(im_)  # photometric augmentation, image only
    gt_ = transform(gt)
    im_ = np.array(im_).astype(np.float32)
    gt_ = np.array(gt_).astype(np.float32)
    im_ /= 255.0  # normalize RGB to [0, 1]
    gt_ /= 100.0 * s  # undo random scale; 100 assumed to be a unit conversion
    im_ = to_tensor(im_)
    gt_ = to_tensor(gt_)
    gt_ = gt_.unsqueeze(0)  # add channel dim: [1, H, W]
    return im_, gt_
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (rgb, depth) the raw data.
    """
    raw_rgb, raw_depth, _ = self.__getraw__(index)
    if self.transform is not None:
        # transform is expected to return the jittered rgb, the depth, and an
        # un-normalized rgb copy used for mesh extraction (see val_transform).
        rgb_np, depth_np, rgb_np_for_edge = self.transform(raw_rgb, raw_depth)
    else:
        raise RuntimeError("transform not defined")

    input_tensor = to_tensor(rgb_np)
    depth_tensor = to_tensor(depth_np).unsqueeze(0)  # [1,H,W]

    # Extract mesh from the un-jittered uint8 RGB image.
    base_mesh = self.mesh_extractor(np.uint8(rgb_np_for_edge))

    # Preserve original resolution for evaluation/visualization.
    orig_transform = transforms.Compose([
        transforms.CenterCrop((456, 608)),
    ])
    orig_input_tensor = orig_transform(raw_rgb)
    orig_depth_tensor = orig_transform(raw_depth)
    # To tensor
    orig_input_tensor = to_tensor(orig_input_tensor)
    orig_depth_tensor = to_tensor(orig_depth_tensor).unsqueeze(0)

    # Estimated depthmaps (added for evaluation); loaded from self.mat_depth.
    est_depth_np = np.asfarray(self.mat_depth[index], dtype='float')  # numpy
    est_depth_tensor = to_tensor(est_depth_np).unsqueeze(0)
    return input_tensor, depth_tensor, base_mesh, orig_input_tensor, orig_depth_tensor, est_depth_tensor
def train_transform(self, rgb, depth):
    """KITTI-style train augmentation with focal/scale simulation.

    Mirrors the NYU variant but crops the raw frame first instead of
    resizing by 250/iheight.
    """
    # s = np.random.uniform(1.0, 1.5) # random scaling
    # depth_np = depth / s
    s = self.getFocalScale()
    if (self.augArgs.varFocus):  # Variable focal length simulation
        depth_np = depth
    else:
        depth_np = depth / s  # Correct for focal length
    if (self.augArgs.varScale):  # Variable global scale simulation
        scale = self.getDepthGroup()
        depth_np = depth_np * scale
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
    # perform 1st step of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop(self.output_size),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    rgb_np = self.color_jitter(rgb_np)  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)
    return rgb_np, depth_np
def train_transform(self, rgb, depth): s = np.random.uniform(1.0, 1.5) # random scaling depth_np = depth #/ s angle = np.random.uniform(-5.0, 5.0) # random rotation degrees do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip # perform 1st step of data augmentation transform = transforms.Compose([ #transforms.Resize(240.0 / iheight), # this is for computational efficiency, since rotation can be slow #transforms.Rotate(angle), #transforms.Resize(s), #transforms.CenterCrop(self.output_size), #transforms.HorizontalFlip(do_flip) transforms.Resize(self.output_size) ]) rgb_np = transform(rgb) #rgb_np = self.color_jitter(rgb_np) # random color jittering rgb_np = np.asfarray(rgb_np, dtype='float') / 255 depth_np = transform(depth_np) depth_np = np.asfarray(depth_np, dtype='float') if self.depth_16: depth_np = depth_np / self.depth_16_max else: depth_np = depth_np / 255 return rgb_np, depth_np
def val_transform(self, rgb):
    """Resize the RGB frame to 240 rows, center-crop, normalize to [0, 1]."""
    pipeline = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop(self.output_size),
    ])
    return np.asfarray(pipeline(rgb), dtype='float') / 255
def image_transform(rgb, depth):
    """Prepare a Kinect RGB/depth pair for the network (228x304 crops).

    Depth is clipped to 6000 and divided by 1000 — presumably mm -> m with a
    6 m ceiling (TODO confirm sensor units).
    """
    # Convert and reshape the raw depth buffer to the sensor layout.
    depth_converted = np.asfarray(depth.clip(0, 6000), dtype='float') / 1000
    depth_array = depth_converted.reshape((424, 512), order='C')
    resize_crop_rgb = transforms.Compose([
        transforms.Resize([240, 426]),
        transforms.CenterCrop((228, 304)),
    ])
    resize_crop_depth = transforms.Compose([
        transforms.Resize([240, 320]),
        transforms.CenterCrop((228, 304)),
    ])
    rgb_np = np.asfarray(resize_crop_rgb(rgb), dtype='float') / 255
    return rgb_np, resize_crop_depth(depth_array)
def val_transform(rgb, depth):
    """Deterministic eval transform: resize to 240 rows, center-crop, and
    normalize the RGB image to [0, 1]."""
    pipeline = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop(output_size),
    ])
    scaled_rgb = np.asfarray(pipeline(rgb), dtype='float') / 255
    return scaled_rgb, pipeline(depth)
def seq_transform(self, attrib_list, is_validation):
    """Transform a dict of per-frame modalities and normalize depth scale.

    A scale is chosen so depth-like channels fall inside the range the
    network converges in; 'scale' in the output records its inverse so
    predictions can be mapped back to metric units.
    """
    iheight = attrib_list['gt_depth'].shape[0]
    iwidth = attrib_list['gt_depth'].shape[1]
    transform = transforms.Compose([
        transforms.Resize(
            240.0 / iheight),  # this is for computational efficiency,
        transforms.CenterCrop(self.output_size)
    ])
    attrib_np = dict()
    network_max_range = 10.0  # 10 is arbitrary. the network only converge in a especific range
    if 'scale' in attrib_list and attrib_list['scale'] > 0:
        # Caller supplied a scale: use it directly.
        scale = 1.0 / attrib_list['scale']
        attrib_np['scale'] = attrib_list['scale']
    else:
        # Derive the scale from the max depth of an available depth channel.
        # NOTE(review): when 'fd' is present but 'kor' is absent, the else
        # branch overwrites max_depth with 50, discarding the 'fd' result —
        # the second `if` looks like it should be `elif`.  Left as-is.
        if 'fd' in attrib_list:
            minmax_image = transform(attrib_list['fd'])
            max_depth = max(minmax_image.max(), 1.0)
        if 'kor' in attrib_list:
            minmax_image = transform(attrib_list['kor'])
            max_depth = max(minmax_image.max(), 1.0)
        else:
            max_depth = 50
        scale = network_max_range / max_depth
        attrib_np['scale'] = 1.0 / scale
    for key, value in attrib_list.items():
        # Geometric transform for image-like channels only.
        if key not in Modality.no_transform:
            attrib_np[key] = transform(value)
        else:
            attrib_np[key] = value
        if key in Modality.need_divider:
            # Depth-like channels are rescaled into the network range.
            attrib_np[key] = scale * attrib_np[key]
        elif key in Modality.image_size_weight_names:
            attrib_np[key] = attrib_np[key] / (
                iwidth * 1.5)  # 1.5 about sqrt(2)- square's diagonal
    if 'rgb' in attrib_np:
        if not is_validation:
            attrib_np['rgb'] = self.color_jitter(
                attrib_np['rgb'])  # random color jittering
        attrib_np['rgb'] = (np.asfarray(attrib_np['rgb'], dtype='float') /
                            255).transpose(
                                (2, 0, 1))  # all channels need to have C x H x W
    if 'grey' in attrib_np:
        attrib_np['grey'] = np.expand_dims(
            np.asfarray(attrib_np['grey'], dtype='float') / 255, axis=0)
    return attrib_np
def val_transform(self, rgb, depth):
    """Deterministic KITTI-style eval transform: crop, center-crop, and
    normalize RGB; depth is divided by ``self.depth_divider``."""
    pipeline = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.CenterCrop(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    # Normalize depth, then cast to float32 before the geometric transform.
    depth_scaled = np.asfarray(depth / (self.depth_divider), dtype='float32')
    return rgb_np, pipeline(depth_scaled)
def val_transform(rgb, sparse, rgb_near, args):
    """Validation transform; every geometric step is currently disabled, so
    the Compose is effectively the identity."""
    transform = transforms.Compose([
        # transforms.Resize((352, 1216)),
        # transforms.BottomCrop((oheight, owidth)),
    ])
    rgb, sparse, rgb_near = (None if frame is None else transform(frame)
                             for frame in (rgb, sparse, rgb_near))
    return rgb, sparse, rgb_near
def val_transform(self, rgb, depth):
    """Deterministic eval transform: crop, center-crop, normalize RGB.

    NOTE(review): the crop height here is 220 while sibling loaders use
    240 — confirm this difference is intentional.
    """
    pipeline = transforms.Compose([
        transforms.Crop(130, 10, 220, 1200),
        transforms.CenterCrop(self.output_size)
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255  # Why do this??
    depth_f32 = np.asfarray(depth, dtype='float32')
    return rgb_np, pipeline(depth_f32)
def val_transform(self, rgb, depth, pose):
    """Deterministic eval transform; the pose passes through untouched."""
    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),
        transforms.CenterCrop((228, 304)),
        transforms.Resize(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    return rgb_np, pipeline(depth), pose
def val_transform_label(self, rgb, depth, label):
    """Downscale rgb/depth by 150/iheight; the label is returned unchanged."""
    pipeline = transforms.Compose([
        transforms.Resize(150.0 / iheight),
        # transforms.CenterCrop(self.output_size),
    ])
    # label_transform = transforms.CenterCrop(self.output_size),
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    return rgb_np, pipeline(depth), label
def val_transform(rgb, sparse, target, rgb_near, args):
    """Bottom-crop every provided frame to (oheight, owidth); None passes
    through unchanged."""
    crop = transforms.Compose([
        transforms.BottomCrop((oheight, owidth)),
    ])

    def _maybe(frame):
        # Helper: apply the crop only when a frame is present.
        return None if frame is None else crop(frame)

    return _maybe(rgb), _maybe(sparse), _maybe(target), _maybe(rgb_near)
def val_transform(self, rgb, depth, random_seed):
    """Deterministic eval transform; reseeds numpy so any downstream
    randomness is reproducible per sample."""
    np.random.seed(random_seed)
    pipeline = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    return rgb_np, pipeline(depth)
def val_transform(self, rgb, depth):
    """Deterministic eval transform: crop, downscale by 500/750, center-crop,
    and normalize RGB to [0, 1]."""
    pipeline = transforms.Compose([
        transforms.Crop(0, 20, 750, 2000),
        transforms.Resize(500 / 750),
        transforms.CenterCrop(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    depth_f32 = np.asfarray(depth, dtype='float32')
    return rgb_np, pipeline(depth_f32)
def val_transform(self, rgb, depth):
    """Deterministic eval transform; depth additionally goes through
    ``depth_data_transforms`` for comparison with Eigen's paper."""
    pipeline = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    depth_np = pipeline(depth)
    # for compare with Eigen's paper
    return rgb_np, depth_data_transforms(depth_np)
def validate_transform(self, rgb, depth):
    """Deterministic validation transform: resize to 240 rows, center-crop.

    NOTE(review): h_offset/v_offset are random draws that are never used —
    surprising in a validation path.  Kept as-is because removing them would
    change the global np.random stream; confirm before deleting.
    """
    h_offset = int((768 - 228) * np.random.uniform(0.0, 1.0))
    v_offset = int((1024 - 304) * np.random.uniform(0.0, 1.0))
    base_transform = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop(self.output_size),
    ])
    rgb = base_transform(rgb)
    rgb = rgb / 255.0  # normalize RGB to [0, 1]
    depth = base_transform(depth)
    return (rgb, depth)
def val_transform(self, rgb, depth):
    """Deterministic eval transform: crop, center-crop, resize to the output
    size, and normalize RGB to [0, 1]."""
    pipeline = transforms.Compose([
        #transform.Resize(250.0 / iheight),
        transforms.Crop(130, 10, 240, 1200),
        transforms.CenterCrop(self.output_size),
        transforms.Resize(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype='float') / 255
    depth_f32 = np.asfarray(depth, dtype='float32')
    return rgb_np, pipeline(depth_f32)