def __init__(self, task, mode, dump_imgs=False):
    assert task in {'binary', 'type', 'parts'}
    assert mode in {'train', 'val', 'test'}
    self.task = task
    self.mode = mode
    self.dump_imgs = dump_imgs
    self.size = 768

    # Frame split: 0-149 train, 150-174 val, 175-224 test
    if mode == 'train':
        idx_range = range(150)
    elif mode == 'val':
        idx_range = range(150, 175)
    else:
        idx_range = range(175, 225)

    self.img_paths = []
    self.mask_paths = []
    for i in range(1, 9):
        img_list = [
            os.path.join(root, 'instrument_dataset_%d' % i,
                         'left_frames/frame{:03d}.png'.format(j))
            for j in idx_range
        ]
        self.img_paths.extend(img_list)
        mask_list = [
            os.path.join(root, 'instrument_dataset_%d' % i,
                         'ground_truth/%s_labels' % task,
                         'frame{:03d}.png'.format(j))
            for j in idx_range
        ]
        self.mask_paths.extend(mask_list)

    # Geometric transforms applied jointly to (image, mask)
    joint_transform_list = [
        joint_transforms.Resize(self.size),
        joint_transforms.RandomHorizontallyFlip()
    ]
    self.train_joint_transform = joint_transforms.Compose(joint_transform_list)
    self.val_joint_transform = joint_transforms.Resize(self.size)

    # Appearance transforms applied to the image only
    train_input_transform = []
    train_input_transform += [
        extended_transforms.ColorJitter(brightness=0.25, contrast=0.25,
                                        saturation=0.25, hue=0.25)
    ]
    train_input_transform += [extended_transforms.RandomGaussianBlur()]
    train_input_transform += [
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ]
    self.train_input_transform = standard_transforms.Compose(train_input_transform)
    self.val_input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    self.target_transform = extended_transforms.MaskToTensor()
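# A minimal usage sketch for the constructor above. The enclosing Dataset
# class is not shown here, so `RoboticsDataset` is a hypothetical name for
# it; the module-level `root` and `mean_std` must already point at the data
# directory and normalization statistics.

train_set = RoboticsDataset(task='binary', mode='train')
# 8 instrument_dataset folders x 150 training frames = 1200 image paths
assert len(train_set.img_paths) == 8 * 150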
def get_train_joint_transform(args, dataset):
    """
    Get train joint transform

    Args:
        args: input config arguments
        dataset: dataset class object

    return:
        train_joint_transform_list, train_joint_transform
    """
    # Geometric image transformations
    train_joint_transform_list = []
    train_joint_transform_list += [
        joint_transforms.RandomSizeAndCrop(args.crop_size,
                                           crop_nopad=args.crop_nopad,
                                           pre_size=args.pre_size,
                                           scale_min=args.scale_min,
                                           scale_max=args.scale_max,
                                           ignore_index=dataset.ignore_label),
        joint_transforms.Resize(args.crop_size),
        joint_transforms.RandomHorizontallyFlip()
    ]

    if args.rrotate > 0:
        train_joint_transform_list += [
            joint_transforms.RandomRotate(degree=args.rrotate,
                                          ignore_index=dataset.ignore_label)
        ]

    train_joint_transform = joint_transforms.Compose(train_joint_transform_list)

    # Return the raw list as well, for class uniform sampling
    return train_joint_transform_list, train_joint_transform
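# A quick smoke-test sketch for get_train_joint_transform. The field names
# mirror exactly what the function reads; SimpleNamespace stands in for the
# real argparse Namespace and dataset object, and all values are
# illustrative only.

from types import SimpleNamespace

args = SimpleNamespace(crop_size=768, crop_nopad=False, pre_size=None,
                       scale_min=0.5, scale_max=2.0, rrotate=10)
dataset = SimpleNamespace(ignore_label=255)
transform_list, composed = get_train_joint_transform(args, dataset)
# With rrotate > 0 the list holds four transforms; the composed version is
# applied jointly to (image, mask) pairs.
assert len(transform_list) == 4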
def setup_loader():
    """
    Setup Data Loaders
    """
    val_input_transform = transforms.ToTensor()
    target_transform = extended_transforms.MaskToTensor()
    val_joint_transform_list = [joint_transforms.Resize(args.resize_scale)]

    if args.dataset == 'iSAID':
        args.dataset_cls = iSAID
        test_set = args.dataset_cls.ISAIDDataset(
            args.mode, args.split,
            joint_transform_list=val_joint_transform_list,
            transform=val_input_transform,
            target_transform=target_transform)
    elif args.dataset == 'Posdam':
        args.dataset_cls = Posdam
        test_set = args.dataset_cls.POSDAMDataset(
            args.mode, args.split,
            joint_transform_list=val_joint_transform_list,
            transform=val_input_transform,
            target_transform=target_transform)
    elif args.dataset == 'Vaihingen':
        args.dataset_cls = Vaihingen
        test_set = args.dataset_cls.VAIHINGENDataset(
            args.mode, args.split,
            joint_transform_list=val_joint_transform_list,
            transform=val_input_transform,
            target_transform=target_transform)
    else:
        raise NameError('-------------Not Supported Currently-------------')

    if args.split_count > 1:
        test_set.split_dataset(args.split_index, args.split_count)

    batch_size = 1
    if args.inference_mode == 'pooling':
        batch_size = args.batch_size

    test_loader = DataLoader(test_set,
                             batch_size=batch_size,
                             num_workers=args.num_workers,
                             shuffle=False,
                             pin_memory=False,
                             drop_last=False)
    return test_loader
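# Hypothetical invocation sketch: setup_loader() reads a module-level
# `args` object rather than taking parameters, so these fields must be
# populated (normally via argparse) before calling it. All values here are
# illustrative only.

args.dataset = 'Vaihingen'
args.mode = 'semantic'
args.split = 'test'
args.resize_scale = 1024
args.split_count = 1
args.inference_mode = 'whole'   # anything other than 'pooling' keeps batch_size = 1
args.num_workers = 4
test_loader = setup_loader()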
def setup_loaders(args):
    '''
    input: argument passed by the user
    return: training data loader, validation data loader, train_set
    '''
    # Only cityscapes is supported by this variant of setup_loaders.
    if args.dataset == 'cityscapes':
        args.dataset_cls = cityscapes
        args.train_batch_size = args.bs_mult * args.ngpu
        if args.bs_mult_val > 0:
            args.val_batch_size = args.bs_mult_val * args.ngpu
        else:
            args.val_batch_size = args.bs_mult * args.ngpu
    else:
        raise Exception('Dataset {} is not supported'.format(args.dataset))

    args.num_workers = 4 * args.ngpu
    if args.test_mode:
        args.num_workers = 0

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    # Geometric image transformations
    train_joint_transform_list = [
        joint_transforms.RandomSizeAndCrop(args.crop_size, False,
                                           pre_size=args.pre_size,
                                           scale_min=args.scale_min,
                                           scale_max=args.scale_max,
                                           ignore_index=args.dataset_cls.ignore_label),
        joint_transforms.Resize(args.crop_size),
        joint_transforms.RandomHorizontallyFlip()]
    train_joint_transform = joint_transforms.Compose(train_joint_transform_list)

    # Image appearance transformations
    train_input_transform = []
    if args.color_aug:
        train_input_transform += [extended_transforms.ColorJitter(
            brightness=args.color_aug, contrast=args.color_aug,
            saturation=args.color_aug, hue=args.color_aug)]
    if args.bblur:
        train_input_transform += [extended_transforms.RandomBilateralBlur()]
    elif args.gblur:
        train_input_transform += [extended_transforms.RandomGaussianBlur()]
    train_input_transform += [standard_transforms.ToTensor(),
                              standard_transforms.Normalize(*mean_std)]
    train_input_transform = standard_transforms.Compose(train_input_transform)

    val_input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])

    target_transform = extended_transforms.MaskToTensor()
    target_train_transform = extended_transforms.MaskToTensor()

    city_mode = 'train'     # can be 'trainval'
    city_quality = 'fine'
    train_set = args.dataset_cls.CityScapes(
        city_quality, city_mode, 0,
        joint_transform=train_joint_transform,
        transform=train_input_transform,
        target_transform=target_train_transform,
        dump_images=args.dump_augmentation_images,
        cv_split=args.cv)
    val_set = args.dataset_cls.CityScapes('fine', 'val', 0,
                                          transform=val_input_transform,
                                          target_transform=target_transform,
                                          cv_split=args.cv)

    train_sampler = None
    val_sampler = None

    train_loader = DataLoader(train_set, batch_size=args.train_batch_size,
                              num_workers=args.num_workers,
                              shuffle=(train_sampler is None),
                              drop_last=True, sampler=train_sampler)
    val_loader = DataLoader(val_set, batch_size=args.val_batch_size,
                            num_workers=args.num_workers // 2,
                            shuffle=False, drop_last=False,
                            sampler=val_sampler)
    return train_loader, val_loader, train_set
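# Sketch of wiring this setup_loaders into a training loop. Field names
# mirror what the function reads; values are illustrative, and the batch
# unpacking assumes the CityScapes __getitem__ yields (image, mask) pairs,
# which should be checked against the actual dataset class.

from types import SimpleNamespace

args = SimpleNamespace(dataset='cityscapes', bs_mult=2, bs_mult_val=1,
                       ngpu=1, test_mode=False, crop_size=768, pre_size=None,
                       scale_min=0.5, scale_max=2.0, color_aug=0.25,
                       bblur=False, gblur=True,
                       dump_augmentation_images=False, cv=0)
train_loader, val_loader, train_set = setup_loaders(args)
for images, masks in train_loader:
    pass  # forward / backward pass would go here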
def setup_loaders(args): """ Setup Data Loaders[Currently supports Cityscapes, Mapillary and ADE20kin] input: argument passed by the user return: training data loader, validation data loader loader, train_set """ if args.dataset == 'cityscapes': args.dataset_cls = cityscapes args.train_batch_size = args.bs_mult * args.ngpu if args.bs_mult_val > 0: args.val_batch_size = args.bs_mult_val * args.ngpu else: args.val_batch_size = args.bs_mult * args.ngpu elif args.dataset == 'mapillary': args.dataset_cls = mapillary args.train_batch_size = args.bs_mult * args.ngpu args.val_batch_size = 4 elif args.dataset == 'ade20k': args.dataset_cls = ade20k args.train_batch_size = args.bs_mult * args.ngpu args.val_batch_size = 4 elif args.dataset == 'kitti': args.dataset_cls = kitti args.train_batch_size = args.bs_mult * args.ngpu if args.bs_mult_val > 0: args.val_batch_size = args.bs_mult_val * args.ngpu else: args.val_batch_size = args.bs_mult * args.ngpu elif args.dataset == 'camvid': args.dataset_cls = camvid args.train_batch_size = args.bs_mult * args.ngpu if args.bs_mult_val > 0: args.val_batch_size = args.bs_mult_val * args.ngpu else: args.val_batch_size = args.bs_mult * args.ngpu elif args.dataset == 'null_loader': args.dataset_cls = null_loader args.train_batch_size = args.bs_mult * args.ngpu if args.bs_mult_val > 0: args.val_batch_size = args.bs_mult_val * args.ngpu else: args.val_batch_size = args.bs_mult * args.ngpu else: raise Exception('Dataset {} is not supported'.format(args.dataset)) # Readjust batch size to mini-batch size for apex if args.apex: args.train_batch_size = args.bs_mult args.val_batch_size = args.bs_mult_val args.num_workers = 4 * args.ngpu if args.test_mode: args.num_workers = 1 mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Geometric image transformations train_joint_transform_list = [ joint_transforms.RandomSizeAndCrop(args.crop_size, False, pre_size=args.pre_size, scale_min=args.scale_min, scale_max=args.scale_max, ignore_index=args.dataset_cls.ignore_label), joint_transforms.Resize(args.crop_size), joint_transforms.RandomHorizontallyFlip()] train_joint_transform = joint_transforms.Compose(train_joint_transform_list) # Image appearance transformations train_input_transform = [] if args.color_aug: train_input_transform += [extended_transforms.ColorJitter( brightness=args.color_aug, contrast=args.color_aug, saturation=args.color_aug, hue=args.color_aug)] if args.bblur: train_input_transform += [extended_transforms.RandomBilateralBlur()] elif args.gblur: train_input_transform += [extended_transforms.RandomGaussianBlur()] else: pass train_input_transform += [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] train_input_transform = standard_transforms.Compose(train_input_transform) val_input_transform = standard_transforms.Compose([ standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std) ]) target_transform = extended_transforms.MaskToTensor() if args.jointwtborder: target_train_transform = extended_transforms.RelaxedBoundaryLossToTensor(args.dataset_cls.ignore_label, args.dataset_cls.num_classes) else: target_train_transform = extended_transforms.MaskToTensor() if args.dataset == 'cityscapes': city_mode = 'train' ## Can be trainval city_quality = 'fine' if args.class_uniform_pct: if args.coarse_boost_classes: coarse_boost_classes = \ [int(c) for c in args.coarse_boost_classes.split(',')] else: coarse_boost_classes = None train_set = args.dataset_cls.CityScapesUniform( city_quality, city_mode, args.maxSkip, 
joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, cv_split=args.cv, class_uniform_pct=args.class_uniform_pct, class_uniform_tile=args.class_uniform_tile, test=args.test_mode, coarse_boost_classes=coarse_boost_classes) else: train_set = args.dataset_cls.CityScapes( city_quality, city_mode, 0, joint_transform=train_joint_transform, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, cv_split=args.cv) val_set = args.dataset_cls.CityScapes('fine', 'val', 0, transform=val_input_transform, target_transform=target_transform, cv_split=args.cv) elif args.dataset == 'mapillary': eval_size = 1536 val_joint_transform_list = [ joint_transforms.ResizeHeight(eval_size), joint_transforms.CenterCropPad(eval_size, ignore_index=args.dataset_cls.ignore_label)] train_set = args.dataset_cls.Mapillary( 'semantic', 'train', joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, class_uniform_pct=args.class_uniform_pct, class_uniform_tile=args.class_uniform_tile, test=args.test_mode) val_set = args.dataset_cls.Mapillary( 'semantic', 'val', joint_transform_list=val_joint_transform_list, transform=val_input_transform, target_transform=target_transform, test=False) elif args.dataset == 'ade20k': eval_size = 384 val_joint_transform_list = [ joint_transforms.ResizeHeight(eval_size), joint_transforms.CenterCropPad(eval_size)] train_set = args.dataset_cls.ade20k( 'semantic', 'train', joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, class_uniform_pct=args.class_uniform_pct, class_uniform_tile=args.class_uniform_tile, test=args.test_mode) val_set = args.dataset_cls.ade20k( 'semantic', 'val', joint_transform_list=val_joint_transform_list, transform=val_input_transform, target_transform=target_transform, test=False) elif args.dataset == 'kitti': # eval_size_h = 384 # eval_size_w = 1280 # val_joint_transform_list = [ # joint_transforms.ResizeHW(eval_size_h, eval_size_w)] train_set = args.dataset_cls.KITTI( 'semantic', 'train', args.maxSkip, joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, class_uniform_pct=args.class_uniform_pct, class_uniform_tile=args.class_uniform_tile, test=args.test_mode, cv_split=args.cv, scf=args.scf, hardnm=args.hardnm) val_set = args.dataset_cls.KITTI( 'semantic', 'trainval', 0, joint_transform_list=None, transform=val_input_transform, target_transform=target_transform, test=False, cv_split=args.cv, scf=None) elif args.dataset == 'camvid': # eval_size_h = 384 # eval_size_w = 1280 # val_joint_transform_list = [ # joint_transforms.ResizeHW(eval_size_h, eval_size_w)] train_set = args.dataset_cls.CAMVID( 'semantic', 'trainval', args.maxSkip, joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform, dump_images=args.dump_augmentation_images, class_uniform_pct=args.class_uniform_pct, class_uniform_tile=args.class_uniform_tile, test=args.test_mode, cv_split=args.cv, scf=args.scf, hardnm=args.hardnm) val_set = args.dataset_cls.CAMVID( 'semantic', 'test', 0, joint_transform_list=None, transform=val_input_transform, 
target_transform=target_transform, test=False, cv_split=args.cv, scf=None) elif args.dataset == 'null_loader': train_set = args.dataset_cls.null_loader(args.crop_size) val_set = args.dataset_cls.null_loader(args.crop_size) else: raise Exception('Dataset {} is not supported'.format(args.dataset)) if args.apex: from datasets.sampler import DistributedSampler train_sampler = DistributedSampler(train_set, pad=True, permutation=True, consecutive_sample=False) val_sampler = DistributedSampler(val_set, pad=False, permutation=False, consecutive_sample=False) else: train_sampler = None val_sampler = None train_loader = DataLoader(train_set, batch_size=args.train_batch_size, num_workers=args.num_workers, shuffle=(train_sampler is None), drop_last=True, sampler = train_sampler) val_loader = DataLoader(val_set, batch_size=args.val_batch_size, num_workers=args.num_workers // 2 , shuffle=False, drop_last=False, sampler = val_sampler) return train_loader, val_loader, train_set
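# The coarse_boost_classes flag in the cityscapes branch above arrives as a
# comma-separated string; a minimal standalone check of the parsing logic
# used there (values are illustrative):

coarse_boost_classes = '14,15,16'
parsed = [int(c) for c in coarse_boost_classes.split(',')]
assert parsed == [14, 15, 16]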
def __getitem__(self, index):
    try:
        data = copy.deepcopy(self.data_list[index])

        label = Image.open(data["img_mask"])
        label = np.array(label)
        label = self.id2trainId(label)
        mask = Image.fromarray(label.astype(np.uint8))
        im = Image.open(data['img_path']).convert('RGB')

        if self.eval_mode:
            return self._eval_get_item(im, label, self.eval_scales,
                                       self.eval_flip, data)

        # Geometric image transformations
        train_joint_transform_list = [
            joint_transforms.RandomSizeAndCrop(self.crop_size, False,
                                               pre_size=self.pre_size,
                                               scale_min=self.scale_min,
                                               scale_max=self.scale_max,
                                               ignore_index=self.ignore_label),
            joint_transforms.Resize(self.crop_size),
            joint_transforms.RandomHorizontallyFlip()]
        if self.rotate:
            train_joint_transform_list += [joint_transforms.RandomRotate(self.rotate)]
        train_joint_transform = joint_transforms.Compose(train_joint_transform_list)

        # Image appearance transformations
        train_input_transform = []
        if self.color_aug:
            train_input_transform += [extended_transforms.ColorJitter(
                brightness=self.color_aug, contrast=self.color_aug,
                saturation=self.color_aug, hue=self.color_aug)]
        if self.bblur:
            train_input_transform += [extended_transforms.RandomBilateralBlur()]
        elif self.gblur:
            train_input_transform += [extended_transforms.RandomGaussianBlur()]
        train_input_transform = transforms.Compose(train_input_transform)

        target_train_transform = extended_transforms.MaskToTensor()

        # Apply the joint, input and target transforms
        img, mask = train_joint_transform(im, mask)
        img = train_input_transform(img)
        mask = target_train_transform(mask)

        if self.dump_images:
            outdir = '/data/SSSSdump_imgs_/'
            os.makedirs(outdir, exist_ok=True)
            out_img_fn = os.path.join(outdir, '{}.png'.format(self.i))
            out_msk_fn = os.path.join(outdir, '{}s_mask.png'.format(self.i))
            self.i += 1
            mask_img = colorize_mask(np.array(mask))
            img.save(out_img_fn)
            mask_img.save(out_msk_fn)

        data.update({'img': img, 'label': mask})
        if self.transform:
            data['img'] = self.transform(data['img'])
        return data
    except Exception:
        # Fall back to a random sample if this one fails to load
        return self.__getitem__(np.random.randint(self.__len__()))
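# id2trainId is called above but not shown in this section; a minimal
# sketch of the usual Cityscapes-style raw-id to train-id remapping (the
# self.id_to_trainid table is hypothetical and dataset-specific):

def id2trainId(self, label):
    label_copy = label.copy()
    for raw_id, train_id in self.id_to_trainid.items():
        label_copy[label == raw_id] = train_id
    return label_copy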
def setup_loaders(args): """ Setup Data Loaders[Currently supports Cityscapes, Mapillary and ADE20kin] input: argument passed by the user return: training data loader, validation data loader loader, train_set """ if args.dataset == 'OmniAudio_noBG_Paralleltask': args.dataset_cls = OmniAudio_noBG_Paralleltask args.train_batch_size = 4#args.bs_mult * args.ngpu args.val_batch_size = 4 elif args.dataset == 'OmniAudio_noBG_Paralleltask_depth': args.dataset_cls = OmniAudio_noBG_Paralleltask_depth args.train_batch_size = 4#args.bs_mult * args.ngpu args.val_batch_size = 4 elif args.dataset == 'OmniAudio_noBG_Paralleltask_depth_noSeman': args.dataset_cls = OmniAudio_noBG_Paralleltask_depth_noSeman args.train_batch_size = 4#args.bs_mult * args.ngpu args.val_batch_size = 4 else: raise Exception('Dataset {} is not supported'.format(args.dataset)) # Readjust batch size to mini-batch size for apex if args.apex: args.train_batch_size = args.bs_mult args.val_batch_size = args.bs_mult_val args.num_workers = 4 * args.ngpu if args.test_mode: args.num_workers = 1 mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Geometric image transformations train_joint_transform_list = [ joint_transforms.RandomSizeAndCrop(args.crop_size, False, pre_size=args.pre_size, scale_min=args.scale_min, scale_max=args.scale_max, ignore_index=args.dataset_cls.ignore_label), joint_transforms.Resize(args.crop_size), joint_transforms.RandomHorizontallyFlip()] train_joint_transform = joint_transforms.Compose(train_joint_transform_list) # Image appearance transformations train_input_transform = [] if args.color_aug: train_input_transform += [extended_transforms.ColorJitter( brightness=args.color_aug, contrast=args.color_aug, saturation=args.color_aug, hue=args.color_aug)] if args.bblur: train_input_transform += [extended_transforms.RandomBilateralBlur()] elif args.gblur: train_input_transform += [extended_transforms.RandomGaussianBlur()] else: pass train_input_transform += [standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std)] train_input_transform = standard_transforms.Compose(train_input_transform) val_input_transform = standard_transforms.Compose([ standard_transforms.ToTensor(), standard_transforms.Normalize(*mean_std) ]) target_transform = extended_transforms.MaskToTensor() if args.jointwtborder: target_train_transform = extended_transforms.RelaxedBoundaryLossToTensor(args.dataset_cls.ignore_label, args.dataset_cls.num_classes) else: target_train_transform = extended_transforms.MaskToTensor() if args.dataset == 'OmniAudio_noBG_Paralleltask': eval_size = 960 val_joint_transform_list = [ joint_transforms.ResizeHeight(eval_size), joint_transforms.CenterCropPad(eval_size)] train_set = args.dataset_cls.OmniAudio( 'semantic', 'train', joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform) val_set = args.dataset_cls.OmniAudio( 'semantic', 'val', joint_transform_list=val_joint_transform_list, transform=val_input_transform, target_transform=target_transform) elif args.dataset == 'OmniAudio_noBG_Paralleltask_depth': eval_size = 960 val_joint_transform_list = [ joint_transforms.ResizeHeight(eval_size), joint_transforms.CenterCropPad(eval_size)] train_set = args.dataset_cls.OmniAudio( 'semantic', 'train', joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform) val_set = args.dataset_cls.OmniAudio( 'semantic', 'val', joint_transform_list=val_joint_transform_list, 
transform=val_input_transform, target_transform=target_transform) elif args.dataset == 'OmniAudio_noBG_Paralleltask_depth_noSeman': eval_size = 960 val_joint_transform_list = [ joint_transforms.ResizeHeight(eval_size), joint_transforms.CenterCropPad(eval_size)] train_set = args.dataset_cls.OmniAudio( 'semantic', 'train', joint_transform_list=train_joint_transform_list, transform=train_input_transform, target_transform=target_train_transform) val_set = args.dataset_cls.OmniAudio( 'semantic', 'val', joint_transform_list=val_joint_transform_list, transform=val_input_transform, target_transform=target_transform) elif args.dataset == 'null_loader': train_set = args.dataset_cls.null_loader(args.crop_size) val_set = args.dataset_cls.null_loader(args.crop_size) else: raise Exception('Dataset {} is not supported'.format(args.dataset)) if args.apex: from datasets.sampler import DistributedSampler train_sampler = DistributedSampler(train_set, pad=True, permutation=True, consecutive_sample=False) val_sampler = DistributedSampler(val_set, pad=False, permutation=False, consecutive_sample=False) else: train_sampler = None val_sampler = None train_loader = DataLoader(train_set, batch_size=args.train_batch_size, num_workers=args.num_workers, shuffle=(train_sampler is None), drop_last=True, sampler = train_sampler) val_loader = DataLoader(val_set, batch_size=args.val_batch_size, num_workers=args.num_workers // 2 , shuffle=False, drop_last=False, sampler = val_sampler) return train_loader, val_loader, train_set
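# A minimal smoke test for the OmniAudio loader setup. The field names
# follow what setup_loaders reads above; all values are illustrative, and
# the OmniAudio dataset modules must be importable for this to run.

from types import SimpleNamespace

args = SimpleNamespace(dataset='OmniAudio_noBG_Paralleltask', apex=False,
                       ngpu=1, test_mode=False, crop_size=640, pre_size=None,
                       scale_min=0.5, scale_max=2.0, color_aug=0.25,
                       bblur=False, gblur=True, jointwtborder=False)
train_loader, val_loader, train_set = setup_loaders(args)
print(len(train_loader), len(val_loader))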