def load_data(fold: int, params: Dict[str, Any]) -> Any:
    """Build train/val/test DataLoaders for the given CV fold.

    `params` is accepted for interface compatibility but is not used in this variant.
    """
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True

    logger.info('Options:')
    logger.info(pprint.pformat(opt))

    # Split the full training table into train/val parts for this fold.
    full_df = pd.read_csv(opt.TRAIN.CSV)
    print('full_df', full_df.shape)
    train_df, val_df = train_val_split(full_df, fold)
    print('train_df', train_df.shape, 'val_df', val_df.shape)
    test_df = pd.read_csv(opt.TEST.CSV)

    transform_train = albu.Compose([
        albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        albu.HorizontalFlip(p=0.5),
    ])

    # With test-time augmentation the test transform stays stochastic
    # (random crop + flip); otherwise use a deterministic center crop.
    if opt.TEST.NUM_TTAS > 1:
        transform_test = albu.Compose([
            albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
            albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
            albu.HorizontalFlip(),
        ])
    else:
        transform_test = albu.Compose([
            albu.PadIfNeeded(opt.MODEL.INPUT_SIZE, opt.MODEL.INPUT_SIZE),
            albu.CenterCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        ])

    train_dataset = Dataset(train_df, path=opt.TRAIN.PATH, mode='train',
                            num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                            augmentor=transform_train)
    val_dataset = Dataset(val_df, path=opt.TRAIN.PATH, mode='val',
                          num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                          num_tta=1,
                          augmentor=transform_test)
    test_dataset = Dataset(test_df, path=opt.TEST.PATH, mode='test',
                           num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                           num_tta=opt.TEST.NUM_TTAS,
                           augmentor=transform_test)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=True,
        num_workers=opt.TRAIN.WORKERS)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)

    return train_loader, val_loader, test_loader
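# --- Sketch only: the shape of the `opt` config these loaders assume --------
# The real config object is defined elsewhere in the repo; the snippet below
# merely illustrates the fields referenced above (opt.MODEL.*, opt.TRAIN.*,
# opt.TEST.*). The name `opt_example`, the SimpleNamespace representation,
# and every concrete value and path are hypothetical placeholders; the actual
# project may use yacs/EasyDict or a parsed YAML instead.
from types import SimpleNamespace

opt_example = SimpleNamespace(
    MODEL=SimpleNamespace(INPUT_SIZE=224, NUM_CLASSES=1000),
    TRAIN=SimpleNamespace(CSV='train.csv', PATH='train_images/',
                          BATCH_SIZE=32, WORKERS=4),
    TEST=SimpleNamespace(CSV='sample_submission.csv', PATH='test_images/',
                         NUM_TTAS=1),
)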
# Variant of load_data in which the training augmentation strength is driven
# by the `params` search dictionary.
def load_data(fold: int, params: Dict[str, Any]) -> Any:
    """Build train/val/test DataLoaders with augmentations controlled by `params`."""
    torch.multiprocessing.set_sharing_strategy('file_system')
    cudnn.benchmark = True

    logger.info('Options:')
    logger.info(pprint.pformat(opt))

    full_df = pd.read_csv(opt.TRAIN.CSV)
    print('full_df', full_df.shape)
    train_df, val_df = train_val_split(full_df, fold)
    print('train_df', train_df.shape, 'val_df', val_df.shape)
    test_df = pd.read_csv(opt.TEST.CSV)

    # Assemble the training augmentation list from the search parameters.
    augs = [albu.HorizontalFlip(p=0.5)]

    if int(params['vflip']):
        augs.append(albu.VerticalFlip(p=0.5))
    if int(params['rotate90']):
        augs.append(albu.RandomRotate90())

    if params['affine'] == 'soft':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.075, scale_limit=0.15,
                                          rotate_limit=10, p=0.75))
    elif params['affine'] == 'medium':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                                          rotate_limit=45, p=0.2))
    elif params['affine'] == 'hard':
        augs.append(albu.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.5,
                                          rotate_limit=45, p=0.75))

    if float(params['noise']) > 0.1:
        augs.append(albu.OneOf([
            albu.IAAAdditiveGaussianNoise(),
            albu.GaussNoise(),
        ], p=float(params['noise'])))

    if float(params['blur']) > 0.1:
        augs.append(albu.OneOf([
            albu.MotionBlur(p=0.2),
            albu.MedianBlur(blur_limit=3, p=0.1),
            albu.Blur(blur_limit=3, p=0.1),
        ], p=float(params['blur'])))

    if float(params['distortion']) > 0.1:
        augs.append(albu.OneOf([
            albu.OpticalDistortion(p=0.3),
            albu.GridDistortion(p=0.1),
            albu.IAAPiecewiseAffine(p=0.3),
        ], p=float(params['distortion'])))

    if float(params['color']) > 0.1:
        augs.append(albu.OneOf([
            albu.CLAHE(clip_limit=2),
            albu.IAASharpen(),
            albu.IAAEmboss(),
            albu.RandomBrightnessContrast(),
        ], p=float(params['color'])))

    transform_train = albu.Compose([
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        albu.Compose(augs, p=float(params['aug_global_prob'])),
    ])

    # Test/val transform is kept stochastic (random crop + flip) for TTA.
    transform_test = albu.Compose([
        albu.RandomCrop(height=opt.MODEL.INPUT_SIZE, width=opt.MODEL.INPUT_SIZE),
        albu.HorizontalFlip(),
    ])

    train_dataset = Dataset(train_df, path=opt.TRAIN.PATH, mode='train',
                            num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                            augmentor=transform_train)
    val_dataset = Dataset(val_df, path=opt.TRAIN.PATH, mode='val',
                          num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                          num_tta=1,
                          augmentor=transform_test)
    test_dataset = Dataset(test_df, path=opt.TEST.PATH, mode='test',
                           num_classes=opt.MODEL.NUM_CLASSES, resize=False,
                           num_tta=opt.TEST.NUM_TTAS,
                           augmentor=transform_test)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=True,
        num_workers=opt.TRAIN.WORKERS)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=opt.TRAIN.BATCH_SIZE, shuffle=False,
        num_workers=opt.TRAIN.WORKERS)

    return train_loader, val_loader, test_loader
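# --- Sketch only: calling the params-driven variant --------------------------
# The keys below are exactly those read by the function above; the values are
# hypothetical examples, not tuned settings. In the original pipeline such a
# dictionary would typically come from a hyperparameter search rather than be
# hard-coded, and `example_params` is not a name used elsewhere in the repo.
example_params = {
    'vflip': 0,              # 1 enables albu.VerticalFlip
    'rotate90': 1,           # 1 enables albu.RandomRotate90
    'affine': 'medium',      # 'soft'/'medium'/'hard'; any other value adds no affine transform
    'noise': 0.3,            # p of the noise OneOf block; values <= 0.1 disable it
    'blur': 0.0,             # <= 0.1 disables the blur block
    'distortion': 0.2,       # p of the distortion OneOf block
    'color': 0.4,            # p of the color OneOf block
    'aug_global_prob': 0.9,  # p of the whole augmentation sub-Compose
}

# train_loader, val_loader, test_loader = load_data(fold=0, params=example_params)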