def get_transformer(face_policy: str, patch_size: int, net_normalizer: transforms.Normalize, train: bool):
    # Transformers and traindb
    if face_policy == 'scale':
        # The loader crops the face isotropically then scales to a square of size patch_size_load
        loading_transformations = [
            A.PadIfNeeded(min_height=patch_size, min_width=patch_size,
                          border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True),
            A.Resize(height=patch_size, width=patch_size, always_apply=True),
        ]
        if train:
            downsample_train_transformations = [
                A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5),  # replaces scaled dataset
            ]
        else:
            downsample_train_transformations = []
    elif face_policy == 'tight':
        # The loader crops the face tightly without any scaling
        loading_transformations = [
            A.LongestMaxSize(max_size=patch_size, always_apply=True),
            A.PadIfNeeded(min_height=patch_size, min_width=patch_size,
                          border_mode=cv2.BORDER_CONSTANT, value=0, always_apply=True),
        ]
        if train:
            downsample_train_transformations = [
                A.Downscale(scale_max=0.5, scale_min=0.5, p=0.5),  # replaces scaled dataset
            ]
        else:
            downsample_train_transformations = []
    else:
        raise ValueError('Unknown value for face_policy: {}'.format(face_policy))

    if train:
        aug_transformations = [
            A.Compose([
                A.HorizontalFlip(),
                A.OneOf([
                    A.RandomBrightnessContrast(),
                    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=20),
                ]),
                A.OneOf([
                    A.ISONoise(),
                    A.IAAAdditiveGaussianNoise(scale=(0.01 * 255, 0.03 * 255)),
                ]),
                A.Downscale(scale_min=0.7, scale_max=0.9, interpolation=cv2.INTER_LINEAR),
                A.ImageCompression(quality_lower=50, quality_upper=99),
            ])
        ]
    else:
        aug_transformations = []

    # Common final transformations
    final_transformations = [
        A.Normalize(mean=net_normalizer.mean, std=net_normalizer.std),
        ToTensorV2(),
    ]
    transf = A.Compose(
        loading_transformations + downsample_train_transformations +
        aug_transformations + final_transformations)
    return transf
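A minimal sketch of how this factory might be invoked; the ImageNet statistics below are our assumption, not taken from the original call sites:

from torchvision import transforms

net_normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
transf = get_transformer(face_policy='scale', patch_size=224,
                         net_normalizer=net_normalizer, train=True)
# The returned albumentations Compose is applied as transf(image=face_crop)['image'].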
def sequence_augmentation(self, p_apply=0.5, limit_rotation=40, limit_translation=0.1, limit_scale=(-0.2, 0.2)):
    if self.rand_choice == 1:
        augm = A.Lambda(image=self.aug_dilate, keypoint=self.aug_keypoints)
    elif self.rand_choice == 2:
        augm = A.Lambda(image=self.aug_erode, keypoint=self.aug_keypoints)
    else:
        augm = A.NoOp()

    transform = A.Compose([
        A.Lambda(image=self.aug_morph_close, keypoint=self.aug_keypoints, p=1.0),
        augm,
        A.Downscale(scale_min=0.5, scale_max=0.9, p=p_apply, interpolation=cv2.INTER_NEAREST_EXACT),
        A.ShiftScaleRotate(limit_translation, limit_scale, limit_rotation, p=p_apply,
                           border_mode=cv2.BORDER_REFLECT101, value=-1.0)
    ],
        additional_targets={'image1': 'image', 'image2': 'image'},
        keypoint_params=A.KeypointParams("xy", remove_invisible=False))
    return transform
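Because of the `additional_targets` mapping above, one call applies identical random parameters to several frames of a sequence. A hedged sketch of such a call; the instance `obj`, the frame arrays, and the keypoints are all illustrative:

import numpy as np

frames = [np.random.rand(128, 128, 3).astype(np.float32) for _ in range(3)]  # hypothetical frames
keypoints = [(32.0, 40.0), (96.0, 80.0)]  # hypothetical (x, y) points

transform = obj.sequence_augmentation(p_apply=0.5)  # `obj` supplies the aug_* callbacks
out = transform(image=frames[0], image1=frames[1], image2=frames[2], keypoints=keypoints)
# All three frames receive the same Downscale / ShiftScaleRotate parameters.
aug_frames = [out['image'], out['image1'], out['image2']]
aug_keypoints = out['keypoints']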
def __init__(self, metadata: DataFrame, params: dict, transform, data_filter, frame_num=8):
    self.metadata_df = metadata
    self.real_filename = list(data_filter(metadata).index)
    self.transform = transform
    self.frame_num = frame_num
    self.same_transform = params['same_transform']
    self.smooth = params['label_smoothing']
    self.trans = aug.OneOf([
        aug.Downscale(0.5, 0.5, p=0.666),
        aug.JpegCompression(quality_lower=20, quality_upper=20, p=0.666),
        aug.Flip(p=0)
    ])
    self.video_path = pathlib.Path(params['data_path'])
    self.cached_path = pathlib.Path(params['cache_path'])
    self.cached_path.mkdir(exist_ok=True)
    self.data_dropout = params['data_dropout']
    self.input_mix = params['input_mix']
    np.random.shuffle(self.real_filename)
    self.real_filename = self.real_filename[:int(
        len(self.real_filename) * (1 - self.data_dropout))]
    self.real2fakes = {fn: [] for fn in self.real_filename}

    filename_set = set(self.real_filename)
    for fn, row in metadata.iterrows():
        if row['label'] == 'FAKE' and row['original'] in filename_set:
            self.real2fakes[row['original']].append(fn)
def __init__(self, metadata: DataFrame, bbox: DataFrame, params: dict, transform, data_filter, diff):
    self.metadata_df = metadata
    self.real_filename = list(data_filter(metadata).index)
    self.bbox_df = bbox
    self.bbox_index_fn = set(bbox.index.get_level_values(0))
    self.transform = transform
    self.same_transform = params['same_transform']
    self.diff = diff
    self.use_diff = params["img_diff"]
    self.smooth = params["smooth"]
    self.fix_fake = params["fix_fake"]
    self.video_path = pathlib.Path(params['data_path'])
    self.cached_path = pathlib.Path(params['cache_path'])
    self.cached_path.mkdir(exist_ok=True)
    self.real2fakes = {fn: [] for fn in self.real_filename}

    filename_set = set(self.real_filename)
    for fn, row in metadata.iterrows():
        if row['label'] == 'FAKE' and row['original'] in filename_set:
            self.real2fakes[row['original']].append(fn)

    if self.fix_fake == 1:
        for key in self.real2fakes.keys():
            if len(self.real2fakes[key]) > 0:
                self.real2fakes[key] = np.random.choice(self.real2fakes[key])

    import albumentations as aug
    self.trans1 = aug.Downscale(0.5, 0.5, p=1)
    self.trans2 = aug.JpegCompression(quality_lower=20, quality_upper=20, p=1)
def oversampling(self):
    self.transform = A.Compose([
        A.RandomBrightnessContrast(
            always_apply=False,
            p=0.4,
            brightness_limit=(-0.2, 0.2),
            contrast_limit=(-0.2, 0.2),
        ),
        # A.CLAHE(always_apply=False,
        #         p=0.5,
        #         clip_limit=(1, 4),
        #         tile_grid_size=(8, 8)),
        A.GaussNoise(always_apply=False, p=0.4, var_limit=(20, 40)),
        A.Downscale(
            always_apply=False,
            p=0.4,
            scale_min=0.5,
            scale_max=0.8,
            interpolation=0,
        ),
        A.HueSaturationValue(
            always_apply=False,
            p=0.4,
            hue_shift_limit=(-20, 20),
            sat_shift_limit=(-30, 30),
            val_shift_limit=(-20, 20),
        ),
    ])
def get_transform(is_train):
    if is_train:
        return albumentations.Compose([
            albumentations.Resize(224, 224),
            albumentations.OneOf([
                albumentations.JpegCompression(quality_lower=20, quality_upper=70, p=0.5),
                albumentations.Downscale(scale_min=0.25, scale_max=0.50, interpolation=1, p=0.5),
            ], p=0.6),
            albumentations.HorizontalFlip(p=0.5),
            albumentations.VerticalFlip(p=0.5),
            # albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45),
            albumentations.GaussNoise(p=0.2),
            albumentations.RandomBrightnessContrast(0.3, 0.3, p=0.7),
            albumentations.RandomGamma(p=0.2),
            albumentations.CLAHE(p=0.2),
            albumentations.ChannelShuffle(p=0.2),
            albumentations.MultiplicativeNoise(multiplier=[0.5, 1.5], elementwise=True, p=0.3),
            albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10,
                                              val_shift_limit=10, p=0.7),
            albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                                     max_pixel_value=255.0)
        ])
    else:
        return albumentations.Compose([
            albumentations.Resize(224, 224),
            albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                                     max_pixel_value=255.0)
        ])
def __init__(self, metadata: DataFrame, bbox: DataFrame, params: dict, transform, data_filter, diff):
    self.metadata_df = metadata
    self.real_filename = list(data_filter(metadata).index)
    self.bbox_df = bbox
    self.bbox_index_fn = set(bbox.index.get_level_values(0))
    self.transform = transform
    self.same_transform = params['same_transform']
    self.diff = diff
    self.video_path = pathlib.Path(params['data_path'])
    self.cached_path = pathlib.Path(params['cache_path'])
    self.cached_path.mkdir(exist_ok=True)
    self.real2fakes = {fn: [] for fn in self.real_filename}

    filename_set = set(self.real_filename)
    for fn, row in metadata.iterrows():
        if row['label'] == 'FAKE' and row['original'] in filename_set:
            self.real2fakes[row['original']].append(fn)

    import albumentations as aug
    self.trans = aug.OneOf([
        aug.Downscale(0.5, 0.5, p=0.66),
        aug.JpegCompression(quality_lower=20, quality_upper=20, p=0.66),
        aug.Flip(p=0)
    ])
def augmentation(p_apply=0.5, limit_rotation=40, limit_translation=0.1, limit_scale=(-0.2, 0.2)):
    transform = A.Compose([
        A.Lambda(image=aug_morph_close, keypoint=aug_keypoints, p=1.0),
        A.OneOf([
            A.Lambda(image=aug_dilate, keypoint=aug_keypoints),
            A.Lambda(image=aug_erode, keypoint=aug_keypoints),
            A.NoOp()
        ], p=p_apply),
        # A.Lambda(image=aug_erode_or_dilate, keypoint=aug_keypoints, p=p_apply),
        A.Downscale(scale_min=0.5, scale_max=0.9, p=p_apply, interpolation=cv2.INTER_NEAREST_EXACT),
        A.ShiftScaleRotate(limit_translation, limit_scale, limit_rotation, p=p_apply,
                           border_mode=cv2.BORDER_REFLECT101, value=-1.0),
        A.Lambda(image=cropout, keypoint=aug_keypoints, p=p_apply),
    ], keypoint_params=A.KeypointParams("xy", remove_invisible=False))
    return transform
def generate_train_transforms(target_size=(50, 50)):
    transforms = [
        A.Resize(height=target_size[0], width=target_size[1]),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.25),
        A.Rotate(limit=(-90, 90), p=1.),
        A.RandomResizedCrop(height=target_size[0], width=target_size[1], scale=(0.5, 1.), p=0.5),
        A.OneOf([
            A.Blur(p=ColorEffectP),
            A.GaussNoise(p=ColorEffectP),
            A.Downscale(p=ColorEffectP),
            A.RGBShift(p=ColorEffectP, r_shift_limit=20, g_shift_limit=20, b_shift_limit=20),
            A.RandomGamma(p=ColorEffectP),
            A.RandomBrightnessContrast(p=ColorEffectP)
        ], p=0.5),
        ToTensor()
    ]
    return A.Compose(transforms)
def __down_scale(self, img):
    transform = A.Compose([
        A.Downscale(scale_min=0.3, scale_max=0.8)
    ])
    transformed = transform(image=img)
    transformed_image = transformed["image"]
    return transformed_image
def test_downscale(interpolation):
    img_float = np.random.rand(100, 100, 3)
    img_uint = (img_float * 255).astype("uint8")
    aug = A.Downscale(scale_min=0.5, scale_max=0.5, interpolation=interpolation, always_apply=True)
    for img in (img_float, img_uint):
        transformed = aug(image=img)["image"]
        func_applied = F.downscale(img, scale=0.5, interpolation=interpolation)
        np.testing.assert_almost_equal(transformed, func_applied)
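For context on what the test asserts: `A.Downscale` degrades image quality by resizing the image down and back up to its original size. A self-contained sketch of the equivalent round trip in plain OpenCV (our own illustration, not the library's internal implementation):

import cv2
import numpy as np

def downscale_roundtrip(img, scale=0.5, interpolation=cv2.INTER_NEAREST):
    # Resize down by `scale`, then back up to the original size.
    h, w = img.shape[:2]
    small = cv2.resize(img, (int(w * scale), int(h * scale)), interpolation=interpolation)
    return cv2.resize(small, (w, h), interpolation=interpolation)

img = (np.random.rand(100, 100, 3) * 255).astype("uint8")
degraded = downscale_roundtrip(img, scale=0.5)
assert degraded.shape == img.shape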
def set_augmentation(self, flags):
    aug_list = []
    if flags.blur["p"] > 0.0:
        aug_list.append(A.Blur(**flags.blur))
    if flags.cutout["p"] > 0.0:
        aug_list.append(A.Cutout(**flags.cutout))
    if flags.downscale["p"] > 0.0:
        aug_list.append(A.Downscale(**flags.downscale))
    if flags.crossdrop["p"] > 0.0:
        aug_list.append(CrossDrop(**flags.crossdrop))
    self.aug = Compose(aug_list) if len(aug_list) != 0 else None
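The `flags` object above is expected to expose one kwargs dict per augmentation, each carrying at least a "p" key. A hypothetical configuration that enables only Downscale (the `SimpleNamespace` wrapper and the parameter values are illustrative, not from the original code):

from types import SimpleNamespace

flags = SimpleNamespace(
    blur={"p": 0.0},
    cutout={"p": 0.0},
    downscale={"p": 0.5, "scale_min": 0.25, "scale_max": 0.5},
    crossdrop={"p": 0.0},
)
# Each enabled dict is splatted into the transform constructor,
# so its keys must match that transform's keyword arguments.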
def get_training_augmentation(y=256, x=256):
    train_transform = [
        albu.RandomBrightnessContrast(p=0.3),
        albu.VerticalFlip(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.Downscale(p=1.0, scale_min=0.35, scale_max=0.75),
        albu.Resize(y, x)
    ]
    return albu.Compose(train_transform)
def acase2_augs(name, **kwargs):
    return [
        A.Compose(
            [A.Posterize(), A.GridDistortion(num_steps=4), A.Normalize()],
            p=1.0),
        A.Compose(
            [A.Downscale(), A.GridDistortion(num_steps=4), A.Normalize()],
            p=1.0),
    ]
def get_obliterate_augs():
    """
    Get an augmentation that can obliterate the hidden signal.
    This is used as an augmentation to create a negative sample from a positive one.
    :return:
    """
    return A.OneOf(
        [
            A.ImageCompression(quality_lower=70, quality_upper=95, p=1),
            A.Downscale(p=1),
            A.GaussianBlur(blur_limit=(5, 9), p=1),
        ],
        p=1,
    )
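A sketch of how this could turn a stego-positive image into a negative training sample; the surrounding variable names and shapes are hypothetical:

import numpy as np

obliterate = get_obliterate_augs()
positive_img = (np.random.rand(256, 256, 3) * 255).astype("uint8")  # hypothetical positive sample

# One of compression / downscale / blur is applied, destroying the hidden
# signal, so the result can be relabeled as a negative example.
negative_img = obliterate(image=positive_img)["image"]
negative_label = 0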
def generate_transforms3(img_size):
    train_transform = Compose([
        A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.7),
        A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), p=0.7),
        A.CLAHE(clip_limit=(1, 4), p=0.5),
        A.OneOf([
            A.OpticalDistortion(distort_limit=1.0),
            A.GridDistortion(num_steps=5, distort_limit=1.0),
            A.ElasticTransform(alpha=3),
        ], p=0.2),
        A.Resize(img_size, img_size),
        A.OneOf([
            A.JpegCompression(),
            A.Downscale(scale_min=0.1, scale_max=0.15),
        ], p=0.2),
        A.IAAPerspective(p=0.2),
        A.IAASharpen(p=0.2),
        A.Cutout(max_h_size=int(img_size * 0.1), max_w_size=int(img_size * 0.1), num_holes=5, p=0.5),
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
        ToTensorV2(),
    ])

    val_transform = Compose([
        Resize(height=img_size, width=img_size),
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
        ToTensorV2(),
    ])

    return {"train": train_transform, "val": val_transform}
def da_policy_downscale(img_size):
    print("Using Data Augmentation Downscale")

    train_aug = []
    train_aug = common_test_augmentation(img_size) + train_aug

    train_aug_img = [
        albumentations.Downscale(p=0.7, scale_min=0.4, scale_max=0.8, interpolation=0)
    ]

    val_aug = common_test_augmentation(img_size)

    return train_aug, train_aug_img, val_aug
def main():
    size = (432, 432)
    color_dic = {1: [255, 255, 255]}
    img_paths = [p.replace('\\', '/') for p in glob('dataset/train/img_aug/**', recursive=True)
                 if os.path.isfile(p)]
    mask_paths = list(map(lambda x: x.replace('/img_aug/', '/mask_aug/'), img_paths))
    batch_size = 16
    splits = math.ceil(len(img_paths) / batch_size)
    empty = []

    # albumentations
    # https://qiita.com/kurilab/items/b69e1be8d0224ae139ad
    transforms = albu.OneOf([
        albu.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=90),
        albu.GaussNoise(),
        albu.ISONoise(intensity=(0.7, 0.9)),
        albu.Downscale(),
        albu.ElasticTransform(),
        albu.GaussianBlur(),
        albu.MultiplicativeNoise(multiplier=(2.0, 3.0)),
    ])

    for i in range(splits):
        tfrecord_fname = '_record_' + str(i) + '.tfrecord'
        save_path = os.path.join('dataset', tfrecord_fname)
        # One tfrecord file is written per chunk (number of images / batch size chunks in total)
        with tf.io.TFRecordWriter(tfrecord_fname) as writer:
            for img_d, mask_d in zip(img_paths[i::splits], mask_paths[i::splits]):
                # Image transforms
                img = cv2.imread(img_d)
                mask = cv2.imread(mask_d)
                # augmented = transforms(image=img, mask=mask)
                # img, mask = augmented['image'], augmented['mask']
                img = cv2.resize(img, (size[0], size[1]), cv2.INTER_NEAREST)
                mask = cv2.resize(mask, (size[0], size[1]), cv2.INTER_NEAREST)

                # Convert to byte strings
                img = np2byte(img)
                mask = np2byte(mask)
                # img = np2byte(np.float32(img / 127.5 - 1))
                # mask = np2byte(convert_mask(mask, color_dic))

                # Serialize and write out
                proto = serialize_example(img, mask)
                writer.write(proto.SerializeToString())
        if i > 2:
            break
def get_augmentation(input_size, train_flag=True, normalize_flag=True):
    aug_list = []
    # Basic resizing
    aug_list.append(A.Resize(height=input_size[0], width=input_size[1], p=1))

    # Augmentations for training
    if train_flag:
        aug_list.extend([
            A.Flip(),
            A.ShiftScaleRotate(shift_limit=(-0.02, 0.02), scale_limit=(-0.05, 0.05),
                               rotate_limit=30, border_mode=0, value=[0, 0, 0], p=0.5),
            # Color changes
            A.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.5,
                                       brightness_by_max=False, p=0.5),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.5),
            # Image-quality degradation
            A.OneOf([
                A.OneOf([
                    A.Blur(blur_limit=5, p=1),
                    A.GaussianBlur(blur_limit=5, p=1),
                ], p=1),
                A.GaussNoise(var_limit=(10, 80), p=1),
                A.Downscale(scale_min=0.5, scale_max=0.5, p=1),
            ], p=0.4),
            A.CoarseDropout(max_holes=4,
                            max_height=int(input_size[0] / 8), max_width=int(input_size[0] / 8),
                            min_holes=1,
                            min_height=int(input_size[0] / 10), min_width=int(input_size[0] / 10),
                            fill_value=(255, 255, 255), p=0.3),
        ])

    if normalize_flag:
        aug_list.extend([
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0)
        ])
    else:
        aug_list.extend([
            ToTensor(),
        ])
    return A.Compose(aug_list)
def get_augmentation(prob, aug_level, size):
    if isinstance(size, list):
        size = tuple(size)
    augs = []
    augs.append(albumentations.HorizontalFlip(prob))
    if aug_level > 0:
        shift_limit = [None, 0.0625, 0.125, 0.1875][aug_level]
        rotate_limit = [None, 20., 40., 60.][aug_level]
        scale_limit = [None, 0.1, 0.2, 0.3][aug_level]
        augs.append(
            albumentations.ShiftScaleRotate(
                shift_limit, scale_limit, rotate_limit,
                p=prob, border_mode=cv2.BORDER_CONSTANT))
        augs.append(albumentations.Downscale(p=prob / 4))
    return albumentations.Compose(augs)
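The three lists above index augmentation strength by `aug_level` (1 to 3). A quick illustrative call, assuming the same module context:

# aug_level=2 selects shift_limit=0.125, scale_limit=0.2, rotate_limit=40.,
# and Downscale fires at a quarter of the base probability.
aug = get_augmentation(prob=0.5, aug_level=2, size=[224, 224])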
def __init__(self, img, data, img_size):
    """
    arguments
    ---------
    img : list
        list of images, in the original size (height, width, 3)
    data : list of dict
        Each dict has :
            'image' : index of the image. The index should match with img
            'mask' : [xx, yy] IMPORTANT : (WIDTH, HEIGHT)
            'box' : [[xmin, ymin], [xmax, ymax]]
            'size' : the size of the image that the data was created with
                     IMPORTANT : (WIDTH, HEIGHT)
    img_size : tuple
        Desired output image size
        The axes will be swapped to match pygame.
        IMPORTANT : (WIDTH, HEIGHT)
    """
    self.image = img
    self.data = data
    self.n = len(data)
    self.output_size = img_size
    self.aug = A.Compose([
        A.OneOf([
            A.RandomGamma((40, 200), p=1),
            A.RandomBrightness(limit=0.5, p=1),
            A.RandomContrast(limit=0.5, p=1),
            A.RGBShift(40, 40, 40, p=1),
            A.Downscale(scale_min=0.25, scale_max=0.5, p=1),
            A.ChannelShuffle(p=1),
        ], p=0.8),
        A.InvertImg(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=1),
        A.Resize(img_size[0], img_size[1]),
    ])
    for datum in data:
        datum['mask_min'] = np.min(datum['mask'], axis=1)
        datum['mask_max'] = np.max(datum['mask'], axis=1) + 1
def init_augmentation(self, CONFIGURATION):
    """
    Initialization of augmentation function

    Parameters stored in CONFIGURATION.
        Int: CROP_SIZE_HEIGHT -> Height of image crop
        Int: CROP_SIZE_WIDTH -> Width of image crop
        Float: VERTICAL_FLIP_PROBA -> Probability of vertically flipping the image
        Float: HORIZONTAL_FLIP_PROBA -> Probability of horizontally flipping the image

    Returns:
        Augmentation function (albumentations.Compose)
    """
    augmentation = albumentations.Compose([
        InvertImg(p=CONFIGURATION.INVERT_IMG_PROBA),
        albumentations.ShiftScaleRotate(
            p=CONFIGURATION.SCR_PROBA,
            shift_limit=CONFIGURATION.SCR_SHIFT_LIMIT,
            scale_limit=CONFIGURATION.SCR_SCALE_LIMIT,
            rotate_limit=CONFIGURATION.SCR_ROTATE_LIMIT),
        albumentations.GaussianBlur(blur_limit=CONFIGURATION.BLUR_LIMIT,
                                    p=CONFIGURATION.BLUR_PROBA),
        albumentations.Cutout(num_holes=CONFIGURATION.NUM_HOLES,
                              max_h_size=CONFIGURATION.HOLE_SIZE,
                              max_w_size=CONFIGURATION.HOLE_SIZE,
                              p=CONFIGURATION.CUTOUT_PROBA),
        albumentations.Downscale(scale_min=CONFIGURATION.SCALE_MIN,
                                 scale_max=CONFIGURATION.SCALE_MAX,
                                 p=CONFIGURATION.DOWNSCALE_PROBA),
        albumentations.RandomCrop(CONFIGURATION.CROP_SIZE_HEIGHT,
                                  CONFIGURATION.CROP_SIZE_WIDTH, p=1.0),
        albumentations.HorizontalFlip(p=CONFIGURATION.HORIZONTAL_FLIP_PROBA),
    ])
    return augmentation
def __init__(self, config, split):
    """
    :param opt:
    :param split: train/val
    """
    super(DATASET_CUSTOM, self).__init__()
    self.data_dir = config['dataset']['data_dir']
    self.img_dir = os.path.join(self.data_dir, 'images')
    self.input_h = config['model']['input_h']
    self.input_w = config['model']['input_h']
    self.pad = config['model']['pad']
    self.down_ratio = config['model']['down_ratio']
    self.mean = config['dataset']['mean']
    self.std = config['dataset']['std']
    self.max_objs = config['dataset']['max_object']
    self.num_classes = config['dataset']['num_classes']
    self.radius = config['dataset']['radius']
    self.annot_path = os.path.join(self.data_dir, 'annotations', '{}.json').format(split)
    # print(self.data_dir)
    self.class_name = ['__background__'] + config['dataset']['label_name']
    self._valid_ids = [_id for _id in range(1, self.num_classes + 1)]  # [1, 2, ..., self.num_classes]
    self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}  # {1: 0, 2: 1, ...}
    self.split = split

    print('==> initializing {} data.'.format(split))
    self.coco = coco.COCO(self.annot_path)
    self.images = self.coco.getImgIds()
    self.num_samples = len(self.images)
    print('Loaded {} {} samples'.format(split, self.num_samples))

    self.output_h = self.input_h // self.down_ratio  # 512 / 4 = 128
    self.output_w = self.input_w // self.down_ratio

    self.transform_train = A.Compose(
        [
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.4),
                A.RandomGamma(gamma_limit=(50, 150)),
                A.NoOp()
            ]),
            A.OneOf([
                A.RGBShift(r_shift_limit=20, b_shift_limit=15, g_shift_limit=15),
                A.HueSaturationValue(hue_shift_limit=5, sat_shift_limit=5),
                A.NoOp()
            ]),
            A.HorizontalFlip(p=0.5),  # OK
            A.ShiftScaleRotate(shift_limit=[0.1, 0.1], scale_limit=[0, 0], rotate_limit=[-45, 45],
                               p=0.5, border_mode=cv2.BORDER_CONSTANT, value=(255, 255, 255)),  # OK
            A.Downscale(scale_min=0.1, scale_max=0.2, p=0.3),  # OK
            # A.CoarseDropout(max_holes=5, max_height=100, max_width=100,
            #                 min_holes=3, min_height=64, min_width=64, p=0.5),  # error
            A.CLAHE(p=0.5),
            A.Resize(height=self.input_h, width=self.input_w,
                     interpolation=cv2.INTER_LINEAR, always_apply=True),
            A.Normalize(mean=self.mean, std=self.std, always_apply=True)
        ],
        keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'])
    )
    self.transform_heatmap = A.Compose(
        [
            A.Resize(height=self.output_h, width=self.output_w,
                     interpolation=cv2.INTER_LINEAR, always_apply=True)
        ],
        keypoint_params=A.KeypointParams(format='xy')
    )
    self.transform_test = A.Compose(
        [
            A.Resize(height=self.input_h, width=self.input_w,
                     interpolation=cv2.INTER_LINEAR, always_apply=True),
            A.Normalize(mean=self.mean, std=self.std, always_apply=True)
        ],
        keypoint_params=A.KeypointParams(format='xy')
    )
def get_config(runner, raw_uri, processed_uri, root_uri, test=False,
               external_model=False, external_loss=False, augment=False):
    debug = False
    train_scene_info = get_scene_info(join(processed_uri, 'train-scenes.csv'))
    val_scene_info = get_scene_info(join(processed_uri, 'val-scenes.csv'))
    log_tensorboard = True
    run_tensorboard = True
    class_config = ClassConfig(names=['no_building', 'building'])

    if test:
        debug = True
        train_scene_info = train_scene_info[0:1]
        val_scene_info = val_scene_info[0:1]

    def make_scene(scene_info):
        (raster_uri, label_uri) = scene_info
        raster_uri = join(raw_uri, raster_uri)
        label_uri = join(processed_uri, label_uri)
        aoi_uri = join(raw_uri, aoi_path)

        if test:
            crop_uri = join(processed_uri, 'crops', os.path.basename(raster_uri))
            label_crop_uri = join(processed_uri, 'crops', os.path.basename(label_uri))
            save_image_crop(raster_uri, crop_uri,
                            label_uri=label_uri,
                            label_crop_uri=label_crop_uri,
                            size=600,
                            min_features=20,
                            class_config=class_config)
            raster_uri = crop_uri
            label_uri = label_crop_uri

        id = os.path.splitext(os.path.basename(raster_uri))[0]
        raster_source = RasterioSourceConfig(channel_order=[0, 1, 2], uris=[raster_uri])
        label_source = ChipClassificationLabelSourceConfig(
            vector_source=GeoJSONVectorSourceConfig(uri=label_uri,
                                                    default_class_id=1,
                                                    ignore_crs_field=True),
            ioa_thresh=0.5,
            use_intersection_over_cell=False,
            pick_min_class_id=False,
            background_class_id=0,
            infer_cells=True)

        return SceneConfig(id=id,
                           raster_source=raster_source,
                           label_source=label_source,
                           aoi_uris=[aoi_uri])

    chip_sz = 200
    train_scenes = [make_scene(info) for info in train_scene_info]
    val_scenes = [make_scene(info) for info in val_scene_info]
    dataset = DatasetConfig(class_config=class_config,
                            train_scenes=train_scenes,
                            validation_scenes=val_scenes)

    if external_model:
        model = ClassificationModelConfig(external_def=ExternalModuleConfig(
            github_repo='lukemelas/EfficientNet-PyTorch',
            # uri='s3://raster-vision-ahassan/models/EfficientNet-PyTorch.zip',
            name='efficient_net',
            entrypoint='efficientnet_b0',
            force_reload=False,
            entrypoint_kwargs={
                'num_classes': len(class_config.names),
                'pretrained': 'imagenet'
            }))
    else:
        model = ClassificationModelConfig(backbone=Backbone.resnet50)

    if external_loss:
        external_loss_def = ExternalModuleConfig(
            github_repo='AdeelH/pytorch-multi-class-focal-loss',
            name='focal_loss',
            entrypoint='focal_loss',
            force_reload=False,
            entrypoint_kwargs={
                'alpha': [.75, .25],
                'gamma': 2
            })
    else:
        external_loss_def = None

    solver = SolverConfig(lr=1e-4,
                          num_epochs=20,
                          test_num_epochs=4,
                          batch_sz=32,
                          one_cycle=True,
                          external_loss_def=external_loss_def)

    if augment:
        mu = np.array((0.485, 0.456, 0.406))
        std = np.array((0.229, 0.224, 0.225))

        aug_transform = A.Compose([
            A.Flip(),
            A.Transpose(),
            A.RandomRotate90(),
            A.ShiftScaleRotate(),
            A.OneOf([
                A.ChannelShuffle(),
                A.CLAHE(),
                A.FancyPCA(),
                A.HueSaturationValue(),
                A.RGBShift(),
                A.ToGray(),
                A.ToSepia(),
            ]),
            A.OneOf([
                A.RandomBrightness(),
                A.RandomGamma(),
            ]),
            A.OneOf([
                A.GaussNoise(),
                A.ISONoise(),
                A.RandomFog(),
            ]),
            A.OneOf([
                A.Blur(),
                A.MotionBlur(),
                A.ImageCompression(),
                A.Downscale(),
            ]),
            A.CoarseDropout(max_height=32, max_width=32, max_holes=5)
        ])
        base_transform = A.Normalize(mean=mu.tolist(), std=std.tolist())
        plot_transform = A.Normalize(mean=(-mu / std).tolist(),
                                     std=(1 / std).tolist(),
                                     max_pixel_value=1.)
    else:
        aug_transform = None
        base_transform = None
        plot_transform = None

    backend = PyTorchChipClassificationConfig(
        model=model,
        solver=solver,
        log_tensorboard=log_tensorboard,
        run_tensorboard=run_tensorboard,
        test_mode=test,
        base_transform=A.to_dict(base_transform),
        aug_transform=A.to_dict(aug_transform),
        plot_options=PlotOptions(transform=A.to_dict(plot_transform)))

    config = ChipClassificationConfig(root_uri=root_uri,
                                      dataset=dataset,
                                      backend=backend,
                                      train_chip_sz=chip_sz,
                                      predict_chip_sz=chip_sz)
    return config
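The `plot_transform` above undoes the ImageNet normalization: with max_pixel_value=1., Normalize computes (x - mean) / std, so composing it with mean' = -mu/std and std' = 1/std recovers the original pixels. A quick numerical check of that identity (self-contained NumPy, not part of the original config):

import numpy as np

mu = np.array((0.485, 0.456, 0.406))
std = np.array((0.229, 0.224, 0.225))

x = np.random.rand(4, 4, 3)          # image already scaled to [0, 1]
normalized = (x - mu) / std          # what base_transform does
restored = (normalized - (-mu / std)) / (1 / std)  # what plot_transform does
np.testing.assert_allclose(restored, x)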
def get_augmentations(name, img_size):
    if name == 'training_none':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    elif name == 'training_dropout':
        aug = A.Compose([
            A.Resize(img_size, img_size),
            A.CoarseDropout(min_height=int(img_size * 0.05), min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=1, max_holes=20, p=0),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    elif name == 'training_1':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10, val_shift_limit=10, sat_shift_limit=10, p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ], p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=5, max_holes=10, p=0.5),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    elif name == 'training_2':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10, val_shift_limit=10, sat_shift_limit=10, p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=0.3),
            A.OneOf([
                A.OpticalDistortion(distort_limit=1.0),
                A.GridDistortion(num_steps=5, distort_limit=1.),
                A.ElasticTransform(alpha=3),
            ], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15),
            ], p=0.2),
            A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=5, max_holes=10, p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_2_bis':
        aug = A.Compose([
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.ShiftScaleRotate(rotate_limit=30, p=0.5),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=10, val_shift_limit=10, sat_shift_limit=10, p=0.7),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur()
            ], p=0.3),
            # A.OneOf([A.OpticalDistortion(distort_limit=1.0),
            #          A.GridDistortion(num_steps=5, distort_limit=1.),
            #          A.ElasticTransform(alpha=3)], p=0.3),
            A.OneOf([
                A.ImageCompression(),
                A.Downscale(scale_min=0.1, scale_max=0.15)
            ], p=0.2),
            # A.IAAPiecewiseAffine(p=0.2),
            A.IAASharpen(p=0.2),
            A.CoarseDropout(max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=5, max_holes=10, p=0.5),
            A.Normalize(),
            ToTensorV2()
        ])
    elif name == 'training_3':
        aug = A.Compose([
            A.Rotate(limit=5),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.5),
            A.CoarseDropout(min_height=int(img_size * 0.05), min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=1, max_holes=10, p=0.5),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    elif name == 'training_4':
        aug = A.Compose([
            A.Rotate(limit=5, p=1),
            A.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.15, +0.25),
                                       contrast_limit=(-0.15, +0.25), p=1),
            A.CLAHE(clip_limit=(1, 4), p=0.5),
            A.OneOf([
                A.GaussNoise(var_limit=(10, 50)),
                A.GaussianBlur(),
                A.MotionBlur(),
                A.MedianBlur(),
            ], p=1),
            A.IAASharpen(p=0.3),
            A.CoarseDropout(min_height=int(img_size * 0.05), min_width=int(img_size * 0.05),
                            max_height=int(img_size * 0.1), max_width=int(img_size * 0.1),
                            min_holes=1, max_holes=20, p=0),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
    elif name == 'validation':
        aug = A.Compose([A.Resize(img_size, img_size), A.Normalize(), ToTensorV2()])
    elif name == 'none':
        aug = A.Compose([A.Resize(img_size, img_size)])
    else:
        raise ValueError(f"{name} is not a valid augmentations name")
    return aug
def get_transform_imagenet(use_albu_aug):
    if use_albu_aug:
        train_transform = al.Compose([
            # al.Flip(p=0.5),
            al.Resize(256, 256, interpolation=2),
            al.RandomResizedCrop(224, 224, scale=(0.08, 1.0),
                                 ratio=(3. / 4., 4. / 3.), interpolation=2),
            al.HorizontalFlip(),
            al.OneOf([
                al.OneOf([
                    al.ShiftScaleRotate(border_mode=cv2.BORDER_CONSTANT,
                                        rotate_limit=30),  # , p=0.05),
                    al.OpticalDistortion(border_mode=cv2.BORDER_CONSTANT,
                                         distort_limit=5.0, shift_limit=0.1),  # , p=0.05),
                    al.GridDistortion(border_mode=cv2.BORDER_CONSTANT),  # , p=0.05),
                    al.ElasticTransform(border_mode=cv2.BORDER_CONSTANT,
                                        alpha_affine=15),  # , p=0.05),
                ], p=0.1),
                al.OneOf([
                    al.RandomGamma(),  # p=0.05),
                    al.HueSaturationValue(),  # p=0.05),
                    al.RGBShift(),  # p=0.05),
                    al.CLAHE(),  # p=0.05),
                    al.ChannelShuffle(),  # p=0.05),
                    al.InvertImg(),  # p=0.05),
                ], p=0.1),
                al.OneOf([
                    al.RandomSnow(),  # p=0.05),
                    al.RandomRain(),  # p=0.05),
                    al.RandomFog(),  # p=0.05),
                    al.RandomSunFlare(num_flare_circles_lower=1,
                                      num_flare_circles_upper=2,
                                      src_radius=110),  # p=0.05),
                    al.RandomShadow(),  # p=0.05),
                ], p=0.1),
                al.RandomBrightnessContrast(p=0.1),
                al.OneOf([
                    al.GaussNoise(),  # p=0.05),
                    al.ISONoise(),  # p=0.05),
                    al.MultiplicativeNoise(),  # p=0.05),
                ], p=0.1),
                al.OneOf([
                    al.ToGray(),  # p=0.05),
                    al.ToSepia(),  # p=0.05),
                    al.Solarize(),  # p=0.05),
                    al.Equalize(),  # p=0.05),
                    al.Posterize(),  # p=0.05),
                    al.FancyPCA(),  # p=0.05),
                ], p=0.1),
                al.OneOf([
                    # al.MotionBlur(blur_limit=1),
                    al.Blur(blur_limit=[3, 5]),
                    al.MedianBlur(blur_limit=[3, 5]),
                    al.GaussianBlur(blur_limit=[3, 5]),
                ], p=0.1),
                al.OneOf([
                    al.CoarseDropout(),  # p=0.05),
                    al.Cutout(),  # p=0.05),
                    al.GridDropout(),  # p=0.05),
                    al.ChannelDropout(),  # p=0.05),
                    al.RandomGridShuffle(),  # p=0.05),
                ], p=0.1),
                al.OneOf([
                    al.Downscale(),  # p=0.1),
                    al.ImageCompression(quality_lower=60),  # , p=0.1),
                ], p=0.1),
            ], p=0.5),
            al.Normalize(),
            ToTensorV2()
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])

    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    if use_albu_aug:
        train_transform = MultiDataTransformAlbu(train_transform)
    else:
        train_transform = MultiDataTransform(train_transform)

    return train_transform, test_transform
transformations = {
    'train_transform': A.Compose([
        A.Rotate(border_mode=cv2.BORDER_CONSTANT, interpolation=cv2.INTER_AREA, always_apply=True),
        A.Flip(),
        A.OneOf([
            A.CLAHE(tile_grid_size=(5, 5)),
            A.RandomBrightnessContrast(),
            A.RandomGamma()
        ]),
        A.OneOf([
            A.RGBShift(),
            A.HueSaturationValue(),
            A.ChannelShuffle(p=0.25)
        ]),
        A.Downscale(scale_min=0.3, scale_max=0.5, always_apply=True),
        A.GaussNoise(var_limit=(20., 90.), always_apply=True),
        A.Blur(blur_limit=12, always_apply=True),
        A.Resize(224, 224, always_apply=True),
        A.ToFloat()
    ]),
    'val_transform': A.Compose([A.Resize(224, 224, always_apply=True), A.ToFloat()])
}

def apply_augmentation(image, is_training):
    if is_training:
        data = transformations['train_transform'](image=image)
    else:
        data = transformations['val_transform'](image=image)
    return data['image']
def get_aug(aug_type: str = "val", task: str = "denoise", dataset: str = "cifar100", size: int = 64):
    """
    Args:
        aug_type: {`val`, `test`, `light`, `medium`}
        task: {"denoise", "deblur", "sr"}
        dataset: Name of dataset to get MEAN and STD
        size: final size of the crop
    """
    assert aug_type in ["val", "test", "light", "medium"]

    # Add the same noise for all channels for single-channel images
    mean, std, max_value = MEAN_STD_BY_NAME[dataset]
    if dataset == "medicaldecathlon":
        singlechannel = True
        normalization = albu.NoOp()
        noise = GaussNoiseNoClipping(singlechannel, var_limit=0.1)
    else:
        singlechannel = False
        normalization = albu.Normalize(mean=mean, std=std, max_pixel_value=max_value)
        noise = albu.MultiplicativeNoise(multiplier=(0.75, 1.25), per_channel=True, elementwise=True)

    NORM_TO_TENSOR = albu.Compose(
        [normalization, albu_pt.ToTensorV2()],
        additional_targets={"mask": "image"})

    CROP_AUG = albu.Compose([
        albu.PadIfNeeded(size, size),
        albu.RandomResizedCrop(size, size, scale=(0.5, 1.)),
    ])

    if task == "deblur":
        TASK_AUG = albu.OneOf([
            albu.Blur(blur_limit=(3, 5)),
            albu.GaussianBlur(blur_limit=(3, 5)),
            # albu.MotionBlur(),
            # albu.MedianBlur(),
            # albu.GlassBlur(),
        ], p=1.0)
    elif task == "denoise":
        # TASK_AUG = noise
        TASK_AUG = albu.OneOf([
            noise,
            # albu.GaussNoise(),
            # GaussNoiseNoClipping(singlechannel, var_limit=0.1 if singlechannel else (20., 50.)),
            # albu.GlassBlur(),
            # albu.ISONoise(),
            # albu.MultiplicativeNoise(),
        ], p=1.0)
    elif task == "sr":
        TASK_AUG = albu.Downscale(scale_min=0.5, scale_max=0.5,
                                  interpolation=cv2.INTER_CUBIC, always_apply=True)
    else:
        raise ValueError("Name of task must be in {'deblur', 'denoise', 'sr'}")

    VAL_AUG = albu.Compose([
        albu.PadIfNeeded(size, size),
        albu.CenterCrop(size, size),
        TASK_AUG,
        NORM_TO_TENSOR,
    ])

    LIGHT_AUG = albu.Compose([
        CROP_AUG,
        TASK_AUG,
        NORM_TO_TENSOR,
    ])

    MEDIUM_AUG = albu.Compose([
        albu.Flip(),
        albu.RandomRotate90(),
        CROP_AUG,
        TASK_AUG,
        NORM_TO_TENSOR
    ])

    types = {
        "val": VAL_AUG,
        "light": LIGHT_AUG,
        "medium": MEDIUM_AUG,
    }
    return types[aug_type]
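A hedged usage sketch for the "sr" task: because `Downscale` is an image-only transform, a clean copy passed as `mask` keeps its resolution while `image` is degraded, yielding an aligned input/target pair. This assumes `MEAN_STD_BY_NAME` has a "cifar100" entry; the array below is illustrative:

import numpy as np

img = (np.random.rand(64, 64, 3) * 255).astype("uint8")
val_aug = get_aug(aug_type="val", task="sr", dataset="cifar100", size=64)
out = val_aug(image=img, mask=img)   # `mask` stays clean; `image` is downscaled
lr_tensor, hr_tensor = out["image"], out["mask"]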
        A.RandomBrightnessContrast(brightness_limit=[0.3, 0.3], contrast_limit=[0.3, 0.3]),
        # A.CLAHE(),
    ]),
], p=1)

medium_aug = A.Compose([
    A.OneOf([
        A.Flip(),
        A.Rotate(limit=180, border_mode=0, value=0, mask_value=0),
        A.ElasticTransform(border_mode=0, value=0, p=1),
    ]),
    A.OneOf([
        A.GaussNoise(var_limit=(50, 100), mean=0),
        A.Downscale(scale_min=0.8, scale_max=0.99),
    ]),
    A.OneOf([
        A.GaussianBlur(),
        A.MotionBlur(),
        A.MedianBlur(blur_limit=(3, 5)),
    ]),
    A.OneOf([
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3),
        A.CLAHE(),
    ]),
], p=0.5)

heavy_aug = A.Compose([
    A.OneOf([
        A.Flip(),
def get_transforms(*, data):
    if data == 'train':
        import albumentations
        return Compose([
            albumentations.RandomResizedCrop(CFG.size, CFG.size, scale=(0.9, 1), p=1),
            albumentations.HorizontalFlip(p=0.5),
            albumentations.ShiftScaleRotate(p=0.5),
            albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10,
                                              val_shift_limit=10, p=0.7),
            albumentations.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2),
                                                    contrast_limit=(-0.2, 0.2), p=0.7),
            albumentations.CLAHE(clip_limit=(1, 4), p=0.5),
            albumentations.OneOf([
                albumentations.OpticalDistortion(distort_limit=1.0),
                albumentations.GridDistortion(num_steps=5, distort_limit=1.),
                albumentations.ElasticTransform(alpha=3),
            ], p=0.2),
            albumentations.OneOf([
                albumentations.GaussNoise(var_limit=[10, 50]),
                albumentations.GaussianBlur(),
                albumentations.MotionBlur(),
                albumentations.MedianBlur(),
            ], p=0.2),
            albumentations.Resize(CFG.size, CFG.size),
            albumentations.OneOf([
                albumentations.JpegCompression(),
                albumentations.Downscale(scale_min=0.1, scale_max=0.15),
            ], p=0.2),
            albumentations.IAAPiecewiseAffine(p=0.2),
            albumentations.IAASharpen(p=0.2),
            albumentations.Cutout(max_h_size=int(CFG.size * 0.1),
                                  max_w_size=int(CFG.size * 0.1),
                                  num_holes=5, p=0.5),
            albumentations.Normalize(),
            ToTensorV2()
        ])
        # if data == 'train':
        #     return Compose([
        #         # Resize(int(CFG.size * 1.25), int(CFG.size * 1.25)),
        #         # Resize(CFG.size, CFG.size),
        #         RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
        #         # HorizontalFlip(p=0.5),
        #         # RandomBrightnessContrast(p=0.2, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2)),
        #         # HueSaturationValue(p=0.2),
        #         ShiftScaleRotate(p=0.2, shift_limit=0.0625, scale_limit=0.2, rotate_limit=20),
        #         # CoarseDropout(p=0.2),  # max_holes=8, max_height=8, max_width=8
        #         # Cutout(p=0.2, max_h_size=16, max_w_size=16, fill_value=(0., 0., 0.), num_holes=16),
        #         Normalize(
        #             mean=[0.485, 0.456, 0.406],
        #             std=[0.229, 0.224, 0.225],
        #         ),
        #         ToTensorV2(),
        #     ])
    elif data == 'valid':
        return Compose([
            Resize(CFG.size, CFG.size),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])