def get_transform(name='default', resize=512):
    """Return an albumentations pipeline selected by preset name.

    'default': resize + flips + photometric/geometric augmentation.
    'train1' : bbox-free crop-based training augmentation.
    'val'/'test': resize only.  Unknown names return None.
    """
    if name == 'default':
        return A.Compose([
            A.Resize(resize, resize),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.OneOf([
                A.RandomContrast(),
                A.RandomGamma(),
                A.RandomBrightness(),
                A.ColorJitter(brightness=0.07, contrast=0.07,
                              saturation=0.1, hue=0.1,
                              always_apply=False, p=0.3),
            ], p=0.3),
            # NOTE(review): p=0.0 disables this entire distortion group —
            # confirm whether it was deliberately switched off.
            A.OneOf([
                A.ElasticTransform(alpha=120, sigma=120 * 0.05,
                                   alpha_affine=120 * 0.03),
                A.GridDistortion(),
                A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
            ], p=0.0),
            A.ShiftScaleRotate(),
        ])
    if name == 'train1':
        return A.Compose([
            A.RandomCrop(resize, resize, True),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ColorJitter(brightness=0.07, contrast=0.07, saturation=0.1,
                          hue=0.1, always_apply=False, p=0.3),
            A.ElasticTransform(alpha=120, sigma=120 * 0.05,
                               alpha_affine=120 * 0.03),
            A.ChannelShuffle(p=0.6),
        ])
    if name in ('val', 'test'):
        return A.Compose([A.Resize(resize, resize)])
    return None
def __init__(self, root, year, input_size, pooler_size):
    """COCO train-split dataset with bbox-safe augmentation.

    root: dataset root containing train{year}/ and annotations/.
    year: COCO release year used to locate images and annotation JSON.
    input_size: (height, width) of the model input.
    pooler_size: RoI pooler output size, stored for later use.
    """
    ann_file = os.path.join(root, 'annotations',
                            f'instances_train{year}.json')
    super(TrainDataset, self).__init__(
        root=os.path.join(root, f'train{year}'), annFile=ann_file)
    self.h, self.w = input_size
    self.pooler_size = pooler_size
    self.cat_idx_list, self.cat_to_label_map, _ = tools.get_cat_label_map(
        self.coco, tools.COCO_CLASSES)
    # Crop is guaranteed to keep every bbox in frame; hue left untouched.
    self.img_transform = alb.Compose(
        [
            alb.RandomSizedBBoxSafeCrop(width=self.w, height=self.h),
            alb.HorizontalFlip(p=0.5),
            alb.ColorJitter(brightness=0.4, contrast=0.4,
                            saturation=0.4, hue=0., p=0.8),
        ],
        bbox_params=alb.BboxParams(format='coco',
                                   label_fields=['class_labels']))
    self.points, self.regress_ranges = \
        tools.encode_points_and_regress_ranges(self.h, self.w)
def transform(image_size=256, is_training=True):
    """Return the preprocessing pipeline for training or evaluation.

    Both paths end with an identity Normalize (mean 0, std 1, /255)
    and tensor conversion; training additionally jitters and flips.
    """
    if not is_training:
        return A.Compose([
            A.Resize(height=image_size, width=image_size),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1],
                        max_pixel_value=255.0),
            ToTensorV2(),
        ])
    return A.Compose([
        A.Resize(width=image_size, height=image_size),
        A.RandomContrast(limit=0.2, p=0.4),
        A.ColorJitter(),
        A.Rotate(40),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1],
                    max_pixel_value=255.0),
        ToTensorV2(),
    ])
def get_transform(train: bool, im_size: int = 400):
    """Return train (augmented) or eval (resize-only) pipeline."""
    if not train:
        return A.Compose([
            A.Resize(height=im_size, width=im_size,
                     interpolation=cv2.INTER_CUBIC),
            A.Normalize(),
            ToTensorV2(),
        ])
    # Exactly one spatial op fires per sample: random crop or plain resize.
    spatial = A.OneOf([
        A.RandomSizedCrop(min_max_height=(224, 720),
                          height=im_size, width=im_size, p=0.5),
        A.Resize(height=im_size, width=im_size,
                 interpolation=cv2.INTER_CUBIC, p=0.5),
    ], p=1)
    return A.Compose([
        spatial,
        A.ChannelShuffle(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(p=0.5),
        A.Blur(p=0.5),
        A.Normalize(),
        ToTensorV2(),
    ])
def make_aug_default(img_size, is_train=True, rotate_limit=10, **kws):
    """Build the default augmentation wrapper for keypoint training.

    img_size: (width, height) target; the image is scaled to fit the
    width then padded to the full size with edge replication.
    Color and geometric augmentation only apply when is_train is True.
    """
    imw, imh = img_size
    if is_train:
        color_aug = [
            A.HueSaturationValue(),
            A.RandomBrightnessContrast(),
            A.ColorJitter(),
        ]
        geo_aug = [
            # HorizontalFlip deliberately omitted: it breaks
            # left/right keypoint identity.
            A.ShiftScaleRotate(rotate_limit=rotate_limit,
                               border_mode=cv2.BORDER_REPLICATE),
            A.Perspective((0.03, 0.05), pad_mode=cv2.BORDER_REPLICATE),
        ]
    else:
        color_aug, geo_aug = [], []
    transforms = [
        A.LongestMaxSize(max_size=imw),
        *color_aug,
        A.PadIfNeeded(min_height=imh, min_width=imw,
                      border_mode=cv2.BORDER_REPLICATE),
        *geo_aug,
    ]
    return [AlbuAug(transforms, skip_img_without_ann=True, **kws)]
def _get_train_transform(self):
    """Flip + color jitter, then one of three resize/crop strategies."""
    print("Using train transform")
    short_side = self.target_size[0]
    # Strategy 1: scale shortest side, then take a square random crop.
    scale_then_crop = A.Compose([
        A.SmallestMaxSize(max_size=short_side,
                          interpolation=cv2.INTER_LINEAR, p=1.0),
        A.RandomCrop(height=short_side, width=short_side, p=1.0),
    ], p=1.0)
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(brightness=0.2, contrast=0.2,
                      saturation=0.2, hue=0, p=0.5),
        A.OneOf([
            scale_then_crop,
            # Strategy 2: random area/aspect crop resized to target.
            A.RandomResizedCrop(height=self.target_size[0],
                                width=self.target_size[1],
                                scale=(0.25, 1.0),
                                ratio=(3. / 4., 4. / 3.),
                                interpolation=cv2.INTER_LINEAR, p=1.0),
            # Strategy 3: plain resize.
            A.Resize(height=self.target_size[0],
                     width=self.target_size[1],
                     interpolation=cv2.INTER_LINEAR, p=1.0),
        ], p=1.0),
    ])
def compose_im_trf(cfg):
    """Build (train, val) image pipelines from the config.

    Both resize to (IMG_WIDTH, IMG_HEIGHT) and normalize with the
    dataset statistics; only the train pipeline augments.
    """
    train_tf = A.Compose([
        A.Resize(width=cfg.IMG_WIDTH, height=cfg.IMG_HEIGHT),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=20, g_shift_limit=20,
                   b_shift_limit=20, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=1, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
        A.Normalize(mean=cfg.ds_mean, std=cfg.ds_std,
                    max_pixel_value=255.0),
        ToTensorV2(),
    ])
    val_tf = A.Compose([
        A.Resize(width=cfg.IMG_WIDTH, height=cfg.IMG_HEIGHT),
        A.Normalize(mean=cfg.ds_mean, std=cfg.ds_std,
                    max_pixel_value=255.0),
        ToTensorV2(),
    ])
    return train_tf, val_tf
def get_transform(train: bool, im_size: int = 400):
    """Build the preprocessing pipeline.

    train=True adds crop/flip/color/blur augmentation; otherwise only a
    cubic resize. Both end with Normalize + tensor conversion.
    """
    if train:
        steps = [
            A.OneOf([
                A.RandomSizedCrop(min_max_height=(224, 720),
                                  height=im_size, width=im_size, p=0.5),
                A.Resize(height=im_size, width=im_size,
                         interpolation=cv2.INTER_CUBIC, p=0.5),
            ], p=1),
            A.ChannelShuffle(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.ColorJitter(p=0.5),
            A.Blur(p=0.5),
        ]
    else:
        steps = [
            A.Resize(height=im_size, width=im_size,
                     interpolation=cv2.INTER_CUBIC),
        ]
    steps += [A.Normalize(), ToTensorV2()]
    return A.Compose(steps)
def get_train_transforms(cfg_trans):
    """Training pipeline with pascal_voc bbox filtering.

    cfg_trans['in_size'] is (width, height); boxes smaller than
    cfg_trans['min_area'] or less than 40% visible are dropped.
    """
    in_w, in_h = cfg_trans['in_size'][0], cfg_trans['in_size'][1]
    return A.Compose(
        [
            A.Resize(height=in_h, width=in_w, p=1),
            A.HorizontalFlip(p=0.5),
            A.RandomResizedCrop(height=in_h, width=in_w,
                                scale=(0.5, 1.0),
                                ratio=(0.75, 1.3333333333333333),
                                interpolation=1, always_apply=False,
                                p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                          hue=0.2, always_apply=False, p=0.5),
            A.Normalize(mean=3 * [0], std=3 * [1]),
            ToTensorV2(),
        ],
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=cfg_trans['min_area'],
                                 min_visibility=0.4))
def get_transform(img_height, img_width, input_height, input_width, val=False):
    """Build a detection pipeline (bboxes in 'albumentations' format).

    val=True resizes only; training flips, randomly crops, jitters,
    then resizes to (input_height, input_width).
    """
    if val:
        return A.Compose(
            [A.Resize(input_height, input_width, p=1.)],
            bbox_params=A.BboxParams(
                format='albumentations',
                min_visibility=0,
                label_fields=['class_indices'],
            ))
    # NOTE(review): the crop size is sampled ONCE here, when the pipeline
    # is built — every image processed by the returned transform gets the
    # same h_crop/w_crop. Confirm per-image randomness was not intended.
    h_crop = int(img_height * np.random.uniform(low=0.25, high=0.9))
    w_crop = int(img_width * np.random.uniform(low=0.25, high=0.9))
    return A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.RandomCrop(width=w_crop, height=h_crop, p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2,
                          saturation=0.2, hue=0.2, p=0.5),
            A.Resize(input_height, input_width, p=1.),
        ],
        bbox_params=A.BboxParams(
            format='albumentations',
            min_visibility=0.2,
            label_fields=['class_indices'],
        ))
def test_color_jitter(brightness, contrast, saturation, hue):
    """albumentations ColorJitter must match torchvision's within +/-2."""
    np.random.seed(0)
    img = np.random.randint(0, 256, [100, 100, 3], dtype=np.uint8)
    pil_image = Image.fromarray(img)
    # Degenerate [x, x] ranges make both implementations deterministic
    # up to their internal math, so outputs are directly comparable.
    fixed = dict(
        brightness=[brightness, brightness],
        contrast=[contrast, contrast],
        saturation=[saturation, saturation],
        hue=[hue, hue],
    )
    albu_jitter = A.Compose([A.ColorJitter(p=1, **fixed)])
    pil_jitter = ColorJitter(**fixed)
    res1 = albu_jitter(image=img)["image"]
    res2 = np.array(pil_jitter(pil_image))
    diff = np.abs(res1.astype(np.int16) - res2.astype(np.int16)).max()
    assert diff <= 2, "Max: {}".format(diff)
def get_transform(train: bool, im_size: int = 400):
    """Pascal-VOC bbox pipeline; training adds shuffles/flips/jitter/blur."""
    bbox = A.BboxParams(format='pascal_voc', label_fields=['category_ids'])
    steps = [
        A.Resize(height=im_size, width=im_size,
                 interpolation=cv2.INTER_CUBIC),
    ]
    if train:
        steps += [
            A.ChannelShuffle(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.ColorJitter(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Blur(p=0.5),
        ]
    steps += [A.Normalize(), ToTensorV2()]
    return A.Compose(steps, bbox_params=bbox)
def __init__(self, img_size=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Phase-keyed transforms: heavy train augmentation, plain val resize."""
    super(ImageTransform_3, self).__init__()
    train_pipeline = albu.Compose([
        albu.RandomResizedCrop(img_size, img_size),
        albu.ColorJitter(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.Transpose(p=0.5),
        albu.MotionBlur(p=0.5),
        albu.Normalize(mean, std),
        ToTensorV2(),
    ], p=1.0)
    val_pipeline = albu.Compose([
        albu.Resize(img_size, img_size),
        albu.Normalize(mean, std),
        ToTensorV2(),
    ], p=1.0)
    self.transform = {'train': train_pipeline, 'val': val_pipeline}
def __init__(self, root_dir, is_train): super(FaceDataset, self).__init__() #self.local_rank = local_rank self.is_train = is_train self.input_size = 256 self.num_kps = 68 transform_list = [] if is_train: transform_list += \ [ A.ColorJitter(brightness=0.8, contrast=0.5, p=0.5), A.ToGray(p=0.1), A.ISONoise(p=0.1), A.MedianBlur(blur_limit=(1,7), p=0.1), A.GaussianBlur(blur_limit=(1,7), p=0.1), A.MotionBlur(blur_limit=(5,12), p=0.1), A.ImageCompression(quality_lower=50, quality_upper=90, p=0.05), A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=40, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0, p=0.8), A.HorizontalFlip(p=0.5), RectangleBorderAugmentation(limit=0.33, fill_value=0, p=0.2), ] transform_list += \ [ A.geometric.resize.Resize(self.input_size, self.input_size, interpolation=cv2.INTER_LINEAR, always_apply=True), A.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ToTensorV2(), ] self.transform = A.ReplayCompose(transform_list, keypoint_params=A.KeypointParams( format='xy', remove_invisible=False)) self.root_dir = root_dir with open(osp.join(root_dir, 'annot.pkl'), 'rb') as f: annot = pickle.load(f) self.X, self.Y = annot train_size = int(len(self.X) * 0.99) if is_train: self.X = self.X[:train_size] self.Y = self.Y[:train_size] else: self.X = self.X[train_size:] self.Y = self.Y[train_size:] #if local_rank==0: # logging.info('data_transform_list:%s'%transform_list) flip_parts = ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10], [18, 27], [19, 26], [20, 25], [21, 24], [22, 23], [32, 36], [33, 35], [37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47], [49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56]) self.flip_order = np.arange(self.num_kps) for pair in flip_parts: self.flip_order[pair[1] - 1] = pair[0] - 1 self.flip_order[pair[0] - 1] = pair[1] - 1 logging.info('len:%d' % len(self.X)) print('!!!len:%d' % len(self.X))
def __init__(self, img_size=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Phase-keyed transforms: train adds distortion + dropout, val resizes."""
    super(ImageTransform_5, self).__init__()
    train_pipeline = albu.Compose([
        albu.RandomResizedCrop(img_size, img_size),
        albu.ColorJitter(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.Transpose(p=0.5),
        albu.MotionBlur(p=0.5),
        albu.OneOf([
            albu.GridDistortion(p=1.0),
            albu.OpticalDistortion(p=1.0),
        ], p=0.5),
        albu.Normalize(mean, std),
        # Dropout runs after normalization, so holes are zero-valued
        # in normalized space.
        albu.CoarseDropout(max_height=15, max_width=15,
                           max_holes=8, p=0.5),
        ToTensorV2(),
    ], p=1.0)
    val_pipeline = albu.Compose([
        albu.Resize(img_size, img_size),
        albu.Normalize(mean, std),
        ToTensorV2(),
    ], p=1.0)
    self.transform = {'train': train_pipeline, 'val': val_pipeline}
def __call__(self, img):
    """Color-jitter the RGB bands of a channel-first multispectral image.

    Only 10-channel inputs (Sentinel-2, per the original comments) are
    augmented; the first three channels are treated as RGB. Any other
    channel count is returned unchanged.

    Improvement: replaced deprecated np.rollaxis with the equivalent
    np.moveaxis calls (rollaxis is kept only for backward compatibility
    per NumPy docs); behavior is identical.
    """
    transform = A.Compose([
        A.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4,
                      hue=0.4, always_apply=False, p=0.8),
    ])
    channel_count = np.shape(img)[0]
    if channel_count == 10:  # S2
        # albumentations expects HWC: move channels to the last axis.
        img = np.moveaxis(img, 0, 2)
        rgb_img = img[:, :, [0, 1, 2]]
        transformed = transform(image=rgb_img)
        transformed_image = transformed["image"]
        # Write the augmented pixels back into the original S2 image.
        img[:, :, [0, 1, 2]] = transformed_image[:, :, [0, 1, 2]]
        # Restore channel-first layout.
        img = np.moveaxis(img, 2, 0)
    return img
def main():
    """Train cassava leaf classifiers with stratified K-fold CV.

    Hyper-parameters come from cassava.env; os.getenv returns strings,
    so every numeric option must be converted explicitly.

    Bug fixes:
    - batch_size was int(os.getenv('BATCH_SIZE'), 16): that parses the
      env value as BASE-16 and raises TypeError when BATCH_SIZE is
      unset (int(None, 16)). The default belongs inside getenv.
    - resize stayed a str whenever RESIZE was set in the environment,
      breaking resize * 1.5 and the crop sizes; it is now an int.
    """
    load_dotenv('cassava.env')
    seed_everything(SEED)
    root_path = os.getenv('ROOT_PATH')
    train_csv_path = root_path + 'train.csv'
    train_root_path = root_path + 'train_images'
    num_classes = int(os.getenv('NUM_CLASSES', 5))
    num_epoch = int(os.getenv('NUM_EPOCH', 10))
    num_folds = int(os.getenv('NUM_FOLDS', 5))
    batch_size = int(os.getenv('BATCH_SIZE', 16))
    grad_acc = int(os.getenv('GRAD_ACC', 8))
    resize = int(os.getenv('RESIZE', 224))
    normalize = A.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    train_transform = A.Compose([
        A.HorizontalFlip(),
        A.ShiftScaleRotate(p=1.0),
        A.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.2,
                      hue=0.0, p=1.0, always_apply=False),
        A.RandomResizedCrop(resize, resize, p=1.0, always_apply=True),
        normalize,
        ToTensorV2(p=1.0),
    ], p=1.0)
    # Test images are kept larger; TTA five-crops back down to `resize`.
    test_transform = A.Compose([
        A.Resize(int(resize * 1.5), int(resize * 1.5)),
        normalize,
        ToTensorV2(p=1.0),
    ], p=1.0)
    tta_transform = tta.Compose([
        tta.FiveCrops(resize, resize),
    ])
    criterion = MixedLabelLoss(nn.CrossEntropyLoss(reduction='none'))
    augmentations = [snapmix, ]
    df = pd.read_csv(train_csv_path)
    folds = StratifiedKFold(n_splits=num_folds, shuffle=True,
                            random_state=SEED).split(df['image_id'],
                                                     df['label'])
    for _fold, (train, test) in enumerate(folds):
        train = df.iloc[train]
        test = df.iloc[test]
        scheduler = optim.lr_scheduler.CosineAnnealingLR
        model = TimmNet('efficientnet_b3a', num_classes, criterion,
                        learning_rate=1e-3, scheduler=scheduler,
                        n_epoch=num_epoch, eta_min=1e-6,
                        augmentations=augmentations,
                        tta_transform=tta_transform)
        dm = DataFrameDataModule(train, train_root_path, test,
                                 batch_size=batch_size,
                                 train_transform=train_transform,
                                 test_transform=test_transform)
        mlf_logger = MLFlowLogger(
            experiment_name='cassava',
            tracking_uri='file:./cassava'
        )
        trainer = Trainer(gpus=-1, precision=32, deterministic=True,
                          accumulate_grad_batches=grad_acc,
                          profiler='simple', val_check_interval=1.0,
                          logger=mlf_logger, max_epochs=num_epoch)
        trainer.fit(model, datamodule=dm)
def run(config):
    """Fine-tune a detection model's head layers on Pascal-VOC images."""
    # Config paths are given relative to the repo root, two levels up.
    config['data']['voc_dir'] = '../../' + config['data']['voc_dir']
    config['model']['pretrained_dir'] = \
        '../../' + config['model']['pretrained_dir']
    seed_everything(config['seed'])

    # Data loaders over a random train/val split of the JPEG images.
    data_config = config['data']
    voc_dir = data_config['voc_dir']
    image_paths = glob(f'{voc_dir}/JPEGImages/*.jpg')
    train_paths, val_paths = train_test_split(
        image_paths, test_size=data_config['val_ratio'])
    augment = A.Compose(
        [A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.ColorJitter()],
        bbox_params=A.BboxParams(format='pascal_voc',
                                 label_fields=['class_labels']))
    train_loader = VOCLoader(train_paths,
                             batch_size=data_config['batch_size'],
                             shuffle=True, augment=augment)
    val_loader = VOCLoader(val_paths, batch_size=data_config['batch_size'])

    # Detection model + checkpoint handle.
    model, target_size, ckpt = build_detection_model(**config['model'])
    model_dir = 'models'
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Only weights whose name contains 'Head' are trained.
    train_config = config['train']
    optimizer = tf.keras.optimizers.Adam(train_config['learning_rate'])
    head_weights = [w for w in model.trainable_weights if 'Head' in w.name]
    train_step = get_train_step(model=model, optimizer=optimizer,
                                target_weights=head_weights,
                                target_size=target_size)

    for epoch in range(train_config['n_epochs']):
        for batch, (images, boxes_list, labels_list) in enumerate(train_loader):
            loss = train_step(images, boxes_list, labels_list)
            show_progress(epoch=epoch, batch=batch,
                          batch_total=len(train_loader),
                          loss=loss.numpy())
        val_result = validate(model, val_loader)
        ckpt.save(f'{model_dir}/ckpt-{epoch}')
    visualize(model, val_loader, num_visualize=3)
def get_transform(crop_size: int):
    """Heavy augmentation: area crop, flip, strong jitter, blur, normalize."""
    steps = [
        A.RandomResizedCrop(crop_size, crop_size, scale=(0.2, 1.0), p=1.0),
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(brightness=0.4, contrast=0.4,
                      saturation=0.4, hue=0.1, p=0.8),
        # blur_limit=0 defers the kernel size to sigma_limit.
        A.GaussianBlur(blur_limit=0, sigma_limit=(0.1, 2.0), p=0.5),
        A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
        ToTensorV2(),
    ]
    return A.Compose(steps)
def __init__(self):
    """Photometric/weather augmentor; each transform fires independently."""
    steps = [
        A.MotionBlur(p=0.25),
        A.ColorJitter(p=0.5),
        A.RandomRain(p=0.1),      # doubles as random occlusion
        A.RandomSunFlare(p=0.1),
        A.JpegCompression(p=0.25),
        A.ISONoise(p=0.25),
    ]
    self.augmentor = A.Compose(steps, p=1.0)
def transformers(img_size):
    """Augmentation for images with bbox + keypoint targets.

    NOTE(review): every transform has p=1.0, so all fire on every call —
    this looks like a debugging/visualization configuration rather than
    stochastic training augmentation; confirm before reuse.
    """
    return A.Compose(
        [
            A.CenterCrop(img_size, img_size, p=1.0),
            A.HorizontalFlip(p=1.0),
            A.ShiftScaleRotate(p=1.0),
            A.ColorJitter(p=1.0),
            A.Cutout(p=1.0),
        ],
        bbox_params=A.BboxParams('pascal_voc',
                                 label_fields=['category_ids']),
        keypoint_params=A.KeypointParams('xy'))
def create_train_transform(flip, noise, cutout, resize, size=112, bright=True):
    """Compose training augmentations from independently toggled groups.

    Each boolean flag enables one OneOf group; `resize` enables the
    resize(+10px)-then-random-crop stage. Ends with dataset-specific
    normalization.
    """
    steps = []
    if flip:
        steps.append(albumentations.OneOf([
            albumentations.Rotate(limit=30),
            albumentations.IAAPiecewiseAffine(),
            albumentations.ShiftScaleRotate(
                shift_limit=0.02, scale_limit=0.3, rotate_limit=10,
            ),
            albumentations.HorizontalFlip(),
        ], p=0.7))
    if noise:
        steps.append(albumentations.OneOf([
            albumentations.MotionBlur(blur_limit=6),
            albumentations.MedianBlur(blur_limit=5),
            albumentations.OpticalDistortion(),
            albumentations.CLAHE(),
            albumentations.GaussNoise(var_limit=(5.0, 20.0)),
        ], p=0.75))
    if bright:
        steps.append(albumentations.OneOf([
            albumentations.RandomBrightness(limit=0.6),
            albumentations.ColorJitter(),
            albumentations.RandomBrightnessContrast(
                brightness_limit=0.6, contrast_limit=0.6),
        ], p=0.7))
    if cutout:
        # NOTE(review): hole sizes/count are drawn once per transform
        # construction via np.random, not per image — confirm intended.
        steps.append(albumentations.OneOf([
            albumentations.CoarseDropout(),
            albumentations.Cutout(
                max_h_size=int(size * np.random.rand(1) * 0.5),
                max_w_size=int(size * np.random.rand(1) * 0.5),
                num_holes=np.random.randint(1, 3)),
        ], p=0.75))
    if resize:
        steps.append(albumentations.Resize(size + 10, size + 10,
                                           interpolation=2))
        steps.append(albumentations.RandomCrop(size, size,
                                               always_apply=True))
    steps.append(albumentations.Normalize(
        mean=(0.1977, 0.2115, 0.2275),
        std=(0.2177, 0.2227, 0.2317)))
    return albumentations.Compose(steps)
def get_training_tfms(dataset_normalize_stats):
    """Resize + flip/rotate + photometric jitter, normalized to tensor."""
    size = getModelInputSize()
    steps = [
        A.Resize(size[0], size[1]),
        A.RandomRotate90(p=0.5),
        A.Flip(p=0.5),
        A.ColorJitter(p=0.5),
        A.RandomGamma(p=0.5),
        A.RandomContrast(p=0.3),
        A.RandomBrightness(p=0.5),
        ToTensor(dataset_normalize_stats),
    ]
    return A.Compose(steps)
def get_training_augmentation():
    """Geometric + photometric training augmentation (224x224 output)."""
    train_transform = [
        # Rotation angle picked from the given discrete set; black fill.
        albu.Rotate((90, 170, 280), border_mode=0, value=0),
        albu.Flip(),
        albu.RandomResizedCrop(224, 224, scale=(0.4, 1.0)),
        albu.ShiftScaleRotate(shift_limit=0.1, scale_limit=0,
                              rotate_limit=0),
        albu.ColorJitter(p=0.8),
        albu.OneOf(
            [albu.GaussianBlur(sigma_limit=(0.1, 2.0)),
             albu.IAASharpen()],
            p=0.5),
    ]
    return albu.Compose(train_transform)
def get_train_transforms(width, height):
    """Training augmentation pipeline, wrapped for the project dataloader.

    NOTE(review): A.RandomResizedCrop takes (height, width) positionally
    but receives (width, height) here — confirm the intended output
    shape. Also ToFloat(max_value=1.0) followed by Normalize (whose
    default max_pixel_value is 255) looks inconsistent — verify scaling.
    """
    pipeline = A.Compose([
        A.RandomResizedCrop(width, height, scale=(0.1, 0.8)),
        A.ColorJitter(p=0.5),
        A.ToFloat(max_value=1.0),
        A.ShiftScaleRotate(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
        A.CoarseDropout(p=0.5),
        ToTensorV2(),
    ])
    return get_wrapper(pipeline)
def __init__(self, cfg, phase='train'):
    """Detection dataset: class-id maps, annotation list, augmentation."""
    self.phase = phase
    self.size = cfg['image_size']
    self.root_dir = cfg['root_dir']
    cls_names = cfg['class_names']
    # id -> name and its inverse.
    self.category_id_to_name = dict(enumerate(cls_names))
    self.category_name_to_id = {
        name: idx for idx, name in self.category_id_to_name.items()
    }
    anno_file = (cfg['train_data_file'] if self.phase == 'train'
                 else cfg['val_data_file'])
    self.data_list = self.load_annos(anno_file, self.category_name_to_id)
    # Geometric-heavy augmentation; boxes falling under min_area /
    # min_visibility after transform are dropped.
    self.aug = A.Compose(
        [
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.4,
                               rotate_limit=45),
            A.PadIfNeeded(self.size[0], self.size[1]),
            A.RandomResizedCrop(self.size[0], self.size[1],
                                scale=(0.8, 1.0)),
            A.IAAPerspective(scale=(0.05, 0.1)),
            A.Rotate(),
            A.Flip(),
            A.ColorJitter(),
        ],
        bbox_params=A.BboxParams(format='pascal_voc',
                                 label_fields=['cls_ids'],
                                 min_area=0.3,
                                 min_visibility=0.3))
    self.to_tensor = T.Compose([
        T.ToTensor(),
        T.Normalize(cfg['imagenet_default_mean'],
                    cfg['imagenet_default_std']),
    ])
def det_train_augs(height: int, width: int) -> albu.Compose:
    """Training augmentations for detection (pascal_voc bboxes).

    height, width: output size of the random crop.

    Bug fix: RandomResizedCrop previously hardcoded 512x512 and ignored
    both parameters; it now honors the requested size (identical result
    for the existing 512x512 callers).
    """
    return albu.Compose(
        [
            albu.RandomResizedCrop(height, width),
            albu.ColorJitter(),
            albu.RandomGamma(),
            albu.Flip(),
            albu.Transpose(),
            albu.Rotate(),
            albu.Normalize(),
        ],
        bbox_params=albu.BboxParams(format="pascal_voc",
                                    label_fields=["category_ids"],
                                    min_visibility=0.2),
    )
def color_aug(img):
    """Apply random blur/noise plus mild color jitter to one image.

    Note: the Compose is rebuilt on every call, matching the original;
    hoist to module scope if this ever shows up in profiling.
    """
    pipeline = albu.Compose([
        albu.OneOf([
            albu.GaussianBlur(p=1),
            albu.MedianBlur(p=1),
            albu.GaussNoise(p=1),
        ], p=.5),
        albu.ColorJitter(brightness=0.2, contrast=((0.75, 1.25)),
                         saturation=0.2, hue=(-0.04, 0.04), p=0.5),
    ])
    return pipeline(image=img)['image']
def create_augmentations(img_height=224, img_width=224, p=0.1):
    """Build the image augmentation pipeline.

    Most transforms fire with probability p; shift/rotate and color
    jitter use 2*p, channel shuffle is fixed at 0.05.

    Bug fix: GridDistortion was listed twice with identical parameters —
    an apparent copy-paste duplicate that silently doubled its effective
    application probability. The duplicate entry is removed.
    """
    AUGMENTATIONS = albumentations.Compose([
        albumentations.Resize(img_height, img_width, p=1.),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.IAAPerspective(p=p, scale=(0.01, 0.05)),
        albumentations.GridDistortion(p=p, distort_limit=0.2),
        albumentations.CoarseDropout(p=p, max_holes=10,
                                     max_height=25, max_width=25),
        albumentations.GaussNoise(p=p, var_limit=(40.0, 70.0)),
        albumentations.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1,
                                        rotate_limit=10, interpolation=1,
                                        border_mode=4, always_apply=False,
                                        p=2 * p),
        albumentations.ColorJitter(brightness=0.4, contrast=0.4,
                                   saturation=0.4, hue=0.2, p=2 * p),
        albumentations.Blur(p=p, blur_limit=10),
        albumentations.ToGray(p=p),
        albumentations.ChannelShuffle(p=0.05),
        albumentations.RandomGamma(p=p, gamma_limit=(20, 200)),
        AddShadow(p=p),
    ])
    return AUGMENTATIONS
def get_yolo_transform(img_size, mode='train'):
    """YOLO-format pipeline: letterbox to img_size; heavy aug in 'train'.

    Raises ValueError for any mode other than 'train' or 'test'.
    """
    if mode == 'train':
        padded = int(img_size * 1.1)  # 10% margin before RandomCrop
        return A.Compose([
            A.LongestMaxSize(max_size=padded),
            A.PadIfNeeded(min_height=padded, min_width=padded,
                          border_mode=cv2.BORDER_CONSTANT),
            A.RandomCrop(width=img_size, height=img_size),
            A.ColorJitter(brightness=0.6, contrast=0.6,
                          saturation=0.6, hue=0.6, p=0.4),
            A.OneOf([
                A.ShiftScaleRotate(rotate_limit=10,
                                   border_mode=cv2.BORDER_CONSTANT,
                                   p=0.4),
                A.IAAAffine(shear=10, mode='constant', p=0.4),
            ], p=1.0),
            A.HorizontalFlip(p=0.5),
            A.Blur(p=0.1),
            A.CLAHE(p=0.1),
            A.Posterize(p=0.1),
            A.ToGray(p=0.1),
            A.ChannelShuffle(p=0.05),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1],
                        max_pixel_value=255),
            ToTensorV2(),
        ], bbox_params=A.BboxParams(format='yolo',
                                    min_visibility=0.4,
                                    label_fields=[]))
    if mode == 'test':
        return A.Compose([
            A.LongestMaxSize(max_size=img_size),
            A.PadIfNeeded(min_height=img_size, min_width=img_size,
                          border_mode=cv2.BORDER_CONSTANT),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1],
                        max_pixel_value=255),
            ToTensorV2(),
        ], bbox_params=A.BboxParams(format="yolo",
                                    min_visibility=0.4,
                                    label_fields=[]))
    raise ValueError("'mode' can only accept 'train' or 'test'")