def get_transform(train, augmentation_ver=None, imageclassidentifier=None):
    if augmentation_ver is None or augmentation_ver == 0:
        data_transforms = albumentations.Compose([
            albumentations.Flip(),
            # albumentations.RandomBrightness(0.2),
            albumentations.ShiftScaleRotate(rotate_limit=90, scale_limit=0.10),
            # ToTensorV2()
        ], bbox_params=albumentations.BboxParams(
            format='pascal_voc',
            label_fields=['class_labels']))  # min_visibility=0.2
    elif augmentation_ver == 1:
        if imageclassidentifier is None:
            imageclassidentifier = preprocess.ImageClassIdentifier()

        def ifpred(**kwargs):
            img = kwargs['image']
            predicted_class = imageclassidentifier.detect(np.array(img))
            return predicted_class == 2

        data_transforms = albumentations.Compose([
            IfThenElse(
                ifpred,
                albumentations.Compose([
                    albumentations.RandomSizedCrop((256, 1024), 1024, 1024)
                    # albumentations.RandomSizedBBoxSafeCrop(height=1024, width=1024)
                ]),
                albumentations.Compose([
                    # do nothing
                ])),
            albumentations.Flip(),
            albumentations.VerticalFlip(),
            albumentations.RandomRotate90(p=0.5),
            albumentations.ShiftScaleRotate(rotate_limit=15, scale_limit=0.10),
            # ToTensorV2()
        ], bbox_params=albumentations.BboxParams(
            format='pascal_voc',
            label_fields=['class_labels']))  # min_visibility=0.2
    return data_transforms
def __init__(
    self,
    in_channels=3,
    architecture=None,
    split_size=7,
    num_boxes=2,
    num_classes=20,
):
    super(YoloV1Model, self).__init__()
    self.in_channels = in_channels
    self.darknet = self._create_conv(architecture)
    self.fcs = self._create_fcs(split_size, num_boxes, num_classes)
    self.train_transform = A.Compose(
        [
            A.Resize(width=448, height=448),
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=0),
            ToTensor(),
        ],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
    self.test_transform = A.Compose(
        [A.Resize(width=448, height=448), ToTensor()],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
def get_transform(train: bool, im_size: int = 400):
    if train:
        transforms = A.Compose([
            A.Resize(height=im_size, width=im_size, interpolation=cv2.INTER_CUBIC),
            A.ChannelShuffle(p=0.5),
            A.HorizontalFlip(p=0.5),
            A.ColorJitter(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Blur(p=0.5),
            A.Normalize(),
            ToTensorV2(),
        ], bbox_params=A.BboxParams(format='pascal_voc',
                                    label_fields=['category_ids']))
    else:
        transforms = A.Compose([
            A.Resize(height=im_size, width=im_size, interpolation=cv2.INTER_CUBIC),
            A.Normalize(),
            ToTensorV2(),
        ], bbox_params=A.BboxParams(format='pascal_voc',
                                    label_fields=['category_ids']))
    return transforms
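A minimal usage sketch for the transform above; the image and box values are illustrative, not from the original code:

import numpy as np

tfms = get_transform(train=True, im_size=400)
out = tfms(
    image=np.zeros((300, 300, 3), dtype=np.uint8),  # HWC uint8 input
    bboxes=[(10, 20, 120, 180)],  # pascal_voc: (x_min, y_min, x_max, y_max) in pixels
    category_ids=[1],  # one label per box, matching label_fields above
)
image_tensor = out["image"]  # CHW float tensor after Normalize + ToTensorV2
boxes = out["bboxes"]        # box coordinates rescaled to the 400x400 output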
def setup(self, stage=None):
    self.train_transform = A.Compose(
        [
            A.Resize(width=416, height=416),
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=0),
            ToTensor(),
        ],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
    self.test_transform = A.Compose(
        [A.Resize(width=416, height=416), ToTensor()],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
    if stage == "fit" or stage is None:
        train_files = [BASE_PATH / "train2017.txt"]
        val_files = [BASE_PATH / "val2017.txt"]
        # self.train_dataset = COCODataset(train_files, "train2017", transform=self.test_transform)
        self.train_dataset = COCODataset(
            val_files, "val2017", transform=self.test_transform
        )
        self.val_dataset = COCODataset(
            val_files, "val2017", transform=self.test_transform
        )
    if stage == "test":
        test_files = [BASE_PATH / "val2017.txt"]
        self.test_dataset = COCODataset(
            test_files, "val2017", transform=self.test_transform
        )
def setup(self, stage=None):
    BASE_PATH = pathify(configs.BASE_DIR)
    self.train_transform = A.Compose(
        [
            A.Resize(width=448, height=448),
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=0),
            ToTensor(),
        ],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
    self.test_transform = A.Compose(
        [A.Resize(width=448, height=448), ToTensor()],
        bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]),
    )
    if stage == "fit" or stage is None:
        train_files = [BASE_PATH / "2007_train.txt", BASE_PATH / "2012_train.txt"]
        val_files = [BASE_PATH / "2007_val.txt", BASE_PATH / "2012_val.txt"]
        self.train_dataset = VOCDataset(train_files, transform=self.train_transform)
        self.val_dataset = VOCDataset(val_files, transform=self.test_transform)
    if stage == "test":
        test_files = [BASE_PATH / "2007_test.txt"]
        self.test_dataset = VOCDataset(test_files, transform=self.test_transform)
def __init__(self, config: dict, mode: str):
    super(YoloDataset, self).__init__(config, mode)
    if self.augment:
        assert 'hyper' in config, 'Please add the parameters for data augmentation!'
        self.hyp = config['hyper']
        self.transforms = A.Compose([
            # dict.get takes its fallback positionally; get(key, default=...) raises TypeError
            A.ShiftScaleRotate(
                shift_limit=self.hyp.get('shift', 0.02),
                scale_limit=self.hyp.get('scale', 0.01),
                rotate_limit=self.hyp.get('rotate', 10)),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.7))
    else:
        self.transforms = A.Compose([
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='yolo', min_area=256, min_visibility=0.7))
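Because the BboxParams above declares no label_fields, the class id travels inside each box tuple. A standalone sketch of that convention, using a distilled version of the pipeline (values are illustrative):

import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2

tf = A.Compose(
    [A.HorizontalFlip(), A.Normalize(), ToTensorV2()],
    bbox_params=A.BboxParams(format='yolo', min_area=1024, min_visibility=0.7),
)
# with no label_fields declared, the class id rides along as a 5th box element
out = tf(image=np.zeros((416, 416, 3), dtype=np.uint8),
         bboxes=[(0.5, 0.5, 0.25, 0.25, 2)])  # normalized x_center, y_center, w, h, label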
def test_perspective_keep_size():
    h, w = 100, 100
    img = np.zeros([h, w, 3], dtype=np.uint8)
    bboxes = []
    for _ in range(10):
        x1 = np.random.randint(0, w - 1)
        y1 = np.random.randint(0, h - 1)
        x2 = np.random.randint(x1 + 1, w)
        y2 = np.random.randint(y1 + 1, h)
        bboxes.append([x1, y1, x2, y2])
    keypoints = [
        (np.random.randint(0, w), np.random.randint(0, h), np.random.random())
        for _ in range(10)
    ]

    transform_1 = A.Compose(
        [A.Perspective(keep_size=True, p=1)],
        keypoint_params=A.KeypointParams("xys"),
        bbox_params=A.BboxParams("pascal_voc", label_fields=["labels"]),
    )
    transform_2 = A.Compose(
        [A.Perspective(keep_size=False, p=1), A.Resize(h, w)],
        keypoint_params=A.KeypointParams("xys"),
        bbox_params=A.BboxParams("pascal_voc", label_fields=["labels"]),
    )

    set_seed()
    res_1 = transform_1(image=img, bboxes=bboxes, keypoints=keypoints,
                        labels=[0] * len(bboxes))
    set_seed()
    res_2 = transform_2(image=img, bboxes=bboxes, keypoints=keypoints,
                        labels=[0] * len(bboxes))

    assert np.allclose(res_1["bboxes"], res_2["bboxes"])
    assert np.allclose(res_1["keypoints"], res_2["keypoints"])
def __init__(self, config: dict, mode: str):
    super(NanodetDataset, self).__init__(config, mode)
    if self.augment:
        assert 'hyper' in config, 'Please add the parameters for data augmentation!'
        self.hyp = config['hyper']
        self.transforms = A.Compose(
            [
                A.SmallestMaxSize(max_size=min(self.img_size)),
                A.ShiftScaleRotate(shift_limit=self.hyp.get('shift', 0.),
                                   scale_limit=self.hyp.get('scale', 0.),
                                   rotate_limit=self.hyp.get('rotate', 0.)),
                A.HorizontalFlip(),
                A.VerticalFlip(),
                A.RandomCrop(height=self.img_size[0], width=self.img_size[1]),
                # A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                ToTensorV2()
            ],
            bbox_params=A.BboxParams(format='coco',
                                     min_area=256,
                                     min_visibility=0.2,
                                     label_fields=['gt_labels']))
    else:
        self.transforms = A.Compose(
            [A.LongestMaxSize(max_size=max(self.img_size)), ToTensorV2()],
            bbox_params=A.BboxParams(format='coco',
                                     min_area=256,
                                     min_visibility=0.2,
                                     label_fields=['gt_labels']))
def setup(self, stage=None):
    if stage == 'fit' or stage is None:
        self.train_set = Sku(
            csv_file=self.data_dir + '/annotations/annotations_train.csv',
            root_dir=self.data_dir + '/images',
            transform=A.Compose([
                A.HorizontalFlip(p=0.5),
                A.ShiftScaleRotate(p=0.5),
                A.RandomBrightnessContrast(p=0.2),
                A.RGBShift(p=0.2),
                A.RandomSizedBBoxSafeCrop(width=1333, height=800),
            ], bbox_params=A.BboxParams(format='pascal_voc',
                                        label_fields=['class_labels'])))
        # keep 5000 training samples; the remainder is discarded
        self.train_set, _ = torch.utils.data.random_split(
            self.train_set, [5000, len(self.train_set) - 5000],
            generator=torch.Generator().manual_seed(42))
        self.val_set = Sku(
            csv_file=self.data_dir + '/annotations/annotations_val.csv',
            root_dir=self.data_dir + '/images',
            transform=A.Compose([
                A.RandomSizedBBoxSafeCrop(width=1333, height=800),
            ], bbox_params=A.BboxParams(format='pascal_voc',
                                        label_fields=['class_labels'])))
        # keep 500 validation samples; the remainder is discarded
        self.val_set, _ = torch.utils.data.random_split(
            self.val_set, [500, len(self.val_set) - 500],
            generator=torch.Generator().manual_seed(42))
def get_transform(img_height, img_width, input_height, input_width, val=False):
    if val:
        # Validation --> only resize the image
        transform = A.Compose(
            [
                A.Resize(input_height, input_width, p=1.),
            ],
            bbox_params=A.BboxParams(
                format='albumentations',
                min_visibility=0,
                label_fields=['class_indices'],
            ),
        )
    else:
        # Training --> augment data. Note: the crop ratios are sampled once here,
        # when the transform is built, so the returned RandomCrop has a fixed size;
        # rebuild the transform to re-sample them.
        h_crop_ratio = np.random.uniform(low=0.25, high=0.9)
        w_crop_ratio = np.random.uniform(low=0.25, high=0.9)
        h_crop = int(img_height * h_crop_ratio)
        w_crop = int(img_width * w_crop_ratio)
        transform = A.Compose(
            [
                A.HorizontalFlip(p=0.5),
                A.RandomCrop(width=w_crop, height=h_crop, p=0.5),
                A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                              hue=0.2, p=0.5),
                A.Resize(input_height, input_width, p=1.),
            ],
            bbox_params=A.BboxParams(
                format='albumentations',
                min_visibility=0.2,
                label_fields=['class_indices'],
            ),
        )
    return transform
class Apollo(object):
    data_format = 'VOC'
    voc_root = 'datasets/apollo'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = [
        'bicycle', 'bicycle_group', 'bus', 'car', 'car_groups', 'motorbicycle',
        'motorbicycle_group', 'person', 'person_group', 'rider', 'rider_group',
        'tricycle', 'truck'
    ]
    # the reduced class list below overrides the full list above
    class_names = [
        'bus', 'car', 'motorbicycle_group', 'person', 'tricycle', 'truck'
    ]
    img_format = 'jpg'
    width = 512
    height = 416
    train_transform = A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(640, 640),
                              height=800,
                              width=1000,
                              w2h_ratio=1.25,
                              p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def __init__(self, config):
    width = height = config.DATA.SCALE  # 300/512
    self.train_transform = A.Compose(  # Yolo
        [
            # A.RandomSizedCrop(min_max_height=(800, 1024), height=1024, width=1024, p=0.5),
            # A.RandomScale(scale_limit=0.3, p=1.0),  # this one is problematic
            C.RandomResize(scale_limit=0.3, p=1.0),  # adjust aspect ratio within [1/1.3, 1.3]
            A.OneOf(
                [
                    A.Sequential(
                        [
                            A.SmallestMaxSize(min(height, width), p=1.0),
                            # first resize the short side to 544, then crop to 544x544
                            A.RandomCrop(height, width, p=1.0)
                        ],
                        p=0.4),
                    A.LongestMaxSize(max(height, width), p=0.6),  # resize the long side to 544
                ],
                p=1.0),
            # A.LongestMaxSize(max(height, width), p=1.0),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.4,
                                     sat_shift_limit=0.4,
                                     val_shift_limit=0.4,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.3,
                                           contrast_limit=0.3,
                                           p=0.9),
            ], p=0.9),
            # A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=(0.5, 0.5, 0.5), p=1.0),
            C.RandomPad(min_height=height,
                        min_width=width,
                        border_mode=0,
                        value=(123 / 255, 117 / 255, 104 / 255),
                        p=1.0),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    self.val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
class Rtts(object):
    data_format = 'VOC'
    voc_root = 'datasets/RTTS'
    train_split = 'train.txt'
    val_split = 'all.txt'
    class_names = [
        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
        "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    ]
    img_format = 'png'
    width = 480
    height = 480
    train_transform = A.Compose(
        [
            A.Resize(height=1024, width=1024, p=1),  # 896
            A.RandomSizedCrop(min_max_height=(600, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            # A.LongestMaxSize(416, p=1.0),
            # A.PadIfNeeded(min_height=416, min_width=416, border_mode=0, value=(0.5, 0.5, 0.5), p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_tfms_faster(ds):
    """Return the transforms applied to the training and validation loaders.

    Keyword arguments:
    - ds: dataset name, either "datamatrix" or "coco"
    """
    if ds == "datamatrix":
        train_tfms = albu.Compose([
            albu.OneOf([
                albu.augmentations.transforms.RandomSizedBBoxSafeCrop(480, 640, p=0.2),
                albu.augmentations.transforms.RandomSizedBBoxSafeCrop(960, 1280, p=0.2),
                albu.augmentations.transforms.Resize(750, 1000, p=0.6),
            ], p=1),
            albu.augmentations.transforms.RandomBrightness(limit=0.5),
            albu.augmentations.transforms.RandomContrast(limit=0.5),
            albu.HorizontalFlip(),
            ToTensor(),
        ], bbox_params=albu.BboxParams(format='pascal_voc',
                                       min_area=0.,
                                       min_visibility=0.,
                                       label_fields=['labels']))
    elif ds == "coco":
        train_tfms = albu.Compose([
            albu.augmentations.transforms.RandomBrightness(limit=0.5),
            albu.augmentations.transforms.RandomContrast(limit=0.5),
            albu.HorizontalFlip(),
            albu.VerticalFlip(),
            ToTensor(),
        ], bbox_params=albu.BboxParams(format='pascal_voc',
                                       min_area=0.,
                                       min_visibility=0.,
                                       label_fields=['labels']))
    val_tfms = albu.Compose([
        ToTensor(),
    ], bbox_params=albu.BboxParams(format='pascal_voc',
                                   min_area=0.,
                                   min_visibility=0.,
                                   label_fields=['labels']))
    return train_tfms, val_tfms
class Cityscapes(object):
    data_format = 'VOC'
    voc_root = 'datasets/cityscapes'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = [
        'bus', 'bicycle', 'car', 'motorcycle', 'person', 'rider', 'train',
        'truck'
    ]
    img_format = 'png'
    width = 1024
    height = 512
    train_transform = A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(600, 800),
                              height=1024,
                              width=2048,
                              w2h_ratio=2.,
                              p=0.5),
            # A.OneOf([
            #     A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
            #                          val_shift_limit=0.2, p=0.9),
            #     A.RandomBrightnessContrast(brightness_limit=0.2,
            #                                contrast_limit=0.2, p=0.9),
            # ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
class FRCNN(object):
    width = height = short_side = opt.scale if opt.scale else 600
    divisor = 32
    train_transform = A.Compose(  # FRCNN
        [
            A.SmallestMaxSize(short_side, p=1.0),  # resize the short side to 600
            A.PadIfNeeded(min_height=None,
                          min_width=None,
                          pad_height_divisor=divisor,
                          pad_width_divisor=divisor,
                          p=1.0),
            # A.RandomCrop(height=height, width=width, p=1.0),  # 600x600
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.3,
                                     sat_shift_limit=0.3,
                                     val_shift_limit=0.3,
                                     p=0.95),
                A.RandomBrightnessContrast(brightness_limit=0.3,
                                           contrast_limit=0.3,
                                           p=0.95),
            ], p=1.0),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    # FRCNN
    val_transform = A.Compose(
        [
            A.SmallestMaxSize(short_side, p=1.0),  # resize the short side to 600
            A.PadIfNeeded(min_height=None,
                          min_width=None,
                          pad_height_divisor=divisor,
                          pad_width_divisor=divisor,
                          p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
class Wheat(object):
    data_format = 'VOC'
    voc_root = 'datasets/wheat_detection'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = ['wheat']
    img_format = 'jpg'
    width = 512
    height = 512
    train_transform = A.Compose(
        [
            # A.Resize(height=1024, width=1024, p=1),  # 896
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_transform(augment):
    """Albumentations transform for images with bounding boxes."""
    if augment:
        transform = A.Compose(
            [A.HorizontalFlip(p=0.5), ToTensorV2()],
            bbox_params=A.BboxParams(format='pascal_voc',
                                     label_fields=["category_ids"]))
    else:
        transform = A.Compose(
            [ToTensorV2()],
            bbox_params=A.BboxParams(format='pascal_voc',
                                     label_fields=["category_ids"]))
    return transform
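A hedged sketch of calling the no-augmentation branch; with no Normalize step, ToTensorV2 keeps the uint8 dtype (sample values are made up):

import numpy as np
import torch

tfm = get_transform(augment=False)
out = tfm(image=np.zeros((100, 100, 3), dtype=np.uint8),
          bboxes=[(5, 5, 50, 50)], category_ids=[0])
assert out["image"].dtype == torch.uint8  # CHW tensor, values still in 0-255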
def get_train_transforms():
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.1,
                                     sat_shift_limit=0.3,
                                     val_shift_limit=0.3,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.4,
                                           contrast_limit=0.3,
                                           p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=512, width=512, p=1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_train_transforms():
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(800, 1024), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.Transpose(p=0.5),
            A.JpegCompression(quality_lower=85, quality_upper=95, p=0.2),
            A.OneOf([
                A.Blur(blur_limit=3, p=1.0),
                A.MedianBlur(blur_limit=3, p=1.0)
            ], p=0.1),
            A.Resize(height=1024, width=1024, p=1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_train_transforms(cfg):
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=cfg.INPUT.RSC_MIN_MAX_HEIGHT,
                              height=cfg.INPUT.RSC_HEIGHT,
                              width=cfg.INPUT.RSC_WIDTH,
                              p=cfg.INPUT.RSC_PROB),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=cfg.INPUT.HSV_H,
                                     sat_shift_limit=cfg.INPUT.HSV_S,
                                     val_shift_limit=cfg.INPUT.HSV_V,
                                     p=cfg.INPUT.HSV_PROB),
                A.RandomBrightnessContrast(brightness_limit=cfg.INPUT.BC_B,
                                           contrast_limit=cfg.INPUT.BC_C,
                                           p=cfg.INPUT.BC_PROB),
            ], p=cfg.INPUT.COLOR_PROB),
            A.ToGray(p=cfg.INPUT.TOFGRAY_PROB),
            A.HorizontalFlip(p=cfg.INPUT.HFLIP_PROB),
            A.VerticalFlip(p=cfg.INPUT.VFLIP_PROB),
            # A.Resize(height=512, width=512, p=1),
            A.Cutout(num_holes=cfg.INPUT.COTOUT_NUM_HOLES,
                     max_h_size=cfg.INPUT.COTOUT_MAX_H_SIZE,
                     max_w_size=cfg.INPUT.COTOUT_MAX_W_SIZE,
                     fill_value=cfg.INPUT.COTOUT_FILL_VALUE,
                     p=cfg.INPUT.COTOUT_PROB),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_resize_augmentation(image_size, keep_ratio=False, box_transforms=False):
    """
    Resize an image, supporting multi-scaling.
    :param image_size: target (width, height) of the resized image
    :param keep_ratio: whether to keep the image aspect ratio
    :param box_transforms: whether to transform boxes along with the image
    :return: albumentations Compose
    """
    bbox_params = A.BboxParams(format='pascal_voc',
                               min_area=0,
                               min_visibility=0,
                               label_fields=['class_labels']) if box_transforms else None

    if not keep_ratio:
        return A.Compose(
            [A.Resize(height=image_size[1], width=image_size[0])],
            bbox_params=bbox_params)
    else:
        return A.Compose([
            A.LongestMaxSize(max_size=max(image_size)),
            A.PadIfNeeded(min_height=image_size[1],
                          min_width=image_size[0],
                          p=1.0,
                          border_mode=cv2.BORDER_CONSTANT),
        ], bbox_params=bbox_params)
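An illustrative call for the keep_ratio branch above (assumed values): LongestMaxSize scales the longest side to the target, PadIfNeeded letterboxes the rest, and the boxes follow:

import numpy as np

resize = get_resize_augmentation((512, 512), keep_ratio=True, box_transforms=True)
out = resize(
    image=np.zeros((300, 600, 3), dtype=np.uint8),  # wide image -> padded top/bottom
    bboxes=[(30, 40, 200, 220)],
    class_labels=["car"],
)
assert out["image"].shape[:2] == (512, 512)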
def get_train_transforms():
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(800, 800),
                              height=args["image_size"],
                              width=args["image_size"],
                              p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
            ], p=0.9),
            A.RandomBrightnessContrast(brightness_limit=0.3,
                                       contrast_limit=0.3,
                                       brightness_by_max=True,
                                       p=1.0),
            A.RandomGamma(gamma_limit=(90, 110)),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=args["image_size"], width=args["image_size"], p=1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def get_train_transforms(cfg):
    image_size = cfg["image_size"]
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(image_size - 200, image_size - 200),
                              height=image_size,
                              width=image_size,
                              p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           p=0.9),
                # var_limit is a (min, max) range, so the smaller value goes first
                A.GaussNoise(var_limit=(0.005, 0.01), mean=0, always_apply=False, p=0.6),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=image_size, width=image_size, p=1),
            A.CoarseDropout(max_holes=8, max_height=32, max_width=128, fill_value=0, p=0.3),
            A.CoarseDropout(max_holes=8, max_height=128, max_width=32, fill_value=0, p=0.3),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
def __init__(self, step, annos_path, image_dir, no_classes, input_size,
             annos_encoder, random_seed, transform_norm_parameters):
    """Constructor method"""
    self.image_path = image_dir
    self.annos_path = annos_path
    self.step = step
    self.input_size = input_size
    self.annos_encoder = annos_encoder
    self.no_classes = no_classes

    transform_list = list()
    transform_list.append(A.Resize(input_size[0], input_size[1]))
    transform_list.append(A.Normalize(*transform_norm_parameters))
    transform_list.append(ToTensorV2(transpose_mask=True))
    self.transformer = A.Compose(
        transform_list,
        bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']))

    self.id_to_img_dict, self.id_to_anno_dict = _parse_coco_annos(annos_path)
    total_ids = list(self.id_to_img_dict.keys())
    if self.step != 'test':
        self.step_ids = get_step_ids(self.step, total_ids, random_seed)
    else:
        self.step_ids = total_ids
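A standalone sketch of the COCO bbox contract used by the transformer above; COCO boxes are (x_min, y_min, width, height) in pixels, unlike pascal_voc corner pairs (values are illustrative):

import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2

tf = A.Compose(
    [A.Resize(256, 256), ToTensorV2(transpose_mask=True)],
    bbox_params=A.BboxParams(format='coco', label_fields=['class_labels']),
)
out = tf(image=np.zeros((512, 512, 3), dtype=np.uint8),
         bboxes=[(10, 20, 100, 80)],  # x_min, y_min, box width, box height
         class_labels=[3])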
def _get_augmentations(h, w, augment: bool):
    def normalize(x, **kwargs):
        return x / 255.0

    resizing: list = [
        # A.LongestMaxSize(max_size=WIDTH, always_apply=True),
        A.PadIfNeeded(min_height=h, min_width=w, border_mode=cv2.BORDER_CONSTANT),
        A.RandomCrop(h, w),
        # A.Resize(height=HEIGHT, width=WIDTH, always_apply=True),
    ]
    compatibility: list = [
        ToTensorV2(always_apply=True),
        # runs after ToTensorV2, so it rescales the tensor to float values in [0, 1]
        A.Lambda(image=normalize),
    ]
    augmentations: list = []
    if augment:
        augmentations = [
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.2),
        ]
    return A.Compose(
        resizing + augmentations + compatibility,
        bbox_params=A.BboxParams(format="pascal_voc",
                                 min_visibility=0.05,
                                 label_fields=["class_labels"]),
    )
def __init__(self):
    self.transform = None
    try:
        import albumentations as A
        check_version(A.__version__, '1.0.3', hard=True)  # version requirement

        self.transform = A.Compose([
            A.Blur(p=0.01),
            A.MedianBlur(p=0.01),
            A.ToGray(p=0.01),
            A.CLAHE(p=0.01),
            A.RandomBrightnessContrast(p=0.0),
            A.RandomGamma(p=0.0),
            A.ImageCompression(quality_lower=75, p=0.0)
        ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

        LOGGER.info(colorstr('albumentations: ') +
                    ', '.join(f'{x}' for x in self.transform.transforms if x.p))
    except ImportError:  # package not installed, skip
        pass
    except Exception as e:
        LOGGER.info(colorstr('albumentations: ') + f'{e}')
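A hedged usage sketch for the wrapper above (the class name and sample values are assumptions): YOLO-format boxes are normalized (x_center, y_center, width, height) and labels go through class_labels:

import numpy as np

albu = Albumentations()  # assumed name of the class defining the __init__ above
if albu.transform is not None:
    out = albu.transform(
        image=np.zeros((640, 640, 3), dtype=np.uint8),
        bboxes=[(0.5, 0.5, 0.2, 0.3)],  # normalized x_center, y_center, w, h
        class_labels=[0],
    )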
def test_RandomTransform_with_bboxes():
    random.seed(1)
    image = np.zeros((32, 32, 3), dtype=np.uint8)
    bboxes = np.array([[0, 0, 1, 1], [31, 31, 32, 32]]) / 32.0
    classes = ["A", "B"]
    aug = A.Compose(
        [tk.image.RandomTransform(size=(8, 8), base_scale=4.0, with_bboxes=True)],
        bbox_params=A.BboxParams(format="albumentations", label_fields=["classes"]),
    )
    a_count = 0
    b_count = 0
    for _ in range(100):
        d = aug(image=image, bboxes=bboxes, classes=classes)
        assert d["image"].shape == (8, 8, 3)
        if len(d["bboxes"]) == 0:
            assert len(d["classes"]) == 0
        else:
            assert len(d["bboxes"]) in (1, 2)
            if "A" in d["classes"]:
                a_count += 1
            if "B" in d["classes"]:
                b_count += 1
    # How many of the 100 runs emitted each bbox.
    # (Exact equality makes behavior changes easy to notice; any result is fine
    # as long as the counts exceed the with_bboxes=False case (~(0, 0)) and
    # a_count is roughly equal to b_count.)
    assert (a_count, b_count) == (14, 16)
def test_non_contiguous_input(augmentation_cls, params, bboxes):
    image = np.empty([3, 100, 100], dtype=np.uint8).transpose(1, 2, 0)
    mask = np.empty([3, 100, 100], dtype=np.uint8).transpose(1, 2, 0)

    # check preconditions
    assert not image.flags["C_CONTIGUOUS"]
    assert not mask.flags["C_CONTIGUOUS"]

    if augmentation_cls == A.RandomCropNearBBox:
        # requires "cropping_bbox" arg
        aug = augmentation_cls(p=1, **params)
        aug(image=image, mask=mask, cropping_bbox=bboxes[0])
    elif augmentation_cls == A.RandomSizedBBoxSafeCrop:
        # requires "bboxes" arg
        aug = A.Compose([augmentation_cls(p=1, **params)],
                        bbox_params=A.BboxParams(format="pascal_voc"))
        aug(image=image, mask=mask, bboxes=bboxes)
    else:
        # standard args: image and mask
        if augmentation_cls == A.FromFloat:
            # requires float image
            image = (image / 255).astype(np.float32)
            assert not image.flags["C_CONTIGUOUS"]
        elif augmentation_cls == A.MaskDropout:
            # requires single channel mask
            mask = mask[:, :, 0]
        aug = augmentation_cls(p=1, **params)
        aug(image=image, mask=mask)
def valid_augs(self):
    augs = []
    if self.image_size is not None:
        augs.append(A.Resize(self.image_size, self.image_size))
    if self.normalize:
        # imagenet normalization
        augs.append(
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            )
        )
    if self.bbox_format is not None:
        return A.Compose(
            augs,
            bbox_params=A.BboxParams(
                format=self.bbox_format,
                min_area=0,
                min_visibility=0,
                label_fields=["labels"],
            ),
        )
    return A.Compose(augs)