def _gradcam(model, image_pil_array: "PIL.Image"):
    """grad-cam++"""
    target_layer = model.features  # mobilenet_v2
    img_cv2 = pil2cv(image_pil_array)

    # un-normalized tensor, used only for the heatmap overlay
    img = img_cv2 / 255
    a_transform = A.Compose([A.Resize(224, 224, p=1.0), ToTensorV2(p=1.0)], p=1.0)
    img = a_transform(image=img)["image"].unsqueeze(0)

    # normalized tensor fed to the model
    a_transform = A.Compose(
        [
            A.Resize(224, 224, p=1.0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
    )
    batch_t = a_transform(image=img_cv2)["image"].unsqueeze(0)

    gradcam_pp = GradCAMpp(model, target_layer)
    mask_pp, logit = gradcam_pp(batch_t)
    heatmap_pp, result_pp = visualize_cam(mask_pp, img)
    return result_pp
def build_transforms(args, class_index_dict: Dict[str, int]) -> Tuple[Any, Any]:
    train_transforms = AlbumentationsDetectionWrapperTransform(
        [
            A.HorizontalFlip(),
            A.RandomResizedCrop(width=args.image_size, height=args.image_size,
                                scale=(0.8, 1.), p=1.),
            A.OneOf([
                A.RandomGamma(),
                A.RandomBrightnessContrast(),
                A.Blur(blur_limit=5),
            ], p=0.5),
            ToTensorV2(),
        ],
        annotation_transform=VOCAnnotationTransform(class_index_dict))
    test_transforms = AlbumentationsDetectionWrapperTransform(
        [
            A.Resize(width=args.image_size, height=args.image_size),
            ToTensorV2(),
        ],
        annotation_transform=VOCAnnotationTransform(class_index_dict))
    return train_transforms, test_transforms
def get_album_transforms(norm_mean, norm_std):
    '''get the train and test transforms built with albumentations'''
    train_transform = A.Compose([
        A.PadIfNeeded(min_height=36, min_width=36, border_mode=4,
                      value=[0, 0, 0], always_apply=True),
        A.RandomResizedCrop(height=32, width=32, always_apply=True),
        A.Flip(p=0.5),  # first positional arg is always_apply, so pass p explicitly
        A.Cutout(num_holes=1, max_h_size=8, max_w_size=8, fill_value=0,
                 always_apply=False, p=0.5),
        A.Normalize(mean=norm_mean, std=norm_std),
        ToTensorV2()
    ])
    test_transform = A.Compose(
        [A.Normalize(mean=norm_mean, std=norm_std), ToTensorV2()])
    return train_transform, test_transform
def get_train_aug(config):
    train_size = config.data["train_size"]
    if config.data["train_size"] == config.data["input_size"]:
        aug = get_aug([
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            ]),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(train_size, train_size, p=1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ])
    else:
        # define augmentation
        aug = get_aug([
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            ]),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomCrop(train_size, train_size, p=1),
            A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ])
    return aug
class Crop256(object):
    width = height = 256
    train_transform = A.Compose(
        [
            A.RandomCrop(height=height, width=width, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Transpose(p=0.5),  # TTA×8
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        additional_targets={'gt': 'image'})

    divisor = 8  # pad to a multiple of 8
    val_transform = A.Compose([
        A.PadIfNeeded(min_height=None, min_width=None,
                      pad_height_divisor=divisor, pad_width_divisor=divisor, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.0, additional_targets={'gt': 'image'})
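# Usage sketch (assumption, not part of the original snippet): with
# additional_targets={'gt': 'image'}, the ground-truth image goes through exactly
# the same random crop/flip/transpose as the input, which is what paired
# restoration data needs. `img` and `gt` are assumed to be HWC numpy arrays of
# the same size.
def _crop256_example(img, gt):
    out = Crop256.train_transform(image=img, gt=gt)
    return out["image"], out["gt"]  # two tensors with identical spatial augmentation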
def build_transforms(args, n_classes):
    train_transforms = A.Compose([
        A.HorizontalFlip(),
        A.RandomResizedCrop(width=args.image_size, height=args.image_size, scale=(0.7, 1.2)),
        A.CoarseDropout(max_height=int(args.image_size / 5),
                        max_width=int(args.image_size / 5)),
        A.OneOf([
            A.RandomBrightnessContrast(),
            A.RandomGamma(),
            A.Blur(blur_limit=5),
        ]),
        ToTensorV2(),
    ])
    train_transforms = AlbumentationsSegmentationWrapperTransform(
        train_transforms, class_num=n_classes, ignore_indices=[255, ])
    test_transforms = A.Compose([
        A.Resize(width=args.image_size, height=args.image_size),
        ToTensorV2(),
    ])
    test_transforms = AlbumentationsSegmentationWrapperTransform(
        test_transforms, class_num=n_classes, ignore_indices=[255, ])
    return train_transforms, test_transforms
def train_pascalvoc0712():
    transforms = {
        "TRAIN": A.Compose([
            A.Resize(300, 300),
            A.HorizontalFlip(p=0.5),
            A.MotionBlur(p=0.5),
            A.Normalize(),
            ToTensorV2(p=1.0),
        ], bbox_params={
            'format': 'pascal_voc',
            'label_fields': ['labels']
        }),
        "TEST": A.Compose([
            A.Resize(300, 300),
            A.Normalize(),
            ToTensorV2(p=1.0),
        ], bbox_params={
            'format': 'pascal_voc',
            'label_fields': ['labels']
        })
    }
    training(config_path=CONFIG_PATH, transforms=transforms)
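# Usage sketch (assumption, not part of the original code): a Compose built with
# bbox_params is called with the image, the boxes in pascal_voc
# (x_min, y_min, x_max, y_max) pixel format, and one entry per box in every
# declared label field; the boxes are rescaled along with the image.
def _detection_transform_example(transform, img, boxes, labels):
    out = transform(image=img, bboxes=boxes, labels=labels)
    return out["image"], out["bboxes"], out["labels"]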
def apply_transforms(mean, std):
    train_transforms = A.Compose([
        A.HorizontalFlip(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=10, p=0.2),
        A.CoarseDropout(
            max_holes=1,
            max_height=16,
            max_width=16,
            min_holes=1,
            min_height=16,
            min_width=16,
            fill_value=tuple((x * 255.0 for x in mean)),
            p=0.2,
        ),
        A.ToGray(p=0.15),
        A.Normalize(mean=mean, std=std, always_apply=True),
        ToTensorV2(),
    ])
    test_transforms = A.Compose([
        A.Normalize(mean=mean, std=std, always_apply=True),
        ToTensorV2(),
    ])
    return (lambda img: train_transforms(image=np.array(img))["image"],
            lambda img: test_transforms(image=np.array(img))["image"])


# if __name__ == "__main__":
#     pass
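# Usage sketch (assumption, not part of the original code): the returned lambdas
# convert PIL images to numpy before calling albumentations, so they can be passed
# directly as the `transform` of a torchvision dataset. The stats and the
# "data/train" directory below are placeholders.
def _imagefolder_example():
    from torchvision import datasets  # assumption: torchvision is available
    train_tf, test_tf = apply_transforms(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225))
    return datasets.ImageFolder("data/train", transform=train_tf)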
def get_test_transforms(mode):
    if mode == 0:
        return A.Compose([
            A.Resize(height=512, width=512, p=1.0),
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    elif mode == 1:
        return A.Compose([
            A.HorizontalFlip(p=1),
            A.Resize(height=512, width=512, p=1.0),
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    elif mode == 2:
        return A.Compose([
            A.VerticalFlip(p=1),
            A.Resize(height=512, width=512, p=1.0),
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
    else:
        return A.Compose([
            A.HorizontalFlip(p=1),
            A.VerticalFlip(p=1),
            A.Resize(height=512, width=512, p=1.0),
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)
def train_transform(resize, normalize=None):
    if normalize == 'imagenet':
        trans_func = [
            albu.VerticalFlip(p=0.5),
            albu.HorizontalFlip(p=0.5),
            # albu.ToFloat(max_value=255, p=1.0),
            albu.Normalize(p=1.0),
            ToTensorV2(p=1.0)
        ]
    elif normalize == 'global_norm':
        trans_func = [
            albu.VerticalFlip(p=0.5),
            albu.HorizontalFlip(p=0.5),
            GlobalNormalize(p=1.0),
            # albu.ToFloat(max_value=255, p=1.0),
            ToTensorV2(p=1.0)
        ]
    else:
        trans_func = [
            albu.VerticalFlip(p=0.5),
            albu.HorizontalFlip(p=0.5),
            albu.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
            # albu.ToFloat(max_value=255, p=1.0),
            ToTensorV2(p=1.0)
        ]
    return Compose(trans_func, p=1.0)
def get_aug(atype, size):
    print('using aug', atype)
    if atype == '0':
        return alb.Compose([
            alb.Resize(size, size, p=1),
            alb.Transpose(p=0.5),
            alb.HorizontalFlip(p=0.5),
            alb.VerticalFlip(p=0.5),
            alb.ShiftScaleRotate(p=0.5),
            alb.Normalize(p=1),
            ToTensorV2(p=1),
        ])
    elif atype == '1':
        return alb.Compose([
            alb.RandomResizedCrop(size, size),
            alb.Transpose(p=0.5),
            alb.HorizontalFlip(p=0.5),
            alb.VerticalFlip(p=0.5),
            alb.ShiftScaleRotate(p=0.5),
            alb.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                   val_shift_limit=0.2, p=0.5),
            alb.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1), p=0.5),
            alb.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                          max_pixel_value=255.0, p=1.0),
            alb.CoarseDropout(p=0.5),
            alb.Cutout(p=0.5),
            ToTensorV2(p=1.0),
        ])
    elif atype == '2':
        return alb.Compose([
            alb.RandomResizedCrop(size, size, p=1, scale=(0.9, 1)),
            alb.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                   val_shift_limit=0.2, p=0.2),
            alb.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1),
                                         contrast_limit=(-0.1, 0.1), p=0.2),
            alb.Transpose(p=0.2),
            alb.HorizontalFlip(p=0.2),
            alb.VerticalFlip(p=0.2),
            alb.ShiftScaleRotate(p=0.2),
            alb.Normalize(p=1),
            ToTensorV2(p=1),
        ])
    elif atype == '3':
        return alb.Compose([
            alb.RandomResizedCrop(size, size, p=1),
            alb.HorizontalFlip(p=0.2),
            alb.Normalize(p=1),
            ToTensorV2(p=1),
        ])
    else:
        raise Exception('atype |{}| not supported'.format(atype))
def get_transforms(img_size=256, trans_type='train'):
    if trans_type == 'train':
        return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10,
                               border_mode=0, p=0.7),
            A.Resize(img_size, img_size),
            A.Cutout(max_h_size=int(img_size * 0.4), max_w_size=int(img_size * 0.4),
                     num_holes=1, p=0.5),
            A.Normalize(),
            ToTensorV2(p=1.0),
        ])
        # A.Compose([
        #     A.Resize(height=img_size, width=img_size, p=1),
        #     A.RandomSizedCrop(min_max_height=(int(img_size * 0.8), int(img_size * 0.8)),
        #                       height=img_size, width=img_size, p=0.5),
        #     A.RandomRotate90(p=0.5),
        #     A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
        #     A.HorizontalFlip(p=0.5),
        #     A.VerticalFlip(p=0.5),
        #     A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
        #     A.Normalize(),
        #     ToTensorV2(p=1.0),
        # ], p=1.0)
    return A.Compose(
        [A.Resize(img_size, img_size), A.Normalize(), ToTensorV2(p=1.0)])
def get_transforms(stage: str = None, mode: str = None):
    if mode == 'train':
        return albumentations.Compose([
            # blur
            albumentations.OneOf([
                albumentations.Blur((1, 4), p=1.0),
                albumentations.GaussianBlur(3, p=1.0),
                albumentations.MedianBlur(blur_limit=5, p=1.0),
            ], p=3 / 4),
            # transformations
            albumentations.ShiftScaleRotate(scale_limit=0.2, rotate_limit=25,
                                            border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            # cut and drop
            # distortion
            albumentations.OneOf([
                albumentations.OpticalDistortion(0.6, p=1.0),
                albumentations.GridDistortion(8, 0.06, border_mode=cv2.BORDER_CONSTANT,
                                              value=0, p=1.0),
                albumentations.ElasticTransform(sigma=10, alpha=1, alpha_affine=10,
                                                border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            ], p=3 / 4),
            # add noise
            albumentations.OneOf([
                albumentations.GaussNoise((0, 250), p=1.0),
                albumentations.MultiplicativeNoise(p=1.0),
            ], p=2 / 3),
            # common
            albumentations.Normalize(TRAIN_MEAN, TRAIN_STD),
            GridMask((3, 7), rotate=15, p=0.75),
            ToTensorV2(),
        ])
    elif mode == 'valid':
        return albumentations.Compose([
            albumentations.Normalize(TRAIN_MEAN, TRAIN_STD),
            ToTensorV2(),
        ])
    else:
        raise ValueError('mode is %s' % mode)
def __get_transformations(self):
    norm_mean = [0.3456, 0.2281, 0.2233]
    norm_std = [0.2528, 0.2135, 0.2104]
    normalize = Normalize(mean=norm_mean, std=norm_std)
    training_augmentation = Compose([
        ShiftScaleRotate(shift_limit=0.1, scale_limit=(-0.2, 0.5), rotate_limit=15,
                         border_mode=0, value=0, p=0.7),
    ])
    data_transformations = {}
    data_transformations["train"] = Compose([
        Resize(height=self.input_height, width=self.input_width),
        training_augmentation,
        normalize,
        ToTensorV2(),
    ])
    data_transformations["val"] = Compose([
        Resize(height=self.input_height, width=self.input_width),
        normalize,
        ToTensorV2(),
    ])
    data_transformations["test"] = data_transformations["val"]
    return data_transformations
def get_transform(image_size, base_size=366):
    if image_size > base_size:
        resize = albumentations.Resize(image_size, image_size)
    else:
        resize = albumentations.CenterCrop(image_size, image_size)
    train_transform = albumentations.Compose([
        albumentations.VerticalFlip(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        # albumentations.Equalize(p=0.3),
        # albumentations.OneOf([
        #     albumentations.RandomContrast(),
        #     albumentations.RandomBrightness(),
        #     albumentations.CLAHE(),
        # ], p=0.3),
        albumentations.OneOf([
            albumentations.GaussianBlur(blur_limit=3),
            albumentations.GaussNoise(var_limit=(3, 10)),
            albumentations.MedianBlur(blur_limit=3)
        ], p=0.5),
        resize,
        # albumentations.Cutout(max_h_size=int(image_size * 0.1),
        #                       max_w_size=int(image_size * 0.1), num_holes=3, p=0.3),
        albumentations.Normalize(),
        ToTensorV2()
    ])
    val_transform = albumentations.Compose([
        resize,
        albumentations.Normalize(),
        # always use V2, following this answer:
        # https://albumentations.ai/docs/faq/#which-transformation-should-i-use-to-convert-a-numpy-array-with-an-image-or-a-mask-to-a-pytorch-tensor-totensor-or-totensorv2
        ToTensorV2()
    ])
    return train_transform, val_transform
def __init__(self, config):
    width = height = config.DATA.SCALE  # 300/512
    self.train_transform = A.Compose(  # Yolo
        [
            # A.RandomSizedCrop(min_max_height=(800, 1024), height=1024, width=1024, p=0.5),
            # A.RandomScale(scale_limit=0.3, p=1.0),  # this one is problematic
            C.RandomResize(scale_limit=0.3, p=1.0),  # adjust aspect ratio within [1/1.3, 1.3]
            A.OneOf(
                [
                    A.Sequential(
                        [
                            A.SmallestMaxSize(min(height, width), p=1.0),
                            # resize the short side to 544 first, then crop to 544×544
                            A.RandomCrop(height, width, p=1.0)
                        ],
                        p=0.4),
                    A.LongestMaxSize(max(height, width), p=0.6),  # resize the long side to 544
                ],
                p=1.0),
            # A.LongestMaxSize(max(height, width), p=1.0),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.4, sat_shift_limit=0.4,
                                     val_shift_limit=0.4, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.9),
            ], p=0.9),
            # A.PadIfNeeded(min_height=height, min_width=width, border_mode=0,
            #               value=(0.5, 0.5, 0.5), p=1.0),
            C.RandomPad(min_height=height, min_width=width, border_mode=0,
                        value=(123 / 255, 117 / 255, 104 / 255), p=1.0),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    self.val_transform = A.Compose([
        A.Resize(height=height, width=width, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                       min_visibility=0, label_fields=['labels']))
class Apollo(object):
    data_format = 'VOC'
    voc_root = 'datasets/apollo'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = [
        'bicycle', 'bicycle_group', 'bus', 'car', 'car_groups', 'motorbicycle',
        'motorbicycle_group', 'person', 'person_group', 'rider', 'rider_group',
        'tricycle', 'truck'
    ]
    class_names = [
        'bus', 'car', 'motorbicycle_group', 'person', 'tricycle', 'truck'
    ]
    img_format = 'jpg'
    width = 512
    height = 416
    train_transform = A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(640, 640), height=800, width=1000,
                              w2h_ratio=1.25, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    val_transform = A.Compose([
        A.Resize(height=height, width=width, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                       min_visibility=0, label_fields=['labels']))
def __init__(self, imgs):
    super(ZCTESTDataset, self).__init__()
    self.imgs = imgs
    self.transform = A.Compose([ToTensorV2(always_apply=True, p=1.0)], p=1.0)
    self.transform_hflip = A.Compose(
        [A.HorizontalFlip(p=1.0), ToTensorV2(always_apply=True, p=1.0)], p=1.0)
def get_final_transforms_v2(opt):
    compose_A = Compose([
        tr.ToFloat(max_value=1024),
        ToTensorV2()
    ])
    compose_B = Compose([
        tr.ToFloat(max_value=256),
        ToTensorV2()
    ])
    return compose_A, compose_B
def get_val_aug(config):
    train_size = config.data["train_size"]
    if config.data["train_size"] == config.data["input_size"]:
        valaug = get_aug([
            A.Resize(train_size, train_size, p=1),
            ToTensorV2(p=1.0),
        ])
    else:
        valaug = get_aug([
            ToTensorV2(p=1.0),
        ])
    return valaug
class Rtts(object):
    data_format = 'VOC'
    voc_root = 'datasets/RTTS'
    train_split = 'train.txt'
    val_split = 'all.txt'
    class_names = [
        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
        "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    ]
    img_format = 'png'
    width = 480
    height = 480
    train_transform = A.Compose(
        [
            A.Resize(height=1024, width=1024, p=1),  # 896
            A.RandomSizedCrop(min_max_height=(600, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            # A.LongestMaxSize(416, p=1.0),
            # A.PadIfNeeded(min_height=416, min_width=416, border_mode=0,
            #               value=(0.5, 0.5, 0.5), p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']))
def train_val_dataloaders(train_path: str, val_path: str, augment: bool, batch_size: int):
    """Form the dataloaders for training and validation and store them in the dictionary.

    :param train_path: path to images for training
    :param val_path: path to images for validation
    :param augment: whether to apply augmentation to the training images
    :param batch_size: size of the batch
    :return: the dictionary with dataloaders
    """
    if augment:
        train_transform = Compose([
            Blur(p=0.1),
            ChannelDropout(p=0.1),
            Flip(p=0.5),
            GaussNoise((10.0, 30.0), 25.0, p=0.1),
            HueSaturationValue(p=0.1),
            RandomBrightnessContrast(brightness_limit=(-0.20, 0.50), p=0.1),
            RandomGamma(p=0.1),
            RandomRotate90(p=0.5),
            RGBShift(p=0.1),
            Transpose(p=0.25),
            Resize(224, 224, p=1.0),
            Normalize(),
            ToTensorV2(),
        ])
    else:
        train_transform = Compose(
            [Resize(224, 224), Normalize(), ToTensorV2()])
    val_transforms = Compose([Resize(224, 224), Normalize(), ToTensorV2()])

    train_dataset = AlbumentationsImageFolder(train_path, train_transform)
    val_dataset = AlbumentationsImageFolder(val_path, val_transforms)

    dataloader = dict()
    dataloader["train"] = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        drop_last=True,
    )
    dataloader["val"] = torch.utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        drop_last=True,
    )
    return dataloader
def __init__(self, img, tiles, transform=None):
    """
    Args:
        img (zarr.Group / numpy.array): image for prediction.
        tiles (list): list of tuples with tile coordinates [(x1, x2, y1, y2), ...].
        transform (optional): extra albumentations transform applied before ToTensorV2.
    """
    self.img = img
    self.tiles = tiles
    if transform:
        self.transform = A.Compose([transform, ToTensorV2(p=1)])
    else:
        self.transform = ToTensorV2(p=1)
def get_transforms(params, data):
    if data == 'train':
        return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.RandomResizedCrop(params.height, params.width, scale=(0.85, 1.0)),
            A.RandomContrast(limit=0.2, always_apply=False, p=0.5),
            ToTensorV2(p=1.0),
        ])
    elif data == 'valid':
        return A.Compose([
            ToTensorV2(p=1.0),
        ])
class Cityscapes(object):
    data_format = 'VOC'
    voc_root = 'datasets/cityscapes'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = [
        'bus', 'bicycle', 'car', 'motorcycle', 'person', 'rider', 'train', 'truck'
    ]
    img_format = 'png'
    width = 1024
    height = 512
    train_transform = A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(600, 800), height=1024, width=2048,
                              w2h_ratio=2., p=0.5),
            # A.OneOf([
            #     A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
            #                          val_shift_limit=0.2, p=0.9),
            #     A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            # ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            # A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    val_transform = A.Compose([
        A.Resize(height=height, width=width, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                       min_visibility=0, label_fields=['labels']))
class Wheat(object):
    data_format = 'VOC'
    voc_root = 'datasets/wheat_detection'
    train_split = 'train.txt'
    val_split = 'val.txt'
    class_names = ['wheat']
    img_format = 'jpg'
    width = 512
    height = 512
    train_transform = A.Compose(
        [
            # A.Resize(height=1024, width=1024, p=1),  # 896
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=height, width=width, p=1),
            A.Cutout(num_holes=5, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels'])
    )
def __init__(self, model, config, device):
    self.model = model
    self.device = device
    transform_list = {
        # `HorizontalFlip` with p=0 is a no-op; it is only there to avoid an error.
        'original': [A.HorizontalFlip(p=0), ToTensorV2(p=1)],
        'hflip': [A.HorizontalFlip(p=1), ToTensorV2(p=1)],
        'vflip': [A.VerticalFlip(p=1), ToTensorV2(p=1)],
        'vhflip': [A.HorizontalFlip(p=1), A.VerticalFlip(p=1), ToTensorV2(p=1)]
    }
    transform_inv_list = {
        'original': [A.HorizontalFlip(p=0), ToTensorV2(p=1)],
        'hflip': [A.HorizontalFlip(p=1), ToTensorV2(p=1)],
        'vflip': [A.VerticalFlip(p=1), ToTensorV2(p=1)],
        'vhflip': [A.HorizontalFlip(p=1), A.VerticalFlip(p=1), ToTensorV2(p=1)]
    }
    using_tf_list = [k for k, v in config['test_time_augmentation']['augments'].items() if v]
    self.transforms = [
        A.Compose(transform_list[tf_name],
                  bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
        for tf_name in using_tf_list
    ]
    self.transforms_inv = [
        A.Compose(transform_inv_list[tf_name],
                  bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
        for tf_name in using_tf_list
    ]
class FRCNN(object):
    width = height = short_side = opt.scale if opt.scale else 600
    divisor = 32
    train_transform = A.Compose(  # FRCNN
        [
            A.SmallestMaxSize(short_side, p=1.0),  # resize the short side to 600
            A.PadIfNeeded(min_height=None, min_width=None,
                          pad_height_divisor=divisor, pad_width_divisor=divisor, p=1.0),
            # A.RandomCrop(height=height, width=width, p=1.0),  # 600×600
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.3, sat_shift_limit=0.3,
                                     val_shift_limit=0.3, p=0.95),
                A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.95),
            ], p=1.0),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )
    # FRCNN
    val_transform = A.Compose(
        [
            A.SmallestMaxSize(short_side, p=1.0),  # resize the short side to 600
            A.PadIfNeeded(min_height=None, min_width=None,
                          pad_height_divisor=divisor, pad_width_divisor=divisor, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']))
def get_transforms(stage: str = None, mode: str = None):
    if mode == 'train':
        return albumentations.Compose([
            # transformations
            albumentations.ShiftScaleRotate(scale_limit=0.2, rotate_limit=25,
                                            border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            # distortion
            albumentations.OneOf([
                albumentations.OpticalDistortion(1.2, p=1.0),
                albumentations.GridDistortion(8, 0.06, border_mode=cv2.BORDER_CONSTANT,
                                              value=0, p=1.0),
                albumentations.ElasticTransform(sigma=10, alpha=1, alpha_affine=10,
                                                border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
            ], p=3 / 4),
            # custom
            MorphologyGradient(binarize=True, p=1.0),
            # add noise
            albumentations.OneOf([
                albumentations.GaussNoise((0, 150), p=1.0),
                albumentations.MultiplicativeNoise(p=1.0),
            ], p=2 / 3),
            # common
            albumentations.Normalize(TRAIN_MEAN, TRAIN_STD),
            GridMask(5, rotate=45, p=0.9),
            ToTensorV2(),
        ])
    elif mode == 'valid':
        return albumentations.Compose([
            MorphologyGradient(binarize=True, p=1.0),
            albumentations.Normalize(TRAIN_MEAN, TRAIN_STD),
            ToTensorV2(),
        ])
    else:
        raise ValueError('mode is %s' % mode)
def get_train_transforms():
    return A.Compose(
        [
            A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.1, sat_shift_limit=0.3,
                                     val_shift_limit=0.3, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.3, p=0.9),
            ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Resize(height=512, width=512, p=1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels'])
    )