def aug_mega_hardcore(p=.95):
    """Build a heavy image augmentation pipeline.

    Args:
        p: Probability given to the outer ``Compose``.

    Returns:
        The assembled ``Compose`` object.
    """
    # Each stage is named first so the final ordering is easy to read.
    sharpen_stage = OneOf(
        [CLAHE(clip_limit=2), IAASharpen(p=.25), IAAEmboss(p=.25)],
        p=.35,
    )
    noise_stage = OneOf(
        [IAAAdditiveGaussianNoise(p=.3), GaussNoise(p=.7)],
        p=.5,
    )
    blur_stage = OneOf(
        [
            MotionBlur(p=.2),
            MedianBlur(blur_limit=3, p=.3),
            Blur(blur_limit=3, p=.5),
        ],
        p=.4,
    )
    tone_stage = OneOf(
        [RandomContrast(p=.5), RandomBrightness(p=.5)],
        p=.4,
    )
    affine_stage = ShiftScaleRotate(
        shift_limit=.0, scale_limit=.45, rotate_limit=45, p=.7)
    warp_stage = OneOf(
        [
            OpticalDistortion(p=0.3),
            GridDistortion(p=0.2),
            ElasticTransform(p=.2),
            IAAPerspective(p=.2),
            IAAPiecewiseAffine(p=.3),
        ],
        p=.6,
    )

    stages = [
        sharpen_stage,
        noise_stage,
        RandomRotate90(),
        Flip(),
        Transpose(),
        blur_stage,
        tone_stage,
        affine_stage,
        warp_stage,
        HueSaturationValue(p=.5),
    ]
    return Compose(stages, p=p)
def read_train_img(images_paths):
    """Load, augment and preprocess image/mask pairs.

    For every image path the mask path is derived by replacing ``'images'``
    with ``'gt'``. Both files are read with ``tifffile``, passed through the
    same random augmentation, and then preprocessed: the image is divided by
    255.0 and mask pixels equal to 255 are set to 1.

    Args:
        images_paths: Iterable of image file paths.

    Returns:
        Tuple ``(images, gts)`` of ``np.array`` built from the per-sample
        results; each mask gets an extra axis at position 2.
    """
    batch_images, batch_masks = [], []
    for img_file in images_paths:
        raw_image = tifffile.imread(img_file)
        raw_mask = tifffile.imread(img_file.replace('images', 'gt'))

        # Data augmentation: the random crop is resized back to the
        # original height and width.
        height, width = raw_image.shape[0], raw_image.shape[1]
        pipeline = Compose([
            VerticalFlip(p=0.5),
            RandomRotate90(p=0.5),
            HorizontalFlip(p=0.5),
            RandomSizedCrop(min_max_height=(128, 512),
                            height=height, width=width, p=0.5),
        ])
        result = pipeline(image=raw_image, mask=raw_mask)

        # Data preprocessing.
        scaled_image = result['image'] / 255.0
        mask = result['mask']
        # The boolean index is evaluated before the assignment, so no
        # temporary copy of the mask is needed.
        mask[mask == 255] = 1

        batch_images.append(scaled_image)
        batch_masks.append(np.expand_dims(mask, axis=2))
    return np.array(batch_images), np.array(batch_masks)
def __init__(self, data, img_size=384, aug=True, mode='train'):
    """Store the data table and build the transform pipeline for ``mode``.

    Args:
        data: Object whose ``ImageId`` attribute provides ``unique()``.
        img_size: Side length passed to ``Resize`` (height and width).
        aug: Accepted for interface compatibility; not read by this
            constructor.
        mode: ``'train'``, ``'test'`` or ``'val'``.

    Raises:
        RuntimeError: If ``mode`` is not one of the supported values.
    """
    self.data = data
    self.mode = mode

    # Compare strings by value. ``is`` only works by accident of string
    # interning and emits a SyntaxWarning on Python 3.8+.
    if mode == 'train':
        self.images = data.ImageId.unique()
        self._aug = Compose([
            Flip(),
            RandomRotate90(),
            ShiftScaleRotate(),
            Normalize(),
            Resize(img_size, img_size),
        ])
    elif mode in ('test', 'val'):
        self.images = data.ImageId.unique()
        self._aug = Compose([
            Normalize(),
            Resize(img_size, img_size),
        ])
    else:
        raise RuntimeError(
            f"unsupported mode {mode!r}; expected 'train', 'test' or 'val'")
def get_aug(p=1.0):
    """Return the augmentation pipeline.

    Args:
        p: Probability given to the outer ``Compose``.

    Returns:
        ``Compose`` of flips, 90-degree rotation, shift/scale/rotate, one
        optional distortion and one optional colour adjustment.
    """
    affine = ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.2,
        rotate_limit=15,
        p=0.9,
        border_mode=cv2.BORDER_REFLECT,
    )
    distortion = OneOf(
        [
            OpticalDistortion(p=0.3),
            GridDistortion(p=.1),
            IAAPiecewiseAffine(p=0.3),
        ],
        p=0.3,
    )
    colour = OneOf(
        [
            HueSaturationValue(10, 15, 10),
            CLAHE(clip_limit=2),
            RandomBrightnessContrast(),
        ],
        p=0.3,
    )
    steps = [
        HorizontalFlip(),
        VerticalFlip(),
        RandomRotate90(),
        affine,
        distortion,
        colour,
    ]
    return Compose(steps, p=p)
def augmentation_hardcore(size_image, p=0.8):
    """Build the heavy augmentation pipeline (only used for the second model).

    Args:
        size_image: Side length passed to the initial ``Resize``.
        p: Probability given to the outer ``Compose``, i.e. the chance that
            the composed transforms are applied at all.

    Returns:
        The assembled ``Compose`` object.
    """
    flips = OneOf([Flip(), VerticalFlip(), HorizontalFlip()], p=0.5)
    quarter_turns = OneOf([RandomRotate90(), Transpose()], p=0.5)
    noise_or_blur = OneOf(
        [GaussNoise(), GaussianBlur(blur_limit=9), Blur()], p=0.5)
    colour = OneOf(
        [
            HueSaturationValue(hue_shift_limit=10,
                               sat_shift_limit=25,
                               val_shift_limit=20),
            RGBShift(),
            # RandomBrightness takes ``limit``; ``brightness_limit`` belongs
            # to RandomBrightnessContrast and raised TypeError here.
            RandomBrightness(limit=0.4),
            RandomContrast(),
            RandomBrightnessContrast(),
        ],
        p=0.5,
    )
    geometry = OneOf(
        [ShiftScaleRotate(), ElasticTransform(), RandomGridShuffle()], p=0.5)

    return Compose(
        [
            Resize(size_image, size_image),
            # NOTE(review): with p=0.5 the output is sometimes 200x200 and
            # sometimes size_image x size_image - confirm this is intended.
            CenterCrop(height=200, width=200, p=0.5),
            Cutout(),
            RandomShadow(shadow_dimension=3),
            flips,
            quarter_turns,
            noise_or_blur,
            colour,
            geometry,
        ],
        p=p,
    )
def __getitem__(self, idx):
    """Return ``(image, label)`` for the sample at ``idx``.

    ``self.df[idx][0]`` is used as the image path and ``self.df[idx][1]`` as
    the integer label. The image is read with OpenCV, converted from BGR to
    RGB, passed through a fixed chain of albumentations transforms (each with
    its own default probability), converted to a PIL image and finally
    passed through ``self.transform`` when that is set.
    """
    label1 = int(self.df[idx][1])
    path = str(self.df[idx][0])

    image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

    # Applied in this exact order, one after another.
    chain = (
        RandomRotate90(),
        Flip(),
        JpegCompression(quality_lower=9, quality_upper=10),
        Transpose(),
        Downscale(),
        IAAAdditiveGaussianNoise(),
        Blur(blur_limit=7),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                         rotate_limit=45),
        IAAPiecewiseAffine(),
        RGBShift(),
        RandomBrightnessContrast(),
        HueSaturationValue(),
    )
    for step in chain:
        image = step(image=image)['image']

    image = transforms.ToPILImage()(image)
    if self.transform:
        image = self.transform(image)
    return image, label1
def create_train_transforms(conf):
    """Build the training augmentation pipeline from a config mapping.

    Args:
        conf: Mapping providing ``'crop_height'`` and ``'crop_width'``.

    Returns:
        ``Compose`` that shifts, crops to the configured size, then applies
        rotation, flips, transpose and an optional colour change.
    """
    height, width = conf['crop_height'], conf['crop_width']

    shift_only = ShiftScaleRotate(shift_limit=0.2, scale_limit=0,
                                  rotate_limit=0)
    # One of the two crops always runs (p=1); both produce height x width.
    sized_crop = RandomSizedCrop(
        min_max_height=(int(height * 0.8), int(height * 1.2)),
        w2h_ratio=1.,
        height=height,
        width=width,
        p=0.9,
    )
    plain_crop = RandomCrop(height=height, width=width, p=0.1)
    crop = OneOf([sized_crop, plain_crop], p=1)
    colour = OneOf(
        [RGBShift(), RandomBrightnessContrast(), RandomGamma()], p=0.5)

    return Compose([
        shift_only,
        crop,
        Rotate(limit=10, p=0.2, border_mode=cv2.BORDER_CONSTANT, value=0),
        HorizontalFlip(),
        VerticalFlip(),
        RandomRotate90(),
        Transpose(),
        colour,
    ])
def create_train_transforms(size):
    """Build the training pipeline ending in a normalized tensor.

    Args:
        size: Indexable where ``size[0]`` is the target height and
            ``size[1]`` the target width.

    Returns:
        ``Compose`` of noise/blur, flip/rotation, resize and pad to ``size``,
        colour changes, shift/scale/rotate, ``Normalize`` and ``ToTensorV2``.
    """
    target_h, target_w = size[0], size[1]

    colour = OneOf(
        [RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()],
        p=0.7,
    )
    affine = ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.2,
        rotate_limit=10,
        border_mode=cv2.BORDER_CONSTANT,
        p=0.5,
    )
    steps = [
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        HorizontalFlip(),
        RandomRotate90(),
        Resize(height=target_h, width=target_w),
        PadIfNeeded(min_height=target_h, min_width=target_w,
                    border_mode=cv2.BORDER_CONSTANT),
        colour,
        ToGray(p=0.1),
        affine,
        Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
    return Compose(steps)
def __init__(
        self,
        image_path='../../data/training/images/',
        groundtruth_path='../../data/training/groundtruth/',
        val_split=0.1,
        additional_images_path='../../additional_data_generation/additional_data/images/',
        additional_masks_path='../../additional_data_generation/additional_data/masks/'
):
    """Load training, validation and additional images into memory.

    Args:
        image_path: Directory with the training images.
        groundtruth_path: Directory with the ground-truth masks; the n-th
            file (sorted by name) is paired with the n-th training image.
        val_split: Fraction of the training pairs that is also stored in the
            validation lists.
        additional_images_path: Directory with extra ``.png`` images.
        additional_masks_path: Directory with the masks of the extra images,
            using the same file names.
    """
    # os.listdir() returns entries in arbitrary order. Sort both listings so
    # that zipping them pairs every image with the mask of the same rank
    # instead of relying on the two directories being enumerated alike.
    training_images = sorted(os.listdir(image_path))
    training_truths = sorted(os.listdir(groundtruth_path))

    # Shuffle the pairs to obtain a random train/validation split.
    zipped = list(zip(training_images, training_truths))
    random.shuffle(zipped)
    # zip(*[]) yields nothing to unpack, so handle empty directories.
    training_images, training_truths = zip(*zipped) if zipped else ((), ())

    self.images = []
    self.truths = []
    self.validation_images = []
    self.validation_truths = []
    self.additional_images = []
    self.additional_masks = []
    self.treshold = 0.25

    counter = int(val_split * len(training_images))
    print('Reading images...', flush=True)
    for i, t in tqdm(list(zip(training_images, training_truths))):
        image_file = os.path.join(image_path, i)
        truth_file = os.path.join(groundtruth_path, t)
        if counter > 0 and val_split != 0.0:
            self.validation_images.append(imread(image_file))
            self.validation_truths.append(rgb2gray(imread(truth_file)))
            counter -= 1
        # NOTE(review): validation samples are appended to the training
        # lists as well - confirm this overlap is intended.
        self.images.append(imread(image_file))
        self.truths.append(rgb2gray(imread(truth_file)))
    print('Done!')

    additional_paths = [
        p for p in os.listdir(additional_images_path) if p.endswith('.png')
    ]
    random.shuffle(additional_paths)
    print('Reading additional data...', flush=True)
    for p in tqdm(additional_paths):
        self.additional_images.append(
            imread(os.path.join(additional_images_path, p)))
        self.additional_masks.append(
            rgb2gray(imread(os.path.join(additional_masks_path, p))))
    print('Done!')

    # Every transform below is applied with this probability.
    self.albument_p = .5
    self.albumenters_1 = [
        VerticalFlip(p=self.albument_p),
        HorizontalFlip(p=self.albument_p),
        RandomRotate90(p=self.albument_p),
        ElasticTransform(alpha=1, sigma=50, alpha_affine=50,
                         p=self.albument_p)
    ]
    self.albumenters_2 = [
        RandomContrast(limit=.6, p=self.albument_p),
        HueSaturationValue(hue_shift_limit=20,
                           sat_shift_limit=30,
                           val_shift_limit=20,
                           p=self.albument_p),
        RandomBrightness(limit=0.2, p=self.albument_p)
    ]
def policy_transform(split,
                     policies=None,
                     size=224,
                     per_image_norm=False,
                     mean_std=None,
                     **kwargs):
    """Return a callable that resizes an image and converts it to a tensor.

    The policy list is still parsed and reported, but policy-based
    augmentation is currently disabled: the returned callable applies the
    same resize + tensor conversion + normalization for every ``split``.

    Args:
        split: Dataset split name; currently does not change the result.
        policies: ``None``, an iterable of policies, or a path to a file
            containing a Python expression that evaluates to a list of
            policy lists. Each policy is a pair of ``(op_name, params)``.
        size: Output height and width.
        per_image_norm: Accepted for interface compatibility; unused.
        mean_std: Accepted for interface compatibility; unused.
        **kwargs: Ignored.

    Returns:
        Function mapping an image to a normalized tensor.
    """
    if policies is None:
        policies = []

    if isinstance(policies, str):
        with open(policies, 'r') as fid:
            # SECURITY: eval() runs arbitrary code from the policy file.
            # Only load files from trusted sources.
            policies = eval(fid.read())
        policies = itertools.chain.from_iterable(policies)

    # Built only for validation and reporting; not applied (see docstring).
    aug_list = []
    for policy in policies:
        op_1, params_1 = policy[0]
        op_2, params_2 = policy[1]
        print('op_1 ', op_1, ' pa_1 ', params_1)
        print('op_2 ', op_2, ' pa_2 ', params_2)
        aug_list.append(
            Compose([
                globals().get(op_1)(**params_1),
                globals().get(op_2)(**params_2),
            ]))
    print('len(aug_list):', len(aug_list))

    def transform(image):
        image = cv2.resize(np.array(image), (size, size))
        to_tensor = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
            # NOTE(review): normalization is applied a second time here,
            # which maps [-1, 1] to [-3, 1]. Kept as-is because existing
            # checkpoints may depend on it - confirm whether it is intended.
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ])
        return to_tensor(image)

    return transform
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from tqdm import tqdm
    from albumentations import Compose, OneOf, Normalize
    from albumentations import HorizontalFlip, VerticalFlip, RandomRotate90, RandomScale, RandomCrop
    from albumentations.pytorch import ToTensorV2

    # Manual smoke test: build the dataset in pairwise mode, print its
    # length and fetch one sample.
    mean3 = (0.485, 0.456, 0.406)
    std3 = (0.229, 0.224, 0.225)

    flip_or_rotate = OneOf(
        [HorizontalFlip(True), VerticalFlip(True), RandomRotate90(True)],
        p=0.5,
    )
    pipeline = Compose([
        flip_or_rotate,
        RandomCrop(640, 640, True),
        # The three-value statistics are repeated to cover six channels.
        Normalize(mean=mean3 * 2, std=std3 * 2, max_pixel_value=255),
        ToTensorV2(True),
    ])

    dataset = Xview2(
        r'D:\DATA\xView2\train\images',
        r'D:\DATA\xView2\train\labels',
        transforms=pipeline,
        include=('pre', 'post'),
    ).pairwise_mode()

    print(len(dataset))
    a = dataset[1]
    print()
def train(model, cfg, model_cfg, start_epoch=0):
    """Configure datasets and trainer, then run training and validation.

    Args:
        model: Model handed to ``ISTrainer``.
        cfg: Run configuration; ``batch_size``, ``val_batch_size`` and
            ``input_normalization`` are written, ``SBD_PATH`` is read.
        model_cfg: Model configuration providing ``input_normalization``,
            ``crop_size``, ``num_max_points`` and ``input_transform``.
        start_epoch: First epoch index to run.
    """
    if cfg.batch_size < 1:
        cfg.batch_size = 28
    cfg.val_batch_size = cfg.batch_size
    cfg.input_normalization = model_cfg.input_normalization

    crop_size = model_cfg.crop_size
    num_epochs = 120
    num_masks = 1

    loss_cfg = edict()
    loss_cfg.instance_loss = SigmoidBinaryCrossEntropyLoss()
    loss_cfg.instance_loss_weight = 1.0

    def pad_then_crop():
        # Fresh instances for each pipeline: pad up to crop size, then crop.
        return [
            PadIfNeeded(min_height=crop_size[0], min_width=crop_size[1],
                        border_mode=0),
            RandomCrop(*crop_size),
        ]

    train_steps = [
        Flip(),
        RandomRotate90(),
        ShiftScaleRotate(shift_limit=0.03, scale_limit=0,
                         rotate_limit=(-3, 3), border_mode=0, p=0.75),
    ]
    train_steps += pad_then_crop()
    train_steps += [
        RandomBrightnessContrast(brightness_limit=(-0.25, 0.25),
                                 contrast_limit=(-0.15, 0.4), p=0.75),
        RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10,
                 p=0.75),
    ]
    train_augmentator = Compose(train_steps, p=1.0)
    val_augmentator = Compose(pad_then_crop(), p=1.0)

    def scale_func(image_shape):
        return random.uniform(0.75, 1.25)

    points_sampler = MultiPointSampler(model_cfg.num_max_points,
                                       prob_gamma=0.7,
                                       merge_objects_prob=0.15,
                                       max_num_merged_objects=2)

    # Keyword arguments common to the training and validation datasets.
    shared_kwargs = dict(
        num_masks=num_masks,
        points_from_one_object=False,
        input_transform=model_cfg.input_transform,
        min_object_area=80,
        image_rescale=scale_func,
        points_sampler=points_sampler,
    )
    trainset = SBDDataset(
        cfg.SBD_PATH,
        split='train',
        augmentator=train_augmentator,
        keep_background_prob=0.0,
        samples_scores_path='./models/sbd/sbd_samples_weights.pkl',
        samples_scores_gamma=1.25,
        **shared_kwargs,
    )
    valset = SBDDataset(
        cfg.SBD_PATH,
        split='val',
        augmentator=val_augmentator,
        **shared_kwargs,
    )

    optimizer_params = {'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-8}
    lr_scheduler = partial(torch.optim.lr_scheduler.MultiStepLR,
                           milestones=[100], gamma=0.1)

    trainer = ISTrainer(
        model, cfg, model_cfg, loss_cfg, trainset, valset,
        optimizer_params=optimizer_params,
        lr_scheduler=lr_scheduler,
        checkpoint_interval=5,
        image_dump_interval=200,
        metrics=[AdaptiveIoU()],
        max_interactive_points=model_cfg.num_max_points,
    )

    logger.info(f'Starting Epoch: {start_epoch}')
    logger.info(f'Total Epochs: {num_epochs}')
    epoch = start_epoch
    while epoch < num_epochs:
        trainer.training(epoch)
        trainer.validation(epoch)
        epoch += 1
def policy_transform(split,
                     policies=None,
                     size=512,
                     per_image_norm=False,
                     mean_std=None,
                     **kwargs):
    """Return a callable that augments, resizes and normalizes an image.

    Args:
        split: ``'train'`` enables the base and policy augmentations; any
            other value only resizes when the image height differs from
            ``size``.
        policies: ``None``, an iterable of policies, or a path to a file
            containing a Python expression that evaluates to a list of
            policy lists. Each policy is a pair of ``(op_name, params)``.
        size: Output height and width.
        per_image_norm: If true, normalize with the image's own per-channel
            mean and standard deviation; otherwise use fixed constants.
        mean_std: Not used by this function.
        **kwargs: Ignored.

    Returns:
        Function mapping an image array to a float32 array with the channel
        axis moved to the front.
    """
    means = np.array([127.5, 127.5, 127.5, 127.5])
    stds = np.array([255.0, 255.0, 255.0, 255.0])

    base_aug = Compose([RandomRotate90(), Flip(), Transpose()])

    if policies is None:
        policies = []

    if isinstance(policies, str):
        with open(policies, 'r') as fid:
            # SECURITY: eval() runs arbitrary code from the policy file.
            # Only load files from trusted sources.
            policies = eval(fid.read())
        policies = itertools.chain.from_iterable(policies)

    aug_list = []
    for policy in policies:
        (op_1, params_1), (op_2, params_2) = policy[0], policy[1]
        print('op_1 ', op_1, ' pa_1 ', params_1)
        print('op_2 ', op_2, ' pa_2 ', params_2)
        first = globals().get(op_1)(**params_1)
        second = globals().get(op_2)(**params_2)
        aug_list.append(Compose([first, second]))
    print('len(aug_list):', len(aug_list))

    resize = Resize(height=size, width=size, always_apply=True)

    def transform(image):
        if split == 'train':
            image = base_aug(image=image)['image']
            if aug_list:
                chosen = random.choice(aug_list)
                image = chosen(image=image)['image']
            image = resize(image=image)['image']
        elif size != image.shape[0]:
            image = resize(image=image)['image']

        image = image.astype(np.float32)
        if per_image_norm:
            # Statistics are taken over the pixels of each of 4 channels.
            flat = image.reshape(-1, 4)
            mean = np.mean(flat, axis=0)
            std = np.std(flat, axis=0)
            image -= mean
            image /= (std + 0.0000001)
        else:
            image -= means
            image /= stds
        return np.transpose(image, (2, 0, 1))

    return transform
from albumentations import Compose, Resize, RandomCrop, Flip, HorizontalFlip, VerticalFlip, Transpose, RandomRotate90, \
    ShiftScaleRotate, OneOf, OpticalDistortion
from albumentations.pytorch import ToTensor

# Training pipeline: optional crop and geometric changes, then a fixed
# 128x128 resize and tensor conversion.
train_aug = Compose(
    [
        RandomCrop(height=96, width=96, p=0.2),
        OneOf(
            [
                VerticalFlip(p=0.2),
                HorizontalFlip(p=0.3),
                Transpose(p=0.2),
                RandomRotate90(p=0.2),
            ],
            p=0.3,
        ),
        ShiftScaleRotate(p=0.2),
        OpticalDistortion(p=0.2),
        Resize(128, 128, always_apply=True),
        ToTensor(),
    ]
)

# Validation pipeline: resize and tensor conversion only.
valid_aug = Compose(
    [
        Resize(128, 128, always_apply=True),
        ToTensor(),
    ]
)
def __getitem__(self, idx):
    """Return the two augmented images and the mask for row ``idx``.

    Images ``<name>_<position>_<k>`` for ``k`` in ``1..self.num_images`` are
    loaded and filtered by ``self.channels``; one mask
    ``<name>_<position>`` is loaded. The first two images are concatenated
    along the last axis, augmented together with the mask, and split again
    at ``count_channels(self.channels)``.

    Returns:
        Dict with ``'features'`` (list of two image parts), ``'targets'``
        (list with the mask), ``'name'`` and ``'position'``.

    Raises:
        Exception: If fewer than two channels are configured.
    """
    if len(self.channels) < 2:
        raise Exception('You have to specify at least two channels.')

    row = self.df.iloc[idx]
    instance_name = '_'.join([row['name'], row['position']])

    images_array = [
        filter_by_channels(
            read_tensor(
                get_filepath(self.dataset_path,
                             row['dataset_folder'],
                             self.images_folder,
                             instance_name + f'_{k}',
                             file_type=self.image_type)),
            self.channels, 1)
        for k in range(1, self.num_images + 1)
    ]

    masks_array = read_tensor(
        get_filepath(self.dataset_path,
                     row['dataset_folder'],
                     self.masks_folder,
                     instance_name,
                     file_type=self.mask_type))

    if self.phase != 'train':
        aug = ToTensor()
    else:
        one_of_heavy = OneOf(
            [
                RandomSizedCrop(
                    min_max_height=(int(self.image_size * 0.7),
                                    self.image_size),
                    height=self.image_size,
                    width=self.image_size),
                RandomBrightnessContrast(brightness_limit=0.15,
                                         contrast_limit=0.15),
                MaskDropout(p=0.6),
                ElasticTransform(alpha=15, sigma=5, alpha_affine=5),
                GridDistortion(p=0.6),
            ],
            p=0.8,
        )
        aug = Compose([RandomRotate90(), Flip(), one_of_heavy, ToTensor()])

    # Only the first two images are used, stacked on the channel axis so
    # that both receive the same geometric transform.
    stacked = np.concatenate((images_array[0], images_array[1]), axis=-1)
    augmented = aug(image=stacked, mask=masks_array)

    split_at = count_channels(self.channels)
    augmented_images = [
        augmented['image'][:split_at, :, :],
        augmented['image'][split_at:, :, :],
    ]
    augmented_masks = [augmented['mask']]

    return {
        'features': augmented_images,
        'targets': augmented_masks,
        'name': row['name'],
        'position': row['position'],
    }
import re
import os
import glob
import numpy as np
import argparse
import matplotlib.pyplot as plt
from albumentations import (HorizontalFlip, VerticalFlip, Transpose,
                            RandomRotate90)
from PIL import Image

NUM_LABELS = 12
# Geometric transforms, each constructed with p=1.
AUGMENTS = [
    RandomRotate90(p=1),
    HorizontalFlip(p=1),
    VerticalFlip(p=1),
    Transpose(p=1)
]

# Command-line interface: --dataroot is required and is parsed at import
# time.
parser = argparse.ArgumentParser()
parser.add_argument('--dataroot',
                    required=True,
                    help='expriments results path')
opt = parser.parse_args()


def read(index, ext):
    # Build the path <dataroot>/train/<index>.<ext>.
    # NOTE(review): the rest of this function is not visible in this view.
    file_name = f'{index}.{ext}'
    img_path = os.path.join(opt.dataroot, 'train', file_name)
from albumentations import Compose, ShiftScaleRotate, PadIfNeeded, RandomCrop, Resize, RandomSizedCrop, CLAHE, \
    RandomRotate90, Flip, OneOf, MotionBlur, MedianBlur, Blur, CenterCrop, LongestMaxSize, HorizontalFlip, VerticalFlip, \
    Transpose
from albumentations.pytorch import ToTensor

# Training pipeline for the regular inputs.
transform_train = Compose([
    # The probability must be passed as ``p``: the first positional
    # parameter is ``always_apply``, so ``RandomRotate90(0.2)`` rotated
    # every sample instead of 20% of them.
    RandomRotate90(p=0.2),
    Flip(p=0.2),
    ShiftScaleRotate(),
    OneOf([
        MotionBlur(p=.2),
        MedianBlur(blur_limit=3, p=0.1),
        Blur(blur_limit=3, p=0.1),
    ], p=0.2),
    ToTensor()
])

# Test pipeline: tensor conversion only.
transform_test = Compose([ToTensor()])

IMG_SIZE_RAW = 224
RAW_CROP_SIZE = 448

# Training pipeline for raw inputs: resize to RAW_CROP_SIZE first.
transform_train_raw = Compose([
    Resize(RAW_CROP_SIZE, RAW_CROP_SIZE),
    # CenterCrop(width=IMG_SIZE_RAW, height=IMG_SIZE_RAW),
    RandomRotate90(p=0.2),  # keyword ``p``; see the note on transform_train
    Flip(p=0.2),
    ShiftScaleRotate(),
    ToTensor()
])
def __init__(self, config):
    """Set up the appearance and shape augmentation pipelines.

    Both pipelines register ``image1 .. image{n_images - 1}`` as additional
    image targets so every image of a sample gets the same transform.
    """
    super(AugmentedPair3, self).__init__(config)

    additional_targets = {}
    for i in range(1, self.n_images):
        additional_targets["image{}".format(i)] = "image"

    p = 0.9

    def colour_jitter():
        # A fresh, independent block per call; used three times below.
        return OneOf(
            [
                RandomBrightnessContrast(p=0.3),
                RGBShift(p=0.3),
                HueSaturationValue(p=0.3),
            ],
            p=0.8,
        )

    blur = OneOf(
        [MedianBlur(blur_limit=3, p=0.1), Blur(blur_limit=3, p=0.1)],
        p=0.5,
    )
    self.appearance_augmentation = Compose(
        [
            blur,
            colour_jitter(),
            colour_jitter(),
            colour_jitter(),
            ToGray(p=0.1),
            ChannelShuffle(p=0.3),
        ],
        p=p,
        additional_targets=additional_targets,
    )

    mirror = OneOf([Transpose(p=0.5), HorizontalFlip(p=0.5)], p=0.9)
    quarter_turn = OneOf([RandomRotate90(p=1.0)], p=0.9)
    affine = ShiftScaleRotate(shift_limit=0.0625,
                              scale_limit=0.25,
                              rotate_limit=25,
                              p=0.3,
                              border_mode=cv2.BORDER_REPLICATE)
    warp = OneOf(
        [
            IAAPiecewiseAffine(p=0.5),
            ElasticTransform(p=0.5, border_mode=cv2.BORDER_REPLICATE),
        ],
        p=0.3,
    )
    self.shape_augmentation = Compose(
        [mirror, quarter_turn, affine, warp],
        p=p,
        additional_targets=additional_targets,
    )
def __init__(self, prob):
    """Create four geometric transforms, each with probability ``prob``."""
    factories = (
        ('horizontal', HorizontalFlip),
        ('vertical', VerticalFlip),
        ('rotate', RandomRotate90),
        ('transpose', Transpose),
    )
    for attr_name, factory in factories:
        setattr(self, attr_name, factory(p=prob))
def __getitem__(self, idx):
    """Return all augmented images and masks for row ``idx``.

    Images and masks ``<name>_<position>_<k>`` are loaded for ``k`` from
    ``self.num_images`` down to 1, concatenated along the last axis,
    augmented together and split back into per-image tensors.

    Returns:
        Dict with ``'features'`` (stacked image tensors), ``'targets'``
        (all masks when ``self.all_masks`` is set, otherwise only the last
        one), ``'name'`` and ``'position'``.

    Raises:
        Exception: If fewer than two channels are configured.
    """
    if len(self.channels) < 2:
        raise Exception('You have to specify at least two channels.')

    row = self.df.iloc[idx]
    instance_name = '_'.join([row['name'], row['position']])

    def locate(folder, k, file_type):
        return get_filepath(self.dataset_path,
                            row['dataset_folder'],
                            folder,
                            instance_name + f'_{k}',
                            file_type=file_type)

    images_array, masks_array = [], []
    # Newest index first: num_images, num_images - 1, ..., 1.
    for k in reversed(range(1, self.num_images + 1)):
        image_path = locate(self.images_folder, k, self.image_type)
        images_array.append(
            filter_by_channels(read_tensor(image_path), self.channels, 1))

        mask_path = locate(self.masks_folder, k, self.mask_type)
        masks_array.append(np.expand_dims(read_tensor(mask_path), axis=-1))

    one_of_heavy = OneOf(
        [
            RandomSizedCrop(
                min_max_height=(int(self.image_size * 0.7), self.image_size),
                height=self.image_size,
                width=self.image_size),
            RandomBrightnessContrast(brightness_limit=0.15,
                                     contrast_limit=0.15),
            ElasticTransform(alpha=15, sigma=5, alpha_affine=5),
            GridDistortion(p=0.6),
        ],
        p=0.8,
    )
    aug = Compose([RandomRotate90(), Flip(), one_of_heavy, ToTensor()])

    augmented = aug(image=np.concatenate(images_array, axis=-1),
                    mask=np.concatenate(masks_array, axis=-1))

    per_image = count_channels(self.channels)
    image_parts = []
    for num_img in range(self.num_images):
        start = num_img * per_image
        image_parts.append(
            augmented['image'][start:start + per_image, :, :])
    augmented_images = torch.stack(image_parts)

    mask = augmented['mask']
    if self.all_masks:
        augmented_masks = torch.stack(
            [mask[:, :, :, i] for i in range(mask.shape[-1])]).squeeze()
    else:
        augmented_masks = torch.stack([mask[:, :, :, -1]])

    return {
        'features': augmented_images,
        'targets': augmented_masks,
        'name': row['name'],
        'position': row['position'],
    }
HorizontalFlip, VerticalFlip, CenterCrop, Compose, RandomRotate90, RandomBrightnessContrast, Normalize, RandomCrop, Blur ) channel_max = [92.87763, 91.97153, 91.65466, 91.9873] transform_rgb = { 'train': Compose( [RandomCrop(512, 512, p=1.0), HorizontalFlip(p=.5), VerticalFlip(p=.5), RandomRotate90(p=0.5), RandomBrightnessContrast(p=0.3), Blur(p=0.3), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensor(), ]), 'valid': Compose( [CenterCrop(512, 512), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ToTensor()]), 'inference': Compose( [ Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
def __init__(self,
             imgs: Sequence[str] = None,
             suffix: str = '.path',
             line_width: int = 4,
             im_transforms: Callable[[Any],
                                     torch.Tensor] = transforms.Compose([]),
             mode: str = 'path',
             augmentation: bool = False,
             valid_baselines: Sequence[str] = None,
             merge_baselines: Dict[str, Sequence[str]] = None,
             valid_regions: Sequence[str] = None,
             merge_regions: Dict[str, Sequence[str]] = None):
    """
    Reads a list of image-json pairs and creates a data set.

    Args:
        imgs (list): Input files. In alto, page, and xml mode each entry is
                     passed to the matching parser and replaced by the
                     image path it returns.
        suffix (str): Suffix to attach to image base name to load JSON
                      files from. Not referenced in this constructor.
        line_width (int): Height of the baseline in the scaled input.
        im_transforms (Callable): Composed transforms; the first two are
                                  stored as `head_transforms`, the rest as
                                  `tail_transforms`.
        mode (str): Either path, alto, page, xml, or None. In alto, page,
                    and xml mode the baseline paths and image data is
                    retrieved from an ALTO/PageXML file. In `None` mode
                    data is iteratively added through the `add` method.
        augmentation (bool): Enable/disable augmentation.
        valid_baselines (list): Sequence of valid baseline identifiers. If
                                `None` all are valid.
        merge_baselines (dict): Sequence of baseline identifiers to merge.
                                Note that merging occurs after entities not
                                in valid_* have been discarded.
        valid_regions (list): Sequence of valid region identifiers. If
                              `None` all are valid.
        merge_regions (dict): Sequence of region identifiers to merge.
                              Note that merging occurs after entities not
                              in valid_* have been discarded.

    Raises:
        Exception: If `mode` is not one of the supported values.
    """
    super().__init__()
    self.mode = mode
    self.im_mode = '1'
    self.aug = None
    self.targets = []
    # n-th entry contains semantic of n-th class
    self.class_mapping = {
        'aux': {
            '_start_separator': 0,
            '_end_separator': 1
        },
        'baselines': {},
        'regions': {}
    }
    self.class_stats = {
        'baselines': defaultdict(int),
        'regions': defaultdict(int)
    }
    # The two 'aux' separator classes occupy indices 0 and 1.
    self.num_classes = 2
    self.mbl_dict = merge_baselines if merge_baselines is not None else {}
    self.mreg_dict = merge_regions if merge_regions is not None else {}
    self.valid_baselines = valid_baselines
    self.valid_regions = valid_regions
    if mode in ['alto', 'page', 'xml']:
        # Select the parser matching the file format.
        if mode == 'alto':
            fn = parse_alto
        elif mode == 'page':
            fn = parse_page
        elif mode == 'xml':
            fn = parse_xml
        im_paths = []
        self.targets = []
        for img in imgs:
            try:
                data = fn(img)
                im_paths.append(data['image'])
                # Group baselines by (possibly merged) script type and
                # count them per type.
                lines = defaultdict(list)
                for line in data['lines']:
                    if valid_baselines is None or line[
                            'script'] in valid_baselines:
                        lines[self.mbl_dict.get(line['script'],
                                                line['script'])].append(
                                                    line['baseline'])
                        self.class_stats['baselines'][self.mbl_dict.get(
                            line['script'], line['script'])] += 1
                # Same grouping and counting for regions.
                regions = defaultdict(list)
                for k, v in data['regions'].items():
                    if valid_regions is None or k in valid_regions:
                        regions[self.mreg_dict.get(k, k)].extend(v)
                        self.class_stats['regions'][self.mreg_dict.get(
                            k, k)] += len(v)
                data['regions'] = regions
                self.targets.append({
                    'baselines': lines,
                    'regions': data['regions']
                })
            except KrakenInputException as e:
                # Files that fail to parse are logged and skipped.
                logger.warning(e)
                continue
        # get line types
        imgs = im_paths
        # calculate class mapping
        line_types = set()
        region_types = set()
        for page in self.targets:
            for line_type in page['baselines'].keys():
                line_types.add(line_type)
            for reg_type in page['regions'].keys():
                region_types.add(reg_type)
        # idx starts at -1 so that an empty set adds zero classes below.
        idx = -1
        for idx, line_type in enumerate(line_types):
            self.class_mapping['baselines'][
                line_type] = idx + self.num_classes
        self.num_classes += idx + 1
        idx = -1
        for idx, reg_type in enumerate(region_types):
            self.class_mapping['regions'][
                reg_type] = idx + self.num_classes
        self.num_classes += idx + 1
    elif mode == 'path':
        pass
    elif mode is None:
        imgs = []
    else:
        raise Exception('invalid dataset mode')
    if augmentation:
        # Imported lazily so albumentations is only required when
        # augmentation is enabled.
        from albumentations import (
            Compose,
            ToFloat,
            FromFloat,
            RandomRotate90,
            Flip,
            OneOf,
            MotionBlur,
            MedianBlur,
            Blur,
            ShiftScaleRotate,
            OpticalDistortion,
            ElasticTransform,
            RandomBrightnessContrast,
            HueSaturationValue,
        )

        self.aug = Compose([
            ToFloat(),
            RandomRotate90(),
            Flip(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ],
                  p=0.2),
            ShiftScaleRotate(shift_limit=0.0625,
                             scale_limit=0.2,
                             rotate_limit=45,
                             p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ],
                  p=0.2),
            HueSaturationValue(hue_shift_limit=20,
                               sat_shift_limit=0.1,
                               val_shift_limit=0.1,
                               p=0.3),
        ],
                           p=0.5)
    self.imgs = imgs
    self.line_width = line_width
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.seg_type = None
# gray norm_mean = [0.46152964] norm_std = [0.10963361] # RGB # norm_mean = [0.4976264, 0.45133978, 0.3993562] # norm_std = [0.11552592, 0.10886826, 0.10727626] # Albumentations train_Transform = Compose( [ Resize(height=args.inputsize, width=args.inputsize, interpolation=Image.NEAREST, p=1), RandomRotate90(0.5), Flip(p=0.5), ShiftScaleRotate(p=0.2, interpolation=Image.NEAREST ), # , border_mode=cv2.BORDER_CONSTANT, value=0 # Normalize(mean=norm_mean, std=norm_std), ], p=1.0) valid_Transform = Compose( [ Resize(height=args.inputsize, width=args.inputsize, interpolation=Image.NEAREST, p=1), # Normalize(mean=norm_mean, std=norm_std), ],
def train(inputs, working_dir, fold_id):
    """Train ``unet_vgg16`` on one cross-validation fold.

    Runs up to ``n_epochs`` epochs. After each epoch the model is saved and
    validated; the best validation Jaccard score is tracked. When the score
    has not improved for ``patience`` epochs the learning rate is multiplied
    by ``lr_update_rate`` and the best checkpoint is reloaded. Training
    stops when the learning rate falls below ``min_lr`` or the time limit
    is exceeded.

    Args:
        inputs: Passed to ``read_cv_splits`` to obtain the fold table.
        working_dir: Not used in this function.
        fold_id: Fold identifier; selects the fold and names the model.
    """
    start_epoch, step = 0, 0

    # TopCoder
    num_workers, batch_size = 8, 4 * 8
    gpus = [0, 1, 2, 3]

    # My machine
    # num_workers, batch_size = 8, 2 * 3
    # gpus = [0, 1]

    patience, n_epochs = 8, 150
    lr, min_lr, lr_update_rate = 1e-4, 5e-5, 0.5
    training_timelimit = 60 * 60 * 24 * 2  # 2 days
    st_time = time.time()

    model = unet_vgg16(pretrained=True)
    model = nn.DataParallel(model, device_ids=gpus).cuda()

    train_transformer = Compose([
        HorizontalFlip(p=0.5),
        RandomRotate90(p=0.5),
        RandomCrop(512, 512, p=1.0),
        Normalize(),
    ],
                                p=1.0)

    val_transformer = Compose([
        CenterCrop(512, 512, p=1.0),
        Normalize(),
    ],
                              p=1.0)

    # train/val loaders
    df_cvfolds = read_cv_splits(inputs)
    trn_loader, val_loader = make_train_val_loader(train_transformer,
                                                   val_transformer,
                                                   df_cvfolds, fold_id,
                                                   batch_size, num_workers)

    # train
    criterion = binary_loss(jaccard_weight=0.25)
    optimizer = Adam(model.parameters(), lr=lr)

    # Number of batches between progress reports; also the window size
    # used for the reported means.
    report_epoch = 10

    model_name = f'v12_f{fold_id}'
    fh = open_log(model_name)

    # vars for early stopping
    best_score = 0
    not_improved_count = 0

    for epoch in range(start_epoch, n_epochs):
        model.train()

        tl = trn_loader  # alias
        trn_metrics = Metrics()

        try:
            tq = tqdm.tqdm(total=(len(tl) * trn_loader.batch_size))
            tq.set_description(f'Ep{epoch:>3d}')
            # NOTE(review): the loop variable shadows the ``inputs``
            # parameter, and ``batch_size`` is reassigned below.
            for i, (inputs, targets, labels, names) in enumerate(trn_loader):
                inputs = inputs.cuda()
                targets = targets.cuda()

                outputs = model(inputs)
                loss = criterion(outputs, targets)
                optimizer.zero_grad()

                # Increment step counter
                batch_size = inputs.size(0)
                loss.backward()
                optimizer.step()
                step += 1
                tq.update(batch_size)

                # Update eval metrics
                trn_metrics.loss.append(loss.item())
                trn_metrics.bce.append(criterion._stash_bce_loss.item())
                trn_metrics.jaccard.append(criterion._stash_jaccard.item())

                if i > 0 and i % report_epoch == 0:
                    report_metrics = Bunch(
                        epoch=epoch,
                        step=step,
                        trn_loss=np.mean(trn_metrics.loss[-report_epoch:]),
                        trn_bce=np.mean(trn_metrics.bce[-report_epoch:]),
                        trn_jaccard=np.mean(
                            trn_metrics.jaccard[-report_epoch:]),
                    )
                    write_event(fh, **report_metrics)
                    tq.set_postfix(loss=f'{report_metrics.trn_loss:.5f}',
                                   bce=f'{report_metrics.trn_bce:.5f}',
                                   jaccard=f'{report_metrics.trn_jaccard:.5f}')

            # End of epoch
            report_metrics = Bunch(
                epoch=epoch,
                step=step,
                trn_loss=np.mean(trn_metrics.loss[-report_epoch:]),
                trn_bce=np.mean(trn_metrics.bce[-report_epoch:]),
                trn_jaccard=np.mean(trn_metrics.jaccard[-report_epoch:]),
            )
            write_event(fh, **report_metrics)
            tq.set_postfix(loss=f'{report_metrics.trn_loss:.5f}',
                           bce=f'{report_metrics.trn_bce:.5f}',
                           jaccard=f'{report_metrics.trn_jaccard:.5f}')
            tq.close()
            save(model, epoch, step, model_name)

            # Run validation
            val_metrics = validation(model, criterion, val_loader, epoch,
                                     step, fh)
            report_val_metrics = Bunch(
                epoch=epoch,
                step=step,
                val_loss=np.mean(val_metrics.loss[-report_epoch:]),
                val_bce=np.mean(val_metrics.bce[-report_epoch:]),
                val_jaccard=np.mean(val_metrics.jaccard[-report_epoch:]),
            )
            write_event(fh, **report_val_metrics)

            # Stop once the wall-clock budget is used up.
            if time.time() - st_time > training_timelimit:
                tq.close()
                break

            if best_score < report_val_metrics.val_jaccard:
                best_score = report_val_metrics.val_jaccard
                not_improved_count = 0
                copy_best(model, epoch, model_name, step)
            else:
                not_improved_count += 1

            if not_improved_count >= patience:
                # Update learning rate and optimizer
                lr *= lr_update_rate
                # Stop criterion
                if lr < min_lr:
                    tq.close()
                    break

                not_improved_count = 0

                # Load best weight
                del model
                model = unet_vgg16(pretrained=False)
                path = f'/root/working/models/{model_name}/{model_name}_best'
                cp = torch.load(path)
                # Wrapped in DataParallel to load the state dict, then
                # unwrapped and re-wrapped with the configured device ids.
                model = nn.DataParallel(model).cuda()
                # NOTE(review): reassigning ``epoch`` does not alter the
                # range() iteration of the enclosing for loop.
                epoch = cp['epoch']
                model.load_state_dict(cp['model'])
                model = model.module
                model = nn.DataParallel(model, device_ids=gpus).cuda()

                # Init optimizer
                optimizer = Adam(model.parameters(), lr=lr)

        except KeyboardInterrupt:
            # Save progress before exiting on Ctrl-C.
            # NOTE(review): ``tq`` is unbound if the interrupt arrives
            # before the progress bar is created in the first epoch.
            save(model, epoch, step, model_name)
            tq.close()
            fh.close()
            sys.exit(1)
        except Exception as e:
            # NOTE(review): the ``break`` after ``raise`` is unreachable,
            # and ``fh`` is not closed on this path.
            raise e
            break

    fh.close()
RandomGamma, ) transformimg = transforms.Compose( [ transforms.ToTensor(), transforms.Normalize( mean=[0.485, 0.456, 0.406][::-1], std=[0.225, 0.224, 0.225][::-1] ), ] ) transformaug = Compose( [ VerticalFlip(p=0.5), RandomRotate90(p=0.5), ISONoise(p=0.5), RandomBrightnessContrast(p=0.5), RandomGamma(p=0.5), RandomFog(fog_coef_lower=0.025, fog_coef_upper=0.1, p=0.5), ] ) class XViewDataset(Dataset): def __init__( self, size=None, aug=True, pattern="data/train/images1024/*pre_disaster*.png" ): self.name = "train" self.aug = aug self.pre = glob(pattern)
def augment_data(images, masks, save_path, augment=True):
    """Write every image/mask pair, plus optional augmented variants, to disk.

    Each pair is loaded with ``read_data``. When ``augment`` is truthy, 25
    additional variants are produced with albumentations / OpenCV (crops,
    flips, distortions, grayscale copies, colour and blur changes, noise,
    dropout). Every resulting pair is resized to ``size`` and written as
    ``<save_path>/image/<image_name>_<idx>.jpg`` and
    ``<save_path>/mask/<mask_name>_<idx>.jpg``, where ``idx`` is the position
    of the variant (0 is the unmodified pair).

    Parameters
    ----------
    images : sequence of str
        Paths of the input images ("/"-separated).
    masks : sequence of str
        Paths of the matching masks, in the same order as ``images``.
    save_path : str
        Output root; files are written into its ``image/`` and ``mask/``
        sub-directories (this function does not create them).
    augment : bool, default True
        When falsy only the resized original pair is written.

    Notes
    -----
    * Masks are resized with nearest-neighbour interpolation so that resizing
      does not invent intermediate label values.
    * NOTE(review): masks are still stored as JPEG, which is lossy; the file
      extension is kept because downstream code may rely on it.
    """
    crop_size = (256, 256)
    # Passed to cv2.resize as dsize.
    size = (2018, 2006)

    def apply(transform, image, mask):
        """Run one albumentations transform on a pair and unpack the result."""
        result = transform(image=image, mask=mask)
        return result['image'], result['mask']

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        image_name = image_file.split("/")[-1].split(".")[0]
        mask_name = mask_file.split("/")[-1].split(".")[0]

        x, y = read_data(image_file, mask_file)
        try:
            # Unpacking doubles as a check that a 3-D array was loaded.
            _h, _w, _c = x.shape
        except Exception:
            # Retry once with the last character of the image path removed
            # (presumably a stray trailing character in the path list --
            # TODO confirm against the caller).
            image_file = image_file[:-1]
            x, y = read_data(image_file, mask_file)
            _h, _w, _c = x.shape

        if augment:
            # Geometric transforms applied to the colour image (variants 1-9).
            leading = [
                CenterCrop(p=1, height=crop_size[1], width=crop_size[0]),
                Crop(p=1, x_min=0, x_max=crop_size[0],
                     y_min=0, y_max=crop_size[1]),
                RandomRotate90(p=1),
                Transpose(p=1),
                ElasticTransform(p=1, alpha=120, sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=1),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
                VerticalFlip(p=1),
                HorizontalFlip(p=1),
            ]
            # Transforms applied to the grayscale copy (variants 11-13).
            on_gray = [
                VerticalFlip(p=1),
                HorizontalFlip(p=1),
                CenterCrop(p=1, height=crop_size[1], width=crop_size[0]),
            ]
            # Photometric / noise transforms on the colour image (14-25).
            trailing = [
                RandomBrightnessContrast(p=1),
                RandomGamma(p=1),
                HueSaturationValue(p=1),
                RGBShift(p=1),
                RandomBrightness(p=1),
                RandomContrast(p=1),
                MotionBlur(p=1, blur_limit=7),
                # Median/Gaussian blur only accept odd kernel limits; the
                # previous value of 10 is rejected by albumentations.
                MedianBlur(p=1, blur_limit=11),
                GaussianBlur(p=1, blur_limit=11),
                GaussNoise(p=1),
                ChannelShuffle(p=1),
                CoarseDropout(p=1, max_holes=8, max_height=32, max_width=32),
            ]

            pairs = [(x, y)]
            pairs.extend(apply(t, x, y) for t in leading)
            gray = cv2.cvtColor(x, cv2.COLOR_RGB2GRAY)
            pairs.append((gray, y))
            pairs.extend(apply(t, gray, y) for t in on_gray)
            pairs.extend(apply(t, x, y) for t in trailing)
        else:
            pairs = [(x, y)]

        # Persist the original and every augmented variant.
        for idx, (img, msk) in enumerate(pairs):
            img = cv2.resize(img, size)
            msk = cv2.resize(msk, size, interpolation=cv2.INTER_NEAREST)

            tmp_image_name = f"{image_name}_{idx}.jpg"
            tmp_mask_name = f"{mask_name}_{idx}.jpg"

            cv2.imwrite(os.path.join(save_path, "image/", tmp_image_name), img)
            cv2.imwrite(os.path.join(save_path, "mask/", tmp_mask_name), msk)
from albumentations import (
    Compose,
    Flip,
    Normalize,
    RandomRotate90,
    Resize,
    Transpose,
)
from albumentations.pytorch import ToTensor

# Edge length (pixels) used for both the width and the height of Resize.
SIZE = 320
# Probability handed to the training Compose as its ``p`` argument.
p = 0.4

# Training pipeline: random 90-degree rotation, flip and transpose, followed
# by a resize to SIZE x SIZE and conversion to a normalized tensor.
# Resize is marked always_apply=True.
# Mean/std are presumably the ImageNet channel statistics -- TODO confirm.
train_aug = Compose(
    [
        RandomRotate90(),
        Flip(),
        Transpose(),
        Resize(height=SIZE, width=SIZE, always_apply=True),
        ToTensor(
            normalize=dict(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            )
        ),
    ],
    p=p,
)

# Validation pipeline: resize and tensor conversion only, always applied.
valid_aug = Compose(
    [
        Resize(height=SIZE, width=SIZE, always_apply=True),
        ToTensor(
            normalize=dict(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            )
        ),
    ],
    p=1.0,
)
def get_data(img_size, batch_size):
    """Create the training and validation ``DataLoader`` objects.

    Two ``PlanetDataset`` instances are built from ``train.csv`` and
    ``val.csv`` under ``DATA_PATH / 'train_v2.csv'``, both reading ``jpg``
    images from ``DATA_PATH / 'train-jpg'``. A short summary of each dataset
    is printed to the console.

    Parameters
    ----------
    img_size : int
        Height and width passed to ``Resize`` in both pipelines.
    batch_size : int
        Batch size of the training loader; the validation loader uses twice
        this value.

    Returns
    -------
    tuple
        ``(train_dl, val_dl)``.
    """
    labels_root = DATA_PATH / 'train_v2.csv'
    image_root = DATA_PATH / 'train-jpg'
    extension = 'jpg'

    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Training pipeline: resize, then contrast / geometric / colour
    # augmentation, then normalization and tensor conversion.
    train_tfms = Compose([
        Resize(height=img_size, width=img_size),
        CLAHE(clip_limit=1.0, p=0.25),
        Flip(p=0.5),
        RandomRotate90(p=0.5),
        RandomBrightnessContrast(brightness_limit=0.3,
                                 contrast_limit=0.3,
                                 p=0.5),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=0),
        RGBShift(p=0.25),
        Normalize(mean=channel_mean, std=channel_std),
        ToTensor(),
    ])
    # NOTE(review): the validation pipeline also flips / rotates at random,
    # so validation inputs differ between passes -- confirm this is intended.
    val_tfms = Compose([
        Resize(height=img_size, width=img_size),
        Flip(p=0.5),
        RandomRotate90(p=0.5),
        Normalize(mean=channel_mean, std=channel_std),
        ToTensor(),
    ])

    train_ds = PlanetDataset(labels_root / 'train.csv', image_root,
                             extension, train_tfms)
    val_ds = PlanetDataset(labels_root / 'val.csv', image_root,
                           extension, val_tfms, val=True)

    # Options shared by both loaders.
    # NOTE(review): drop_last=True also applies to validation, so a trailing
    # partial validation batch is never evaluated.
    loader_opts = dict(num_workers=4, pin_memory=True, drop_last=True)
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                          **loader_opts)
    val_dl = DataLoader(val_ds, batch_size=batch_size * 2, shuffle=False,
                        **loader_opts)

    # Show the details in the console (backslash-newline continues the
    # string literal without inserting a line break).
    print(f'''Train DS: {train_ds.img_folder} \t \
 Ext: {train_ds.ext} \t \
 x_train: {train_ds.x_train.shape} \t \
 y_train: {train_ds.y_train.shape} \t''')
    print(f'''Validation DS: {val_ds.img_folder} \t \
 Ext: {val_ds.ext} \t \
 x_train: {val_ds.x_train.shape} \t \
 y_train: {val_ds.y_train.shape} \t''')

    return (train_dl, val_dl)
def augment_data(images, masks, save_path, augment=True):
    """Write every sufficiently large image/mask pair, plus variants, to disk.

    Each pair is read with ``cv2.imread(..., cv2.IMREAD_COLOR)``. Pairs whose
    image is smaller than 512 pixels along either of its first two axes are
    skipped. When ``augment`` is truthy, 23 additional variants are generated
    (crop, rotation, distortions, flips, grayscale copies, RGB shift, channel
    shuffle, dropout, noise, centre crops and their flips). Every resulting
    pair is resized to 512x512 and saved under ``<save_path>/image/`` and
    ``<save_path>/mask/``.

    Parameters
    ----------
    images : sequence of str
        Paths of the input images ("/"-separated).
    masks : sequence of str
        Paths of the matching masks, in the same order as ``images``.
    save_path : str
        Output root; files go to its ``image/`` and ``mask/`` sub-directories
        (this function does not create them).
    augment : bool, default True
        When falsy only the resized original pair is written, named
        ``<name>.jpg``; otherwise files are named ``<name>_<idx>.jpg`` with
        ``idx`` the position of the variant (0 is the unmodified pair).

    Notes
    -----
    * ``cv2.imread`` yields BGR data, so the grayscale copies are produced
      with ``COLOR_BGR2GRAY``.
    * Masks are resized with nearest-neighbour interpolation so that resizing
      does not invent intermediate label values.
    * NOTE(review): masks are still stored as JPEG, which is lossy; the file
      extension is kept because downstream code may rely on it.
    """
    size = (512, 512)

    def apply(transform, image, mask):
        """Run one albumentations transform on a pair and unpack the result."""
        result = transform(image=image, mask=mask)
        return result['image'], result['mask']

    for image_file, mask_file in tqdm(zip(images, masks), total=len(images)):
        image_name = image_file.split("/")[-1].split(".")[0]
        mask_name = mask_file.split("/")[-1].split(".")[0]

        x = cv2.imread(image_file, cv2.IMREAD_COLOR)
        y = cv2.imread(mask_file, cv2.IMREAD_COLOR)

        # Undersized inputs are ignored entirely.
        if x.shape[0] < size[0] or x.shape[1] < size[1]:
            continue

        if augment:
            # Transforms applied to the colour image (variants 1-7).
            leading = [
                Crop(p=1, x_min=0, x_max=size[0], y_min=0, y_max=size[1]),
                RandomRotate90(p=1),
                ElasticTransform(p=1, alpha=120, sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=1),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
                VerticalFlip(p=1),
                HorizontalFlip(p=1),
            ]
            # Further colour-image transforms. The brightness, gamma, HSV,
            # contrast and blur variants present as commented-out code in
            # the original pipeline stay disabled.
            trailing = [
                RGBShift(p=1),
                ChannelShuffle(p=1),
                CoarseDropout(p=1, max_holes=10, max_height=32, max_width=32),
                GaussNoise(p=1),
            ]

            pairs = [(x, y)]
            pairs.extend(apply(t, x, y) for t in leading)

            # Grayscale copy and its flips; the mask is left untouched for
            # the plain grayscale variant.
            gray = cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
            pairs.append((gray, y))
            pairs.append(apply(VerticalFlip(p=1), gray, y))
            pairs.append(apply(HorizontalFlip(p=1), gray, y))

            pairs.extend(apply(t, x, y) for t in trailing)

            # Centre crops at three sizes, followed by the vertical and
            # horizontal flip of each crop.
            crops = [apply(CenterCrop(edge, edge, p=1), x, y)
                     for edge in (256, 384, 448)]
            pairs.extend(crops)
            for crop_img, crop_mask in crops:
                pairs.append(apply(VerticalFlip(p=1), crop_img, crop_mask))
                pairs.append(apply(HorizontalFlip(p=1), crop_img, crop_mask))
        else:
            pairs = [(x, y)]

        single = len(pairs) == 1
        for idx, (img, msk) in enumerate(pairs):
            img = cv2.resize(img, size)
            msk = cv2.resize(msk, size, interpolation=cv2.INTER_NEAREST)

            if single:
                tmp_image_name = f"{image_name}.jpg"
                tmp_mask_name = f"{mask_name}.jpg"
            else:
                tmp_image_name = f"{image_name}_{idx}.jpg"
                tmp_mask_name = f"{mask_name}_{idx}.jpg"

            cv2.imwrite(os.path.join(save_path, "image/", tmp_image_name), img)
            cv2.imwrite(os.path.join(save_path, "mask/", tmp_mask_name), msk)
def train(file_pattern,
          train_num_batches=None,
          train_aug=False,
          train_batch_size=1,
          val_batch_size=1,
          learning_rate=1e-3,
          epochs=1,
          verbosity=2,
          file_directory=None,
          resume=None,
          train_shuffle=True,
          pre_image_mean=None,
          post_image_mean=None):
    """
    Function to train the UNet model

    Parameters
    ----------
    file_pattern : string
        Location where the image folder is for the data.
        Example format: "images/*pre_disaster*.png"
    train_num_batches : int, default None
        Number of batches for the training set, if none, the full
        dataset will be used.
    train_aug : bool, default False
        If true, augmentations are performed.
    train_batch_size : int, default 1
        Batch size for the training set.
    val_batch_size : int, default 1
        Batch size for the validation set.
    learning_rate : float, default 1e-3
        Learning rate for the UNet.
    epochs : int, default 1
        How many epochs for the training to run.
    verbosity : int, default 2
        How verbose you'd like the output to be. Forwarded as ``verbose``
        to ``model.fit`` and to the checkpoint / LR callbacks.
    file_directory : string, default None
        Directory where you'd like the output files saved. Defaults to
        ``<current working directory>/saved_models`` and is created when
        missing.
    resume : string, default None
        Enter in a string for the saved model file and training will
        resume from this instance.
    train_shuffle : bool, default True
        If True, the training data is shuffled for each epoch.
    pre_image_mean : str, default None
        The filepath for the pre image mean numpy array file.
        Currently not used inside this function.
    post_image_mean : str, default None
        The filepath for the post image mean numpy array file.
        Currently not used inside this function.

    Returns
    -------
    None
        The CSV log and the TensorBoard files are written below
        ``file_directory``; weight checkpoints are written as
        ``dual_unet_{epoch:02d}-{loss:.2f}.hdf5`` relative to the current
        working directory.
    """
    if file_directory is None:
        file_directory = os.path.abspath(
            os.path.join(os.getcwd(), "saved_models"))
    # The CSV logger writes directly into this directory, so it must exist.
    os.makedirs(file_directory, exist_ok=True)

    # One timestamp for the whole run, so all artefacts share the same
    # suffix even when the call straddles a minute boundary.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M")
    tensorboard_path = os.path.join(file_directory, "logs",
                                    f"tboard_{run_stamp}")
    csv_logger_path = os.path.join(file_directory,
                                   f"log_unet_{run_stamp}.csv")

    if train_aug:
        train_augs = Compose([
            VerticalFlip(p=0.5),
            RandomRotate90(p=0.5),
            ISONoise(p=0.5),
            RandomBrightnessContrast(p=0.5),
            RandomGamma(p=0.5),
            RandomFog(fog_coef_lower=0.025, fog_coef_upper=0.1, p=0.5),
        ])
    else:
        train_augs = None

    # Weighted categorical cross entropy weights
    # class_weights = tf.constant([0.1, 1.0, 2.0, 2.0, 2.0])
    # class_weights = tf.constant([1.0, 1.0, 0.5, 0.5, 0.5])
    class_weights = tf.constant([1.0, 1.0, 3.0, 3.0, 3.0])

    train_data = LabeledImageDataset(num_batches=train_num_batches,
                                     augmentations=train_augs,
                                     pattern=file_pattern,
                                     shuffle=train_shuffle,
                                     n_classes=5,
                                     batch_size=train_batch_size,
                                     normalize=True)

    # Using random samples from train for validation
    # NOTE(review): validation uses the same file pattern, shuffle flag and
    # augmentations as training -- confirm this is intended.
    val_data = LabeledImageDataset(num_batches=100,
                                   augmentations=train_augs,
                                   pattern=file_pattern,
                                   shuffle=train_shuffle,
                                   n_classes=5,
                                   batch_size=val_batch_size,
                                   normalize=True)

    def build_model():
        """Create a fresh 5-class UNet accepting variable-size 3-channel input."""
        return UNet(num_classes=5).model((None, None, 3))

    model = build_model()
    if resume:
        try:
            model.load_weights(resume)
            # Reported only after the weights were actually read.
            print("the pretrained model was loaded")
        except OSError:
            print("The model file could not be found. "
                  "Starting from a new model instance")
            model = build_model()

    # Overall categorical accuracy plus per-class precision and recall.
    metrics = [tf.keras.metrics.CategoricalAccuracy()]
    for i in range(5):
        metrics.append(Precision(class_id=i, name=f"prec_class_{i}"))
        metrics.append(Recall(class_id=i, name=f"rec_class_{i}"))

    model.compile(optimizer=keras.optimizers.RMSprop(lr=learning_rate),
                  loss=CombinedLoss(class_weights),
                  metrics=metrics)

    # Save the weights after every epoch (save_best_only=False); the file
    # name embeds the epoch number and the training loss.
    # NOTE(review): the path is relative to the current working directory,
    # not to file_directory.
    model_checkpoint = ModelCheckpoint("dual_unet_{epoch:02d}-{loss:.2f}.hdf5",
                                       monitor='loss',
                                       save_best_only=False,
                                       mode='min',
                                       save_weights_only=True,
                                       verbose=verbosity)

    csv_logger = CSVLogger(csv_logger_path, append=True, separator=',')

    # Multiply the learning rate by 0.2 when the training loss stops
    # improving for one epoch, never going below 1e-6.
    lr_scheduler = ReduceLROnPlateau(monitor='loss',
                                     factor=0.2,
                                     patience=1,
                                     verbose=verbosity,
                                     mode='min',
                                     min_lr=1e-6)

    tensorboard_cb = TensorBoard(log_dir=tensorboard_path, write_images=True)

    try:
        model.fit(train_data,
                  epochs=epochs,
                  verbose=verbosity,
                  callbacks=[
                      LossAndErrorPrintingCallback(),
                      model_checkpoint,
                      csv_logger,
                      lr_scheduler,
                      tensorboard_cb,
                  ],
                  validation_data=val_data,
                  workers=6)
    except KeyboardInterrupt:
        # Manual interruption: keep the current model, then exit.
        save_model(model, pause=1)
        sys.exit()
    except Exception:
        # Keep the current model before letting the error propagate with its
        # original traceback.
        save_model(model, pause=0)
        raise