def test_perspective_keep_size():
    """Perspective(keep_size=True) must equal Perspective(keep_size=False) + Resize.

    Runs both pipelines from the same seed and compares the resulting
    bboxes and keypoints.
    """
    h, w = 100, 100
    img = np.zeros([h, w, 3], dtype=np.uint8)
    h, w = img.shape[:2]

    def _random_box():
        # x2/y2 are sampled strictly greater than x1/y1 so every box is non-degenerate
        x1 = np.random.randint(0, w - 1)
        y1 = np.random.randint(0, h - 1)
        x2 = np.random.randint(x1 + 1, w)
        y2 = np.random.randint(y1 + 1, h)
        return [x1, y1, x2, y2]

    bboxes = [_random_box() for _ in range(10)]
    keypoints = [
        (np.random.randint(0, w), np.random.randint(0, h), np.random.random())
        for _ in range(10)
    ]

    def _pipeline(*transforms):
        # Fresh params objects per pipeline so the two Compose instances share no state.
        return A.Compose(
            list(transforms),
            keypoint_params=A.KeypointParams("xys"),
            bbox_params=A.BboxParams("pascal_voc", label_fields=["labels"]),
        )

    transform_1 = _pipeline(A.Perspective(keep_size=True, p=1))
    transform_2 = _pipeline(A.Perspective(keep_size=False, p=1), A.Resize(h, w))

    labels = [0] * len(bboxes)
    set_seed()
    res_1 = transform_1(image=img, bboxes=bboxes, keypoints=keypoints, labels=labels)
    set_seed()
    res_2 = transform_2(image=img, bboxes=bboxes, keypoints=keypoints, labels=labels)

    assert np.allclose(res_1["bboxes"], res_2["bboxes"])
    assert np.allclose(res_1["keypoints"], res_2["keypoints"])
def test_perspective_valid_keypoints_after_transform(seed: int, scale: float, h: int, w: int):
    """The four image corners must keep their relative ordering after Perspective."""
    random.seed(seed)
    np.random.seed(seed)
    image = np.zeros([h, w, 3], dtype=np.uint8)
    # Corners, in order: top-left, bottom-left, bottom-right, top-right.
    keypoints = [[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]
    transform = A.Compose(
        [A.Perspective(scale=(scale, scale), p=1)],
        keypoint_params={
            "format": "xy",
            "remove_invisible": False
        },
    )
    res = transform(image=image, keypoints=keypoints)["keypoints"]
    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = res
    # Left-side corners stay left of right-side corners.
    assert x1 < x3 and x1 < x4 and x2 < x3 and x2 < x4
    # Top corners stay above bottom corners.
    assert y1 < y2 and y1 < y3 and y4 < y2 and y4 < y3
def tr_da_fn(height, width):
    """Build the training-time augmentation pipeline for (height, width) targets."""
    geometric = [
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(scale_limit=0.10, rotate_limit=7, shift_limit=0.10,
                           border_mode=cv2.BORDER_CONSTANT, p=1.0),
        A.Perspective(scale=(0.025, 0.04), p=0.3),
        A.RandomResizedCrop(height=height, width=width, scale=(0.9, 1.0), p=0.3),
    ]
    photometric = [
        A.OneOf(
            [
                A.CLAHE(p=1),
                A.RandomBrightness(p=1),
                A.RandomGamma(p=1),
                A.RandomContrast(limit=0.2, p=1.0),
            ],
            p=0.5,
        ),
        A.OneOf(
            [
                A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1.0),
                A.Blur(blur_limit=[2, 3], p=1.0),
                A.GaussNoise(var_limit=(5, 25), p=1.0),
                # A.MotionBlur(blur_limit=3, p=1.0),
            ],
            p=0.5,
        ),
        A.Lambda(image=_da_negative, p=0.2),
    ]
    # Size normalization always runs last so the output is exactly height x width.
    sizing = [
        A.LongestMaxSize(max_size=max(height, width), always_apply=True),
        A.PadIfNeeded(min_height=height, min_width=width,
                      border_mode=cv2.BORDER_CONSTANT, always_apply=True),
    ]
    return A.Compose(geometric + photometric + sizing)
def make_aug_default(img_size, is_train=True, rotate_limit=10, **kws):
    """Default augmentation wrapper: size-normalize, plus color/geometric jitter when training."""
    imw, imh = img_size
    if is_train:
        color_aug = [
            A.HueSaturationValue(),
            A.RandomBrightnessContrast(),
            A.ColorJitter(),
        ]
        geo_aug = [
            # A.HorizontalFlip(),  # bad for keypoints
            A.ShiftScaleRotate(rotate_limit=rotate_limit, border_mode=cv2.BORDER_REPLICATE),
            A.Perspective((0.03, 0.05), pad_mode=cv2.BORDER_REPLICATE),
        ]
    else:
        color_aug = []
        geo_aug = []
    transforms = [
        A.LongestMaxSize(max_size=imw),
        *color_aug,
        A.PadIfNeeded(min_height=imh, min_width=imw, border_mode=cv2.BORDER_REPLICATE),
        *geo_aug,
    ]
    return [AlbuAug(transforms, skip_img_without_ann=True, **kws)]
def __init__(self, mode=None, resize=224):
    """Build the albumentations pipeline for the requested *mode*.

    Args:
        mode: one of 'train_tfms_mask', 'train_age_gender', 'valid_tfms'.
        resize: target square edge length passed to A.Resize.

    Raises:
        ValueError: if *mode* is not one of the supported values.
    """
    if mode == 'train_tfms_mask':
        self.transform = A.Compose([
            A.OneOf([
                A.Perspective(p=1.0),
                A.Rotate(limit=20, p=1.0, border_mode=1),
            ], p=0.5),
            A.OneOf([
                A.RandomBrightness(p=1.0),
                A.HueSaturationValue(p=1.0),
                A.RandomContrast(p=1.0),
            ], p=0.5),
            A.Compose([
                A.Resize(resize, resize),
                A.Normalize(),
            ])
        ])
    elif mode == 'train_age_gender':
        self.transform = A.Compose([
            A.Rotate(limit=20, p=0.5, border_mode=1),
            A.OneOf(
                [
                    A.RandomGridShuffle(grid=(2, 2), p=1.0),
                    # not using for gender
                    # A.RandomGridShuffle(grid=(4, 2), p=1.0),
                    A.Perspective(p=1.0)
                ], p=0.5),
            A.GaussNoise(p=0.5),
            A.Compose([
                A.Resize(resize, resize),
                A.Normalize(),
            ])
        ])
    elif mode == 'valid_tfms':
        self.transform = A.Compose([
            A.Resize(resize, resize),
            A.Normalize(),
        ])
    else:
        # Previously an unrecognized mode (including the default None) silently
        # left self.transform undefined, so the failure surfaced only on first
        # use as a confusing AttributeError. Fail fast with a clear message.
        raise ValueError(f"unknown mode: {mode!r}")
def __init__(self, cfg):
    """Keep the config, prepare the data, and build the two transform pipelines."""
    self.cfg = cfg
    self.data = self.prepare()
    self.mean = self.cfg.mean
    self.std = self.cfg.std

    # Light pipeline: resize + flip + normalize only.
    self.normal_transform = A.Compose([
        A.Resize(384, 288, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.Normalize(p=1.0, mean=self.mean, std=self.std)
    ])

    # Brightness/contrast/gamma applied together as one OneOf alternative.
    brightness_block = A.Compose([
        A.RandomBrightness(limit=0.5, p=0.6),
        A.RandomContrast(limit=0.4, p=0.6),
        A.RandomGamma(p=0.6),
    ])

    # Heavy pipeline: noise, geometric jitter, color shifts and distortions.
    self.augment_transform = A.Compose([
        A.Resize(384, 288, p=1.0),
        A.HorizontalFlip(p=0.7),
        A.GaussNoise(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.25,
                           rotate_limit=20, p=0.6, border_mode=0),
        A.OneOf([
            A.CLAHE(p=0.5),
            brightness_block,
        ], p=0.65),
        A.OneOf([
            A.HueSaturationValue(10, 20, 10, p=1.0),
            A.RGBShift(p=1.0),
            A.Emboss(p=1.0),
        ], p=0.5),
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.3, p=0.3),
        A.OneOf([
            A.Perspective(p=1.0, scale=(0.05, 0.1)),
            A.GridDistortion(p=1.0, distort_limit=0.25, border_mode=0),
            A.OpticalDistortion(p=1.0, shift_limit=0.1,
                                distort_limit=0.1, border_mode=0),
        ], p=0.65),
        A.Normalize(p=1.0, mean=self.mean, std=self.std),
    ])
def get_training_augmentation():
    """Compose the training augmentation pipeline producing 320x320 crops."""
    spatial = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0,
                              shift_limit=0.1, p=1, border_mode=0),
        albu.PadIfNeeded(min_height=320, min_width=320,
                         always_apply=True, border_mode=0),
        albu.RandomCrop(height=320, width=320, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.5),
    ]
    photometric = [
        albu.OneOf(
            [
                albu.CLAHE(p=1),
                albu.RandomBrightnessContrast(p=1),  # RandomBrightness(p=1),
                albu.RandomGamma(p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.Sharpen(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.RandomBrightnessContrast(p=1),  # RandomContrast(p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(spatial + photometric)
def offline_da_fn(height, width, augment=True):
    """Offline augmentation pipeline; with augment=False only resize + pad runs.

    NOTE: the horizontal flip and the photometric blocks (CLAHE/brightness/
    gamma/contrast, sharpen/blur/noise, negative) present in the online
    pipeline are intentionally disabled here.
    """
    steps = []
    if augment:
        steps += [
            A.ShiftScaleRotate(scale_limit=0.05, rotate_limit=7, shift_limit=0.05,
                               border_mode=cv2.BORDER_CONSTANT, p=1.0),
            A.Perspective(scale=(0.015, 0.025), p=0.3),
            A.RandomResizedCrop(height=height, width=width, scale=(0.95, 1.0), p=0.3),
        ]
    steps += [
        A.LongestMaxSize(max_size=max(height, width),
                         interpolation=cv2.INTER_LANCZOS4, always_apply=True),
        A.PadIfNeeded(min_height=height, min_width=width,
                      border_mode=cv2.BORDER_CONSTANT, always_apply=True),
    ]
    return A.Compose(steps)
def train_process(data_path, config):
    """Train BiSeNetV2 on the EgoRail dataset.

    Builds the augmentation pipeline, the train/val loaders, the OHEM loss,
    SGD with a polynomial LR schedule, and runs the trainer (optionally
    resuming from config.snapshot).

    Args:
        data_path: root directory of the EgoRail dataset.
        config: training configuration object (seeds, sizes, hyperparameters).
    """

    def _worker_init_fn_(worker_id):
        # Seed every randomness source a DataLoader worker may use.
        # Offset by worker_id so workers do not replay identical
        # augmentation streams.
        import random

        import numpy as np
        import torch

        random_seed = config.random_seed + worker_id
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)
        random.seed(random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(random_seed)

    input_size = (config.img_height, config.img_width)
    PAD_VALUE = (0, 0, 0)
    IGNORE_INDEX = 255  # mask fill for padded regions, excluded from the loss
    transforms = [
        abm.RandomResizedCrop(
            scale=(0.7, 1),
            ratio=(1.5, 2),
            height=config.img_height,
            width=config.img_width,
            interpolation=cv2.INTER_NEAREST,
            always_apply=True,
        ),
        abm.OneOf([abm.IAAAdditiveGaussianNoise(), abm.GaussNoise()], p=0.5),
        abm.OneOf(
            [
                abm.MedianBlur(blur_limit=3),
                abm.GaussianBlur(blur_limit=3),
                abm.MotionBlur(blur_limit=3),
            ],
            p=0.5,
        ),
        abm.OneOf([
            abm.ShiftScaleRotate(
                rotate_limit=7,
                interpolation=cv2.INTER_NEAREST,
                border_mode=cv2.BORDER_CONSTANT,
                value=PAD_VALUE,
                mask_value=IGNORE_INDEX,
                p=1.0,
            ),
            abm.ElasticTransform(
                interpolation=cv2.INTER_NEAREST,
                border_mode=cv2.BORDER_CONSTANT,
                alpha_affine=30,
                value=PAD_VALUE,
                mask_value=IGNORE_INDEX,
                p=1.0,
            ),
            abm.Perspective(
                # NOTE(review): (0.05) is a plain float, not a tuple — probably
                # (0.05, 0.05) was intended. Albumentations accepts a float
                # here, so the value is left unchanged.
                scale=(0.05),
                interpolation=cv2.INTER_NEAREST,
                pad_mode=cv2.BORDER_CONSTANT,
                pad_val=PAD_VALUE,
                mask_pad_val=IGNORE_INDEX,
                keep_size=True,
                fit_output=True,
                p=1.0,
            ),
        ]),
        abm.RandomGamma(gamma_limit=(80, 120), p=0.5),
        abm.RandomBrightnessContrast(brightness_limit=(-0.5, 0.5),
                                     contrast_limit=(-0.5, 0.5), p=0.5),
        abm.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30,
                               val_shift_limit=20, p=0.5),
        abm.RandomShadow(p=0.5),
        abm.ChannelShuffle(p=0.5),
        abm.ChannelDropout(p=0.5),
        abm.HorizontalFlip(p=0.5),
        abm.ImageCompression(quality_lower=50, p=0.5),
        abm.Cutout(num_holes=100, max_w_size=8, max_h_size=8, p=0.5),
    ]
    data_transform = DataTransformBase(transforms=transforms,
                                       input_size=input_size,
                                       normalize=True)
    train_dataset = EgoRailDataset(data_path=data_path, phase="train",
                                   transform=data_transform)
    val_dataset = EgoRailDataset(data_path=data_path, phase="val",
                                 transform=data_transform)

    train_dataset.weighted_class()
    weighted_values = [8.90560578, 1.53155476]

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        drop_last=True,
        # BUG FIX: pass the callable itself. The original passed
        # `_worker_init_fn_()` — that ran the seeding once in the parent
        # process and handed DataLoader None, so workers were never seeded.
        worker_init_fn=_worker_init_fn_,
    )
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=config.num_workers,
        drop_last=True,
    )
    data_loaders_dict = {"train": train_data_loader, "val": val_data_loader}

    model = BiSeNetV2(n_classes=config.num_classes)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = OHEMCELoss(thresh=config.ohem_ce_loss_thresh,
                           weighted_values=weighted_values)

    # Scale LR and weight decay by the effective batch size.
    base_lr_rate = config.lr_rate / (config.batch_size * config.batch_multiplier)
    base_weight_decay = config.weight_decay * (config.batch_size * config.batch_multiplier)

    def _lambda_epoch(epoch):
        # Polynomial ("poly") LR decay with power 0.9.
        import math
        max_epoch = config.num_epochs
        return math.pow((1 - epoch * 1.0 / max_epoch), 0.9)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=base_lr_rate,
        momentum=config.momentum,
        weight_decay=base_weight_decay,
    )
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=_lambda_epoch)
    trainer = BiSeNetV2Trainer(
        model=model,
        criterion=criterion,
        metric_func=None,
        optimizer=optimizer,
        data_loaders_dict=data_loaders_dict,
        config=config,
        scheduler=scheduler,
        device=device,
    )
    if config.snapshot and os.path.isfile(config.snapshot):
        trainer.resume_checkpoint(config.snapshot)
    with torch.autograd.set_detect_anomaly(True):
        trainer.train()
def __init__(self, args, main_dir='/opt/ml/', mode='train'):
    """Flatten parsed CLI *args* into a training configuration object."""
    self.postfix = args.postfix

    # paths
    self.main_dir = Path(main_dir)
    self.data_dir = self.main_dir / 'input/data'
    self.image_folder = self.data_dir / mode / 'cropped_images'
    self.meta_dir = self.data_dir / mode / str(mode + '.csv')

    # training switches — CLI flags arrive as 0/1 integers
    self.gridshuffle = args.gridshuffle == 1
    self.mixed_precision = args.mixed_precision == 1
    self.n_fold = args.n_fold
    self.s_epoch = args.s_epoch
    self.t_epoch = args.t_epoch
    self.weight_path = None
    # NOTE: attribute names keep their original (misspelled) spellings
    # ('weighed_sampler', 'nosiy_elimination') for caller compatibility.
    self.weighed_sampler = args.weighted_sampler == 1
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.nosiy_elimination = args.nosiy_elimination == 1

    # criterion
    self.clipping = "nfnet" in args.model_type
    self.crit = args.crit
    self.arcface_crit = args.arcface_crit
    self.focal_type = args.focal_type
    self.cls_weight = args.cls_weight == 1
    self.focal_gamma = 5.0

    # optimizer
    self.optim = args.optim
    self.lr = args.lr
    self.weight_decay = args.decay

    # scheduler
    self.sched_type = args.sched_type
    self.sched_T_0 = args.T_max if args.T_max != 0 else self.t_epoch
    self.eta_min = args.eta_min

    # model
    self.cls_num = 18
    self.backbone_name = args.model_type
    self.checkpoint = self.main_dir / 'checkpoints' / str(self.backbone_name + "_" + args.postfix)
    if not os.path.exists(self.checkpoint):
        os.makedirs(self.checkpoint, exist_ok=True)
    self.backbone_pretrained = mode == 'train'
    self.embed_size = args.embed_size
    self.pool = args.pool
    self.p_trainable = True
    self.neck = args.neck
    self.multi_dropout = args.multi_dropout == 1
    self.multi_dropout_num = 16
    self.multi_dropout_prob = 0.2

    # pseudo label
    self.pseudo_label = args.pseudo_label == 1
    self.pseudo_label_data = self.main_dir / 'submission' / args.pseudo_label_path

    # logging
    self.log_interval = 50
    self.log_dir = self.main_dir / 'logs'
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir, exist_ok=True)
    self.log_dir = self.log_dir / (self.backbone_name + "_" + args.postfix + '.txt')

    # normalization statistics
    self.mean = [0.56019358, 0.52410121, 0.501457]
    self.std = [0.23318603, 0.24300033, 0.24567522]

    # transforms
    self.trn_tfms = A.Compose([
        A.Resize(384, 288, p=1.0),
        A.HorizontalFlip(p=0.7),
        A.GaussNoise(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.25,
                           rotate_limit=20, p=0.6, border_mode=0),
        A.OneOf([
            A.CLAHE(p=0.5),
            A.Compose([
                A.RandomBrightness(limit=0.5, p=0.6),
                A.RandomContrast(limit=0.4, p=0.6),
                A.RandomGamma(p=0.6),
            ])
        ], p=0.65),
        A.OneOf([
            A.HueSaturationValue(10, 20, 10, p=1.0),
            A.RGBShift(p=1.0),
            A.Emboss(p=1.0),
        ], p=0.5),
        A.RandomFog(fog_coef_lower=0.3, fog_coef_upper=0.3, p=0.3),
        A.OneOf([
            A.Perspective(p=1.0, scale=(0.05, 0.1)),
            A.GridDistortion(p=1.0, distort_limit=0.25, border_mode=0),
            A.OpticalDistortion(p=1.0, shift_limit=0.1, distort_limit=0.1, border_mode=0)
        ], p=0.65),
        A.Normalize(p=1.0, mean=self.mean, std=self.std),
    ])
    self.val_tfms = A.Compose([
        A.Resize(384, 288),
        A.Normalize(p=1.0, mean=self.mean, std=self.std),
    ])
Attention! Images and corresponding masks must have the same names, e. g. 123.png and 123.npy ''' if __name__ == '__main__': IMG_PATH = sys.argv[1] MASK_PATH = sys.argv[2] N_CLASSES = int(sys.argv[3]) BATCH_SZ = int(sys.argv[4]) LR = float(sys.argv[5]) N_EPOCHS = int(sys.argv[6]) DICT_PATH = sys.argv[7] df = get_names(IMG_PATH) model = UNet(num_class=N_CLASSES) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') train,test = train_test_split(df['id'].values,test_size = 0.2, random_state = 1337) train_transform = A.Compose([A.OneOf([A.HorizontalFlip(),A.VerticalFlip(),A.RandomRotate90()],p=0.8), A.Perspective(p=0.7,scale=(0.07,0.12)),A.Blur(p=0.5,blur_limit=6), A.RandomBrightnessContrast((0,0.5),(0,0.5)),A.GaussNoise()]) test_transform = A.Compose([A.OneOf([A.HorizontalFlip(),A.VerticalFlip(),A.RandomRotate90()],p=0.8), A.GaussNoise()]) train_set = PipeDataset(IMG_PATH,MASK_PATH,train,train_transform) test_set = PipeDataset(IMG_PATH,MASK_PATH,test,test_transform) train_loader = DataLoader(train_set, batch_size = BATCH_SZ, shuffle = True) test_loader = DataLoader(test_set, batch_size = BATCH_SZ, shuffle = True) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr = LR) history,best_model_dict = fit(N_EPOCHS, model, N_CLASSES, train_loader, test_loader, criterion, optimizer, device) torch.save(best_model_dict,DICT_PATH)