def __init__(self, config):
    width = height = config.DATA.SCALE  # 300/512
    self.train_transform = A.Compose(  # Yolo
        [
            # A.RandomSizedCrop(min_max_height=(800, 1024), height=1024, width=1024, p=0.5),
            # A.RandomScale(scale_limit=0.3, p=1.0),  # this one is problematic
            C.RandomResize(scale_limit=0.3, p=1.0),  # adjust the aspect ratio within [1/1.3, 1.3]
            A.OneOf(
                [
                    A.Sequential(
                        [
                            A.SmallestMaxSize(min(height, width), p=1.0),
                            A.RandomCrop(height, width, p=1.0)
                            # first resize the short side to 544, then crop to 544x544
                        ],
                        p=0.4),
                    A.LongestMaxSize(max(height, width), p=0.6),  # resize the long side to 544
                ],
                p=1.0),
            # A.LongestMaxSize(max(height, width), p=1.0),
            A.OneOf(
                [
                    A.HueSaturationValue(hue_shift_limit=0.4,
                                         sat_shift_limit=0.4,
                                         val_shift_limit=0.4,
                                         p=0.9),
                    A.RandomBrightnessContrast(brightness_limit=0.3,
                                               contrast_limit=0.3,
                                               p=0.9),
                ],
                p=0.9),
            # A.PadIfNeeded(min_height=height, min_width=width, border_mode=0, value=(0.5, 0.5, 0.5), p=1.0),
            C.RandomPad(min_height=height,
                        min_width=width,
                        border_mode=0,
                        value=(123 / 255, 117 / 255, 104 / 255),
                        p=1.0),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']),
    )
    self.val_transform = A.Compose(
        [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=0,
                                 min_visibility=0,
                                 label_fields=['labels']))
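# Usage sketch (assumptions: `dataset` stands for an instance of this class, and
# boxes are pascal_voc [x_min, y_min, x_max, y_max] pixel coordinates with a
# parallel `labels` list, as the BboxParams above require).
import numpy as np

img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # dummy HWC image
out = dataset.train_transform(image=img, bboxes=[[10, 20, 100, 200]], labels=[1])
img_t, boxes_t, labels_t = out['image'], out['bboxes'], out['labels']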
def make_coco_transforms(image_set='train', image_size=(640, 640)):
    normalize = A.Sequential([
        A.Normalize(),  # ImageNet norms by default
        ToTensorV2()
    ])

    if image_set == 'train':
        transforms = A.Compose([
            A.PadIfNeeded(*image_size, border_mode=0),  # pad with zeros
            A.RandomResizedCrop(*image_size),
            A.HorizontalFlip(),
            normalize
        ])
    elif image_set == 'val':
        transforms = A.Compose([
            A.Resize(*image_size),
            normalize
        ])
    else:
        raise ValueError(f'{image_set} not recognized!')

    return transforms
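# Usage sketch (assumed): no bbox_params are declared here, so the returned
# Compose only transforms the image, keyword-style.
import numpy as np

train_tfms = make_coco_transforms('train', image_size=(640, 640))
dummy = np.zeros((480, 640, 3), dtype=np.uint8)
tensor_chw = train_tfms(image=dummy)['image']  # float CHW tensor, ImageNet-normalized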
def train_albu_augment(record):
    verbose = record.get('verbose', False)
    image_size = record['image_size']
    image = record['image']
    mask = record['mask']

    if verbose:
        pipeline = albu.ReplayCompose
    else:
        pipeline = albu.Compose

    aug = pipeline([
        albu.Sequential([
            albu.GaussNoise(0.05, p=1),
            albu.Blur(blur_limit=5, always_apply=False, p=1),
        ], p=0.001),
        albu.OneOf([
            albu.ElasticTransform(p=0.5),
            albu.IAAPiecewiseAffine(p=0.5),
            albu.OpticalDistortion(p=1)
        ], p=0.001),
        albu.OneOf([
            albu.RandomBrightnessContrast(brightness_limit=0.2,
                                          contrast_limit=0.2,
                                          brightness_by_max=True,
                                          always_apply=False,
                                          p=1),
            albu.RandomBrightnessContrast(brightness_limit=(-0.2, 0.6),
                                          contrast_limit=0.2,
                                          brightness_by_max=True,
                                          always_apply=False,
                                          p=1),
            albu.ColorJitter(brightness=0.2,
                             contrast=0.2,
                             saturation=0.1,
                             hue=0.1,
                             always_apply=False,
                             p=1),
            albu.RandomGamma(p=0.5)
        ], p=0.5),
        albu.OneOf([
            albu.GaussNoise(0.02, p=0.5),
            albu.IAAAffine(p=0.5),
        ], p=0.25),
        albu.OneOf([
            albu.Blur(blur_limit=15, always_apply=False, p=0.25),
            albu.Blur(blur_limit=3, always_apply=False, p=0.5)
        ], p=0.5),
        albu.RandomRotate90(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.RandomCrop(width=image_size, height=image_size),
    ])

    data = aug(image=image, mask=mask)
    record['image'] = data['image']
    record['mask'] = data['mask']

    if verbose:
        for transformation in data['replay']['transforms']:
            if not isinstance(transformation, dict):
                print('not a dict')
            elif transformation.get('applied', False):
                print(30 * '-')
                if 'OneOf' in transformation['__class_fullname__']:
                    print(30 * '=')
                    for _trans in transformation['transforms']:
                        if not _trans.get('applied', False):
                            continue
                        _name = _trans['__class_fullname__']
                        if 'Flip' in _name:
                            continue
                        print(_name)
                        for k, v in _trans.items():
                            if k in ['__class_fullname__', 'applied', 'always_apply']:
                                continue
                            print(f"{k}: {v}")
                else:
                    _name = transformation['__class_fullname__']
                    if 'Flip' in _name:
                        continue
                    print(_name)
                    for k, v in transformation.items():
                        if k in ['__class_fullname__', 'applied', 'always_apply']:
                            continue
                        print(f"{k}: {v}")
    return record
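# Debugging sketch (assumption: the standard albumentations ReplayCompose API,
# not code from this repo): ReplayCompose records each run's sampled parameters
# under 'replay', so the identical augmentation can be re-applied to another image.
import numpy as np
import albumentations as albu

pipe = albu.ReplayCompose([albu.RandomRotate90(p=1.0), albu.HorizontalFlip(p=0.5)])
a = pipe(image=np.zeros((64, 64, 3), dtype=np.uint8))
b = albu.ReplayCompose.replay(a['replay'], image=np.ones((64, 64, 3), dtype=np.uint8))
# `b` received exactly the rotation/flip decisions that were sampled for `a`.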
def aug(source, images_output_path, size):
    images_path = images_output_path + "/JPEGImages/"
    os.makedirs(images_path, exist_ok=True)
    xml_path = images_output_path + "/Annotations/"
    os.makedirs(xml_path, exist_ok=True)

    transform = A.Compose(
        [
            # A.CLAHE(),
            # A.RandomScale(scale_limit=[0.5, 1]),
            # A.RandomCrop(width=450, height=450),
            A.OneOf([
                A.Sequential(
                    [A.RandomCrop(width=800, height=600),
                     A.RandomRotate90()]),
                # A.Sequential(
                #     [
                #         A.RandomSizedBBoxSafeCrop(width=800, height=600),
                #         A.RandomRotate90(),
                #     ]
                # ),
                A.Sequential(
                    [
                        A.RandomScale(scale_limit=0.2),
                        A.Flip(),
                        A.RandomRotate90(),
                    ],
                    # p=0.3,
                ),
                A.Sequential(
                    [
                        A.Rotate(),
                    ],
                    p=0.3,
                ),
            ])
            # A.Transpose(),
            # A.Resize(0.9, 0.9),
            # A.Blur(blur_limit=3),
            # A.OpticalDistortion(),
            # A.GridDistortion(),
            # A.HueSaturationValue(),
        ],
        bbox_params=A.BboxParams(format="pascal_voc",
                                 min_visibility=0.5,
                                 label_fields=["class_labels"]),
    )

    rows = []
    random.seed(42)
    images_index = 1
    for name, group in source.groupby("filename"):
        row = group.iloc[0]
        print(row["filename"])
        image = cv2.imread(row["filename"])

        same = set()
        bboxes = []
        class_labels = []
        already_box = {}
        for _, vrow in group.iterrows():
            bboxes.append([vrow["xmin"], vrow["ymin"], vrow["xmax"], vrow["ymax"]])
            class_labels.append(vrow["class"])
            already_box[vrow["class"]] = set()

        all_count = 0
        print(already_box)
        while int(all_count) < size:
            augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
            file_name = f"{images_index}.jpg"
            if len(augmented["bboxes"]) < 1:
                continue

            writer = Writer(file_name, augmented["image"].shape[1],
                            augmented["image"].shape[0])
            findbox = False
            for index, bbox in enumerate(augmented["bboxes"]):
                x_min, y_min, x_max, y_max = map(int, bbox[:4])
                same.add(x_min)
                rows.append({
                    "filename": f"{images_path}/{file_name}",
                    "width": augmented["image"].shape[1],
                    "height": augmented["image"].shape[0],
                    "class": augmented["class_labels"][index],
                    "xmin": x_min,
                    "ymin": y_min,
                    "xmax": x_max,
                    "ymax": y_max,
                    "imageindex": str(images_index),
                })
                writer.addObject(augmented["class_labels"][index],
                                 x_min, y_min, x_max, y_max)
                if len(already_box[augmented["class_labels"][index]]) >= size:
                    continue
                already_box[augmented["class_labels"][index]].add(x_min)
                findbox = True

            if findbox:
                cv2.imwrite(f"{images_path}/{file_name}", augmented["image"])
                writer.save(f"{xml_path}/{images_index}.xml")
                images_index += 1

            print(already_box)
            all_count = sum(min(len(v), size)
                            for v in already_box.values()) / len(already_box)

    df = pd.DataFrame(rows)
    return df
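# Usage sketch (assumptions: `Writer` is pascal_voc_writer.Writer, and `source`
# is a DataFrame with one row per ground-truth box, as the groupby/iterrows
# above implies; the file path and class name are illustrative).
import pandas as pd

source = pd.DataFrame([{
    "filename": "images/sample.jpg",
    "class": "dog",
    "xmin": 48, "ymin": 32, "xmax": 320, "ymax": 240,
}])
df = aug(source, "augmented_dataset", size=20)  # aim for ~20 boxes per class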
bbox_params = A.BboxParams(format='pascal_voc', min_visibility=0.3)
train_transforms = A.Compose([
    A.Resize(height=img_size[0], width=img_size[1], p=1.0),
    A.HorizontalFlip(p=0.5),
    # A.OneOf([
    #     A.Sequential([
    #         A.Resize(height=img_size[0], width=img_size[1], p=1.0),
    #     ], p=1.0),
    #     A.Sequential([
    #         A.RandomSizedBBoxSafeCrop(height=img_size[0], width=img_size[1], p=1.0),
    #     ], p=1.0)
    # ], p=1.0),
    A.OneOf([
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5)
        ], p=1.0),
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       p=0.5),
            A.HueSaturationValue(hue_shift_limit=20,
                                 sat_shift_limit=30,
                                 val_shift_limit=0,
                                 p=0.5),
        ], p=1.0),
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2,
                                       p=0.5),
            A.HueSaturationValue(hue_shift_limit=20,
                                 sat_shift_limit=30,
                                 val_shift_limit=0,
                                 p=0.5),
            A.ChannelShuffle(p=0.5),
            # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=(-0.15, 0.15),
            #                    rotate_limit=30, p=0.5,
            #                    border_mode=cv2.BORDER_CONSTANT, value=0),
        ], p=1.0),
    ], p=1.0),
], bbox_params=bbox_params, p=1.0)
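# Usage sketch (assumed): with no label_fields in BboxParams, each pascal_voc box
# carries its class label as a trailing element, and boxes falling below the 0.3
# min_visibility threshold are dropped from the output.
import numpy as np

dummy = np.zeros((480, 640, 3), dtype=np.uint8)
out = train_transforms(image=dummy, bboxes=[(10, 20, 200, 220, 'car')])
print(out['bboxes'])  # rescaled (and possibly flipped) boxes, labels still attached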
def train_function(gpu, world_size, node_rank, gpus, fold_number, group_name):
    import torch.multiprocessing
    torch.multiprocessing.set_sharing_strategy('file_system')

    torch.manual_seed(25)
    np.random.seed(25)

    rank = node_rank * gpus + gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=world_size,
        rank=rank
    )

    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")

    batch_size = 64
    width_size = 416
    init_lr = 1e-4
    end_lr = 1e-6
    n_epochs = 20
    emb_size = 512
    margin = 0.5
    dropout = 0.0
    iters_to_accumulate = 1

    if rank == 0:
        wandb.init(project='shopee_effnet0', group=group_name, job_type=str(fold_number))

        checkpoints_dir_name = 'effnet0_{}_{}_{}'.format(width_size, dropout, group_name)
        os.makedirs(checkpoints_dir_name, exist_ok=True)

        wandb.config.model_name = checkpoints_dir_name
        wandb.config.batch_size = batch_size
        wandb.config.width_size = width_size
        wandb.config.init_lr = init_lr
        wandb.config.n_epochs = n_epochs
        wandb.config.emb_size = emb_size
        wandb.config.dropout = dropout
        wandb.config.iters_to_accumulate = iters_to_accumulate
        wandb.config.optimizer = 'adam'
        wandb.config.scheduler = 'ShopeeScheduler'

    df = pd.read_csv('../../dataset/reliable_validation_tm.csv')

    train_df = df[df['fold_group'] != fold_number]
    train_transforms = alb.Compose([
        alb.RandomResizedCrop(width_size, width_size),
        alb.ShiftScaleRotate(shift_limit=0.1, rotate_limit=30),
        alb.HorizontalFlip(),
        alb.OneOf([
            alb.Sequential([
                alb.HueSaturationValue(hue_shift_limit=50),
                alb.RandomBrightnessContrast(),
            ]),
            alb.FancyPCA(),
            alb.ChannelDropout(),
            alb.ChannelShuffle(),
            alb.RGBShift()
        ]),
        alb.CoarseDropout(max_height=int(width_size * 0.1),
                          max_width=int(width_size * 0.1)),
        alb.OneOf([
            alb.ElasticTransform(),
            alb.OpticalDistortion(),
            alb.GridDistortion()
        ]),
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_set = ImageDataset(train_df, train_df, '../../dataset/train_images', train_transforms)
    sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True)
    train_dataloader = DataLoader(train_set, batch_size=batch_size // world_size,
                                  shuffle=False, num_workers=4, sampler=sampler)

    # valid_df = df[df['fold_strat'] == fold_number]
    valid_transforms = alb.Compose([
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    # valid_set = ImageDataset(train_df, valid_df, '../../dataset/train_images', valid_transforms)
    # valid_dataloader = DataLoader(valid_set, batch_size=batch_size // world_size, shuffle=False, num_workers=4)

    test_df = df[df['fold_group'] == fold_number]
    test_set = ImageDataset(test_df, test_df, '../../dataset/train_images', valid_transforms)
    test_dataloader = DataLoader(test_set, batch_size=batch_size // world_size,
                                 shuffle=False, num_workers=4)

    model = EfficientNetArcFace(emb_size, train_df['label_group'].nunique(), device,
                                dropout=dropout, backbone='tf_efficientnet_b0_ns',
                                pretrained=True, margin=margin, is_amp=True)
    model = SyncBatchNorm.convert_sync_batchnorm(model)
    model.to(device)
    model = DistributedDataParallel(model, device_ids=[gpu])

    scaler = GradScaler()
    criterion = CrossEntropyLoss()
    # criterion = LabelSmoothLoss(smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    # scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=end_lr,
    #                               last_epoch=-1)
    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=2000, T_mult=1,
    #                                         eta_min=end_lr, last_epoch=-1)
    scheduler = ShopeeScheduler(optimizer, lr_start=init_lr,
                                lr_max=init_lr * batch_size, lr_min=end_lr)

    for epoch in range(n_epochs):
        train_loss, train_duration, train_f1 = train_one_epoch(
            model, train_dataloader, optimizer, criterion, device,
            scaler, scheduler=None, iters_to_accumulate=iters_to_accumulate)
        scheduler.step()

        if rank == 0:
            # valid_loss, valid_duration, valid_f1 = evaluate(model, valid_dataloader, criterion, device)
            embeddings = get_embeddings(model, test_dataloader, device)
            embeddings_f1 = validate_embeddings_f1(embeddings, test_df)

            wandb.log({'train_loss': train_loss, 'train_f1': train_f1,
                       'embeddings_f1': embeddings_f1, 'epoch': epoch})

            filename = '{}_foldnum{}_epoch{}_train_loss{}_f1{}'.format(
                checkpoints_dir_name, fold_number + 1, epoch + 1,
                round(train_loss, 3), round(embeddings_f1, 3))
            torch.save(model.module.state_dict(),
                       os.path.join(checkpoints_dir_name, '{}.pth'.format(filename)))
            # np.savez_compressed(os.path.join(checkpoints_dir_name, '{}.npz'.format(filename)),
            #                     embeddings=embeddings)

            print('FOLD NUMBER %d\tEPOCH %d:\t'
                  'TRAIN [duration %.3f sec, loss: %.3f, avg f1: %.3f]\t'
                  'VALID EMBEDDINGS [avg f1: %.3f]\tCurrent time %s' %
                  (fold_number + 1, epoch + 1, train_duration, train_loss,
                   train_f1, embeddings_f1, str(datetime.now(timezone('Europe/Moscow')))))

    if rank == 0:
        wandb.finish()
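# Launcher sketch (assumptions: a single node; `train_function`'s first argument
# is the local GPU index, which matches torch.multiprocessing.spawn's calling
# convention; init_method='env://' requires MASTER_ADDR/MASTER_PORT to be set).
import os
import torch
import torch.multiprocessing as mp

if __name__ == '__main__':
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    gpus = torch.cuda.device_count()
    mp.spawn(train_function,
             args=(gpus, 0, gpus, 0, 'baseline'),  # world_size, node_rank, gpus, fold, group
             nprocs=gpus)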
def create_transform(self, input_dtype, p):
    return A.Sequential([stage.create_transform(input_dtype) for stage in self.stages],
                        p=p)
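# Context sketch (the `ResizeStage` class is hypothetical, shown only to
# illustrate the protocol this method assumes): each stage exposes
# create_transform(input_dtype) returning a single albumentations transform,
# and the stages are then composed into one A.Sequential.
import albumentations as A

class ResizeStage:
    def __init__(self, size):
        self.size = size

    def create_transform(self, input_dtype):
        # input_dtype is ignored here; a real stage might branch on uint8 vs float32
        return A.Resize(self.size, self.size)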