batch_size = 64 * idist.get_world_size()  # total batch size
num_workers = 10

# ##############################
# Setup Dataflow
# ##############################

assert "DATASET_PATH" in os.environ
data_path = os.environ["DATASET_PATH"]

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transforms = A.Compose([
    A.RandomResizedCrop(train_crop_size, train_crop_size, scale=(0.08, 1.0)),
    A.HorizontalFlip(),
    A.CoarseDropout(max_height=32, max_width=32),
    A.HueSaturationValue(),
    A.Normalize(mean=mean, std=std),
    ToTensor(),
])

val_transforms = A.Compose([
    # https://github.com/facebookresearch/FixRes/blob/b27575208a7c48a3a6e0fa9efb57baa4021d1305/imnet_resnet50_scratch/transforms.py#L76
    A.Resize(int((256 / 224) * val_crop_size), int((256 / 224) * val_crop_size)),
    A.CenterCrop(val_crop_size, val_crop_size),
    A.Normalize(mean=mean, std=std),
    ToTensor(),
])
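# A minimal usage sketch for the pipelines above: albumentations Compose
# objects are called with keyword arguments and return a dict keyed by target
# name. `image` here is a stand-in HWC uint8 array, not data from DATASET_PATH.
import numpy as np

image = np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8)
sample = train_transforms(image=image)
tensor = sample["image"]  # CHW tensor after Normalize + ToTensor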
import albumentations


class CFG:
    img_size = 512
    # NOTE: inside the class body, refer to `img_size` directly;
    # `CFG` is not yet bound while the class is being defined.
    transform = albumentations.Compose([
        albumentations.RandomResizedCrop(img_size, img_size, scale=(0.9, 1), p=1),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.ShiftScaleRotate(p=0.5),
        albumentations.HueSaturationValue(hue_shift_limit=10,
                                          sat_shift_limit=10,
                                          val_shift_limit=10,
                                          p=0.7),
        albumentations.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2),
                                                contrast_limit=(-0.2, 0.2),
                                                p=0.7),
        albumentations.CLAHE(clip_limit=(1, 4), p=0.5),
        albumentations.OneOf([
            albumentations.OpticalDistortion(distort_limit=1.0),
            albumentations.GridDistortion(num_steps=5, distort_limit=1.),
            albumentations.ElasticTransform(alpha=3),
        ], p=0.2),
        albumentations.OneOf([
            albumentations.GaussNoise(var_limit=[10, 50]),
    extra_df['Filename'] = img_dir[extra] + '/' + extra_df['Filename']  # .astype(str)
    df = pd.concat([df, extra_df], ignore_index=True)

# Exclude all entries whose "Died" status is "Missing"
df = df[~df['Died'].isin(['Missing'])]
df['Died'] = pd.to_numeric(df['Died'])

# Augmentations
A_transform = A.Compose([
    A.Flip(p=1),
    A.RandomRotate90(p=1),
    A.Rotate(p=1, limit=45, interpolation=3),
    A.RandomResizedCrop(input_size[0], input_size[1],
                        scale=(0.8, 1.0), ratio=(0.8, 1.2),
                        interpolation=3, p=1),
    A.OneOf([
        A.IAAAdditiveGaussianNoise(),
        A.GaussNoise(),
    ], p=0.25),
    A.OneOf([
        A.MotionBlur(p=0.25),
        A.MedianBlur(blur_limit=3, p=0.25),
        A.Blur(blur_limit=3, p=0.25),
        A.GaussianBlur(p=0.25)
    ], p=0.1),
    A.OneOf([
        A.OpticalDistortion(interpolation=3, p=0.1),
            A.OneOf([A.GaussNoise(p=p), A.MultiplicativeNoise(p=p)]))
    if p := trans_cfg.get('hsv', False):
        transforms.append(A.HueSaturationValue(p=p))
    # Apply crops/resizes last, so the earlier skew/rotation steps are less
    # likely to need border reflection. A usage sketch follows this function.
    if trans_cfg.get('centrecrop', False):
        crop_height, crop_width = trans_cfg['centrecrop']
        transforms.append(A.PadIfNeeded(crop_height, crop_width))
        transforms.append(A.CenterCrop(crop_height, crop_width))
    if trans_cfg.get('randomresizedcrop', False):
        resize_height, resize_width = trans_cfg['randomresizedcrop']
        transforms.append(
            A.RandomResizedCrop(resize_height, resize_width,
                                scale=(0.5, 1.0), ratio=(0.9, 1.1),
                                interpolation=cv2.INTER_CUBIC, p=1))
    if trans_cfg.get('randomcrop', False):
        crop_height, crop_width = trans_cfg['randomcrop']
        transforms.append(A.RandomCrop(crop_height, crop_width, p=1))
    if trans_cfg.get('final_resize', False):
        final_resize = trans_cfg['final_resize']
        transforms.append(
            A.Resize(height=final_resize[0], width=final_resize[1]))
    if normalise:
        transforms.append(A.Normalize(mean=mean, std=std))
    else:
        transforms.append(A.Lambda(image=div255))
    return A.Compose(transforms)
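# A minimal sketch of driving the config-based builder above. The builder's
# real name is outside this excerpt, so `build_transforms` is a hypothetical
# stand-in; the config keys, however, match those the code actually reads.
example_cfg = {
    'hsv': 0.3,                       # probability for HueSaturationValue
    'randomresizedcrop': (224, 224),  # (height, width)
    'final_resize': (224, 224),
}
# pipeline = build_transforms(example_cfg, normalise=True)
# out = pipeline(image=img)['image']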
mean = [-17.398721187929123, -10.020421713800838, -12.10841437771272]
std = [6.290316422115964, 5.776936185931195, 5.795418280085563]

batch_size = 23
num_workers = 12
val_batch_size = 24

# Per FixRes (https://arxiv.org/pdf/1906.06423.pdf), training at a lower
# resolution than test time improves accuracy; e.g. train at 224 -> test at
# 320 maximizes ImageNet accuracy with ResNet-50.
val_img_size = 512
train_img_size = 480

max_value = 1.0
train_transforms = A.Compose([
    A.RandomResizedCrop(train_img_size, train_img_size,
                        scale=(0.7, 1.0), ratio=(0.9, 1.1)),
    A.OneOf([
        A.RandomRotate90(),
        A.Flip(),
    ]),
    A.Normalize(mean=mean, std=std, max_pixel_value=max_value),
    ToTensorV2()
])

val_transforms = A.Compose([
    A.Normalize(mean=mean, std=std, max_pixel_value=max_value),
    ToTensorV2()
])
        loss = loss_fn(outputs, targets)
        losses.update(loss.item(), inputs.size(0))
        scores.update(targets, outputs)
        tk0.set_postfix(loss=losses.avg)
    return scores.avg, losses.avg


mean = (0.485, 0.456, 0.406)  # RGB
std = (0.229, 0.224, 0.225)   # RGB

albu_transforms = {
    'train': A.Compose([
        A.OneOf([
            A.RandomResizedCrop(CFG.img_size, CFG.img_size, p=0.5),
            A.Resize(CFG.img_size, CFG.img_size, p=0.5),
        ], p=1.0),
        A.HorizontalFlip(p=0.5),
        A.OneOf([
            A.Cutout(max_h_size=10, max_w_size=32),
            A.CoarseDropout(max_holes=4),
        ], p=0.5),
        # A.RandomBrightness(p=0.25),
        A.Normalize(mean, std),
        # T.ToTensorV2()
    ]),
    'valid': A.Compose([
def main():
    root = "./data/VOCdevkit/VOC2012"
    batch_size = 4
    num_workers = 4
    num_classes = 21
    lr = 0.0025
    # lr = 5e-4  # fine-tune
    epochs = 100
    writer = SummaryWriter(comment="-fcn")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_transform = A.Compose([
        A.HorizontalFlip(),  # note: the order of these transforms matters
        A.VerticalFlip(),
        # A.Transpose(p=0.5),
        A.RandomRotate90(),
        # A.ElasticTransform(p=1, alpha=120,
        #                    sigma=120 * 0.05,
        #                    alpha_affine=120 * 0.03),
        A.RandomResizedCrop(320, 480),
    ])
    val_transform = A.Compose([A.RandomResizedCrop(320, 480)])

    train_set = VOCdataset(root, mode="train", transform=train_transform)
    val_set = VOCdataset(root, mode="val", transform=val_transform)
    train_loader = data.DataLoader(train_set, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers)
    val_loader = data.DataLoader(val_set, batch_size=batch_size,
                                 shuffle=False, num_workers=num_workers)

    model = FCN(num_classes).to(device)
    # state_dict = torch.load("./model/best.pth")
    # print("loading pretrained parameters")
    # model.load_state_dict(state_dict)
    # del state_dict

    criteria = nn.CrossEntropyLoss()
    # optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=2e-4)
    # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
    #                       weight_decay=2e-4)

    # Give the pretrained VGG encoder a 10x smaller learning rate than the decoder.
    vgg_parameters = (list(map(id, model.encode1.parameters())) +
                      list(map(id, model.encode2.parameters())) +
                      list(map(id, model.encode3.parameters())) +
                      list(map(id, model.encode4.parameters())) +
                      list(map(id, model.encode5.parameters())))
    encode_parameters = (list(model.encode1.parameters()) +
                         list(model.encode2.parameters()) +
                         list(model.encode3.parameters()) +
                         list(model.encode4.parameters()) +
                         list(model.encode5.parameters()))
    decode_parameters = filter(lambda p: id(p) not in vgg_parameters,
                               model.parameters())
    optimizer = optim.SGD([{'params': encode_parameters, 'lr': 0.1 * lr},
                           {'params': decode_parameters, 'lr': lr}],
                          momentum=0.9, weight_decay=2e-3)
    # optimizer = optim.Adam([{'params': encode_parameters, 'lr': 0.1 * lr},
    #                         {'params': decode_parameters, 'lr': lr}],
    #                        weight_decay=2e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.85)
    # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
    #                                                            T_0=100,
    #                                                            T_mult=1,
    #                                                            eta_min=0.0001)
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

    best_miou = 0.0
    for epoch in range(1, epochs + 1):
        print("Epoch: ", epoch)
        train_info = train(train_loader, model, criteria, optimizer, device,
                           batch_size)
        val_info = validate(val_loader, model, criteria, device, batch_size)
        # step the scheduler after the optimizer updates (PyTorch >= 1.1)
        scheduler.step()

        string = "loss: {}, pixel acc: {}, mean acc: {} miou: {}"
        print("train", end=' ')
        print(string.format(train_info['loss'], train_info["pixel acc"],
                            train_info['mean acc'], train_info['miou']))
        print("val", end=' ')
        print(string.format(val_info['loss'], val_info['pixel acc'],
                            val_info['mean acc'], val_info['miou']))

        writer.add_scalar("lr",
                          optimizer.state_dict()['param_groups'][0]['lr'],
                          epoch)
        writer.add_scalar('train/loss', train_info['loss'], epoch)
        writer.add_scalar('train/pixel acc', train_info['pixel acc'], epoch)
        writer.add_scalar('train/mean acc', train_info['mean acc'], epoch)
        writer.add_scalar('train/miou', train_info['miou'], epoch)
        writer.add_scalar('val/loss', val_info['loss'], epoch)
        writer.add_scalar('val/pixel acc', val_info['pixel acc'], epoch)
        writer.add_scalar('val/mean acc', val_info['mean acc'], epoch)
        writer.add_scalar('val/miou', val_info['miou'], epoch)

        if val_info['miou'] > best_miou:
            best_miou = val_info['miou']
            torch.save(model.state_dict(), './model/best.pth')
            print("best model found at epoch {}".format(epoch))
def train_process(data_path, config):
    def _worker_init_fn_(worker_id):
        import random

        import numpy as np
        import torch

        random_seed = config.random_seed
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)
        random.seed(random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(random_seed)

    input_size = (config.img_height, config.img_width)
    transforms = [
        abm.RandomResizedCrop(
            scale=(0.7, 1),
            height=config.img_height,
            width=config.img_width,
            ratio=(1.5, 2),
            always_apply=True,
        ),
        abm.OneOf([abm.IAAAdditiveGaussianNoise(), abm.GaussNoise()], p=0.5),
        abm.OneOf(
            [
                abm.MedianBlur(blur_limit=3),
                abm.GaussianBlur(blur_limit=3),
                abm.MotionBlur(blur_limit=3),
            ],
            p=0.1,
        ),
        abm.ShiftScaleRotate(rotate_limit=10, p=0.5, border_mode=0),
        abm.RandomGamma(gamma_limit=(80, 120), p=0.5),
        abm.RandomBrightnessContrast(brightness_limit=(-0.5, 0.5),
                                     contrast_limit=(-0.5, 0.5),
                                     p=0.5),
        abm.HueSaturationValue(hue_shift_limit=20,
                               sat_shift_limit=30,
                               val_shift_limit=20,
                               p=0.5),
        abm.RandomShadow(p=0.5),
        abm.RandomSunFlare(p=0.5),
        abm.ChannelShuffle(p=0.5),
        abm.ChannelDropout(p=0.5),
        abm.HorizontalFlip(p=0.5),
        abm.ImageCompression(quality_lower=50, p=0.5),
        abm.Cutout(num_holes=100, max_w_size=8, max_h_size=8, p=0.5),
    ]
    data_transform = DataTransformBase(transforms=transforms,
                                       input_size=input_size,
                                       normalize=True)
    train_dataset = EgoRailDataset(data_path=data_path,
                                   phase="train",
                                   transform=data_transform)
    val_dataset = EgoRailDataset(data_path=data_path,
                                 phase="val",
                                 transform=data_transform)

    # train_dataset.weighted_class()
    weighted_values = [8.90560578, 1.53155476]

    train_data_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        drop_last=True,
        # pass the function itself, not its return value
        worker_init_fn=_worker_init_fn_,
    )
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=config.num_workers,
        drop_last=True,
    )
    data_loaders_dict = {"train": train_data_loader, "val": val_data_loader}

    model = BiSeNetV2(n_classes=config.num_classes)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = OHEMCELoss(thresh=config.ohem_ce_loss_thresh,
                           weighted_values=weighted_values)

    base_lr_rate = config.lr_rate / (config.batch_size * config.batch_multiplier)
    base_weight_decay = config.weight_decay * (config.batch_size *
                                               config.batch_multiplier)

    def _lambda_epoch(epoch):
        import math
        max_epoch = config.num_epochs
        return math.pow((1 - epoch * 1.0 / max_epoch), 0.9)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=base_lr_rate,
        momentum=config.momentum,
        weight_decay=base_weight_decay,
    )
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=_lambda_epoch)
    trainer = BiSeNetV2Trainer(
        model=model,
        criterion=criterion,
        metric_func=None,
        optimizer=optimizer,
        data_loaders_dict=data_loaders_dict,
        config=config,
        scheduler=scheduler,
        device=device,
    )
    if config.snapshot and os.path.isfile(config.snapshot):
        trainer.resume_checkpoint(config.snapshot)
    with torch.autograd.set_detect_anomaly(True):
        trainer.train()
annot_df = pd.read_csv('../ranzcr/train_annotations.csv')

train_image_transforms = alb.Compose([
    alb.HorizontalFlip(p=0.5),
    alb.CLAHE(p=0.5),
    alb.OneOf([
        alb.GridDistortion(num_steps=8, distort_limit=0.5, p=1.0),
        alb.OpticalDistortion(distort_limit=0.5, shift_limit=0.5, p=1.0),
        alb.ElasticTransform(alpha=3, p=1.0)
    ], p=0.5),
    alb.RandomResizedCrop(height=int(0.8192 * width_size), width=width_size,
                          scale=(0.5, 1.5), p=0.5),
    alb.ShiftScaleRotate(shift_limit=0.025, scale_limit=0.1, rotate_limit=20,
                         p=0.5),
    alb.CoarseDropout(max_holes=12, min_holes=6,
                      max_height=int(0.8192 * width_size / 6),
                      max_width=int(width_size / 6),
                      min_height=int(0.8192 * width_size / 20),
                      min_width=int(width_size / 20),
                      p=0.5),
    alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])
def resize_transforms(image_size=IMAGE_SIZE):
    result = albu.Compose([albu.RandomResizedCrop(image_size, image_size)],
                          p=1)
    return result
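# Minimal usage sketch for the helper above: the returned Compose is called
# with a keyword argument and yields a dict.
# crop = resize_transforms(256)
# resized = crop(image=img)['image']  # img: HWC numpy array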
    return img


train_transforms = A.Compose(
    [
        # A.RandomCrop(width=450, height=450),
        # A.HorizontalFlip(p=1),
        A.CenterCrop(1280, 1280, True, 1),
        # A.RandomSizedBBoxSafeCrop(384, 384),
        A.Resize(320, 320, p=1),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.RandomResizedCrop(height=320, width=320,
                            scale=[0.95, 1.05], ratio=[0.95, 1.05], p=0.5),
        # A.RandomRain(p=0.1),
        A.pytorch.ToTensor(),
        # ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
        #                                    min_visibility=0.5))
    ],
    p=1.0,
    bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                             min_visibility=0.99, label_fields=['labels']))

val_transforms = A.Compose([
    A.CenterCrop(1280, 1280, True, 1),
    A.Resize(320, 320, p=1),
10010 20
01010 15
11010 20
10110 20
01110 15
10001 20
01001 15
10101 20
10011 20
11011 20"""

IMG_AUG = A.Compose([
    A.GaussNoise(),
    A.RandomRotate90(),
    A.Blur(),
    A.RandomResizedCrop(767, 1022)
])


def aug_dataset():
    loader = il.DatasetLoader.initial()
    image_paths, tensors = loader.load_tensors(None, None, load_extend=True)
    tensors = il.prepare_data(tensors)
    # every other token in repeat_list is a binary label pattern,
    # the tokens in between are the repeat counts
    indexes = list(map(lambda x: int(x, 2), repeat_list.split()[::2]))
    values = repeat_list.split()[1::2]
    for i in image_paths:
        i['input'] = i['input'].replace("/cached", "").replace(".torch", ".jpg")
    for j in p.labels_attributes:
        return len(self.img_files)


# %%
if __name__ == '__main__':
    # %%
    import matplotlib.pyplot as plt

    # %%
    img_files = get_img_files()
    img_size = 224
    dataset = MaskDataset(
        img_files,
        transform=A.Compose([
            A.RandomResizedCrop(img_size, img_size),
            A.Rotate(13),
            A.HorizontalFlip(),
            A.RandomBrightnessContrast(),
            A.HueSaturationValue(),
            A.RGBShift(),
            A.RandomGamma(),
            # A.CLAHE(),
            MyCoarseDropout(min_holes=1, max_holes=8,
                            max_height=32, max_width=32),
            # A.Resize(img_size, img_size, interpolation=cv2.INTER_CUBIC),
df = pd.read_csv(os.path.join(args.input, "train_cultivar_mapping.csv"))
test_df = pd.read_csv(os.path.join(args.input, "sample_submission.csv"))

unique_labels = df["cultivar"].unique()
label_mapping = {label: i for i, label in enumerate(unique_labels)}
rev_label_mapping = {i: label for label, i in label_mapping.items()}

df.loc[:, "cultivar"] = df["cultivar"].map(label_mapping)
test_df.loc[:, "cultivar"] = test_df["cultivar"].map(label_mapping)

train_aug = albumentations.Compose(
    [
        albumentations.RandomResizedCrop(
            height=args.image_size,
            width=args.image_size,
            p=1,
        ),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.HueSaturationValue(p=0.5),
        albumentations.OneOf(
            [
                albumentations.RandomBrightnessContrast(p=0.5),
                albumentations.RandomGamma(p=0.5),
            ],
            p=0.5,
        ),
        albumentations.OneOf(
            [
                albumentations.Blur(p=0.1),
device = 'cuda'

transforms_train = A.Compose([
    A.HorizontalFlip(p=0.2),
    A.ImageCompression(quality_lower=99, quality_upper=100),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10,
                       border_mode=0, p=0.5),
    A.ColorJitter(0.2, 0.2, 0.2, 0.2),
    A.Resize(args.max_size, args.max_size),
    A.OneOf([
        A.RandomResizedCrop(args.max_size, args.max_size),
        A.Cutout(max_h_size=int(args.max_size * 0.4),
                 max_w_size=int(args.max_size * 0.4),
                 num_holes=1, p=0.3),
    ]),
    A.Normalize(mean=(0.4452, 0.4457, 0.4464),
                std=(0.2592, 0.2596, 0.2600)),
    ToTensorV2(),
])

print('Loading dataset...')
trainset = LandmarkDataset('train', transforms_train)
train_loader = DataLoader(
def prepare_data(self):
    # Read DataFrame
    df = pd.read_csv(os.path.join(self.hparams.data_dir, 'train.csv'))

    # Drop rows whose mask file does not exist
    for idx in range(len(df)):
        image_id = df.loc[idx, 'image_id']
        if not os.path.exists(
                os.path.join(
                    os.path.join(self.hparams.data_dir, 'train_label_masks/'),
                    image_id + '_mask.' + self.hparams.image_format)):
            df = df.drop(idx, axis=0)
    df = df.reset_index(drop=True)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2020)
    for fold, (train_index, val_index) in enumerate(
            skf.split(df.values, df['isup_grade'])):
        df.loc[val_index, 'fold'] = int(fold)
    df['fold'] = df['fold'].astype(int)
    # print(df)
    train_df = df[df['fold'] != self.hparams.fold]
    val_df = df[df['fold'] == self.hparams.fold]
    # train_df, val_df = train_test_split(train_df, stratify=train_df['isup_grade'])

    train_transform = A.Compose([
        A.Resize(height=self.hparams.image_size, width=self.hparams.image_size,
                 interpolation=1, p=1.0),
        A.Flip(p=0.5),
        A.RandomResizedCrop(height=self.hparams.image_size,
                            width=self.hparams.image_size,
                            scale=(0.8, 1.0), ratio=(0.75, 1.3333333333333333),
                            interpolation=1, p=1.0),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2,
                                   brightness_by_max=True, p=0.5),
        A.GaussNoise(var_limit=(10.0, 50.0), mean=0, p=0.5),
        # A.Rotate(limit=90, interpolation=1, border_mode=4, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45,
                           interpolation=1, border_mode=4, p=0.5),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                    max_pixel_value=255.0, p=1.0)
    ])
    valid_transform = A.Compose([
        A.Resize(height=self.hparams.image_size, width=self.hparams.image_size,
                 interpolation=1, p=1.0),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                    max_pixel_value=255.0, p=1.0)
    ])

    self.train_dataset = PANDASegClsDataset(train_df, self.hparams.data_dir,
                                            self.hparams.image_format,
                                            transform=train_transform)
    self.val_dataset = PANDASegClsDataset(val_df, self.hparams.data_dir,
                                          self.hparams.image_format,
                                          transform=valid_transform)
def get_pretrained_attentive_transforms(set_name, no_augment=False,
                                        augment_type="original"):
    mean = [0.5, 0.5, 0.5]
    std = [0.5, 0.5, 0.5]
    scale = 256
    input_shape = 224

    if no_augment:
        train_transform = transforms.Compose([
            transforms.Resize((input_shape, input_shape)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        test_transform = transforms.Compose([
            transforms.Resize((input_shape, input_shape)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
    elif augment_type == "original":
        train_transform = transforms.Compose([
            # resize every image to scale x scale pixels
            transforms.Resize(scale),
            # crop every image to input_shape x input_shape pixels, as
            # required by the inception model. Scaling first and then
            # cropping adds translation variation: the building is not always
            # centred, so the model is less sensitive to translation in the
            # test set.
            transforms.RandomResizedCrop(input_shape),
            # flip half of the images horizontally
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            # rotate randomly between -90 and +90 degrees
            transforms.RandomRotation(degrees=90),
            # convert to a torch tensor, normalized to [0, 1]
            transforms.ToTensor(),
            # normalize to [-1, 1]
            transforms.Normalize(mean, std),
        ])
        test_transform = transforms.Compose([
            # for testing and validation we don't want any randomness,
            # only cropping and normalizing
            transforms.Resize(scale),
            transforms.CenterCrop(input_shape),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
    elif augment_type == "paper":
        train_transform = transforms.Compose([
            transforms.Resize(input_shape),
            # # accidentally added rotation twice; one of the tests was run
            # # with this
            # transforms.RandomRotation(degrees=40),
            transforms.RandomAffine(degrees=40, translate=(0.2, 0.2),
                                    shear=11.5),
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop(input_shape, scale=(0.8, 1)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
        test_transform = transforms.Compose([
            transforms.Resize((input_shape, input_shape)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ])
    elif augment_type == "equalization":
        train_transform = A.Compose([
            A.Resize(scale, scale),
            A.RandomResizedCrop(input_shape, input_shape),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.RandomRotate90(),
            A.CLAHE(p=1),
            A.Normalize(mean=mean, std=std),
            ToTensorV2(),
        ])
        test_transform = A.Compose([
            A.Resize(scale, scale),
            A.CenterCrop(input_shape, input_shape),
            A.CLAHE(p=1),
            A.Normalize(mean=mean, std=std),
            ToTensorV2(),
        ])

    return {
        "train": train_transform,
        "validation": test_transform,
        "test": test_transform,
        "inference": test_transform,
    }[set_name]
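# Usage sketch for the factory above: it returns one pipeline per split. Note
# that the calling conventions differ by family: torchvision pipelines take a
# PIL image directly (transform(pil_img)), while the albumentations
# "equalization" pipelines take keyword arguments (transform(image=arr)["image"]).
train_tf = get_pretrained_attentive_transforms("train", augment_type="original")
test_tf = get_pretrained_attentive_transforms("test", augment_type="original")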
train_image_transforms = alb.Compose([
    # alb.PadIfNeeded(min_height=width_size, min_width=width_size),
    alb.HorizontalFlip(p=0.5),
    alb.CLAHE(p=0.5),
    alb.OneOf([
        alb.GridDistortion(num_steps=8, distort_limit=0.5, p=1.0),
        alb.OpticalDistortion(distort_limit=0.5, shift_limit=0.5, p=1.0),
        alb.ElasticTransform(alpha=3, p=1.0)
    ], p=0.5),
    alb.RandomResizedCrop(height=width_size, width=width_size,
                          scale=(0.5, 1.5), p=0.5),
    alb.ShiftScaleRotate(shift_limit=0.025, scale_limit=0.1, rotate_limit=20,
                         p=0.5),
    alb.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=20,
                           val_shift_limit=20, p=0.5),
    alb.RandomBrightnessContrast(brightness_limit=(-0.15, 0.15),
                                 contrast_limit=(-0.15, 0.15), p=0.5),
    alb.CoarseDropout(max_holes=12, min_holes=6,
                      max_height=int(width_size / 6),
This script is more or less the same as 1_train_model_i3d.py, except that we
use CutMix augmentation. I trained this model on my dual-GPU setup, hence the
larger batch size.
"""

FACE_MP4_DIR = "/home/james/Data/dfdc/faces2"
BATCH_SIZE = 26
N_WORKERS = 8
DEVICE = "cuda"
EPOCHS = 10
CUTMIX_ALPHA = 1

train_transforms = A.Compose([
    A.RandomBrightnessContrast(p=0.25),
    A.RandomResizedCrop(height=224, width=224, scale=(0.5, 1),
                        ratio=(0.9, 1.1)),
    A.HorizontalFlip()
])
test_transforms = A.Compose([A.CenterCrop(height=224, width=224)])

test_roots = ['45', '46', '47', '48', '49']

dataset_train = RebalancedVideoDataset(FACE_MP4_DIR, "train",
                                       label_per_frame=False,
                                       test_videos=test_roots,
                                       transforms=train_transforms,
                                       framewise_transforms=True,
                                       i3d_norm=True)
dataset_test = RebalancedVideoDataset(FACE_MP4_DIR,
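# The docstring above mentions CutMix. A minimal, self-contained sketch of the
# standard mixing step (Yun et al., 2019, https://arxiv.org/abs/1905.04899),
# assuming NCHW or NCTHW float batches; this is not necessarily the author's
# exact implementation. The loss is then
# lam * loss(pred, y_a) + (1 - lam) * loss(pred, y_b).
import numpy as np
import torch


def cutmix_batch(images, labels, alpha=1.0):
    """Paste a random rectangle from shuffled pairs; return both label sets."""
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(images.size(0))
    h, w = images.shape[-2:]
    # patch side scales with sqrt(1 - lam) so its area fraction is ~(1 - lam)
    cut_h, cut_w = int(h * np.sqrt(1 - lam)), int(w * np.sqrt(1 - lam))
    cy, cx = np.random.randint(h), np.random.randint(w)
    y1, y2 = max(cy - cut_h // 2, 0), min(cy + cut_h // 2, h)
    x1, x2 = max(cx - cut_w // 2, 0), min(cx + cut_w // 2, w)
    images[..., y1:y2, x1:x2] = images[index][..., y1:y2, x1:x2]
    lam = 1 - (y2 - y1) * (x2 - x1) / (h * w)  # correct lam for clipping
    return images, labels, labels[index], lam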
def get_aug(aug_type="val", size=256):
    """aug_type (str): one of `val`, `test`, `light`, `medium`, `hard`
    size (int): final size of the crop"""

    NORM_TO_TENSOR = albu.Compose([albu.Normalize(), ToTensor()])
    CROP_AUG = albu.RandomResizedCrop(size, size, scale=(0.05, 0.4))

    VAL_AUG = albu.Compose([
        albu.CenterCrop(size, size),
        NORM_TO_TENSOR,
    ])

    TEST_AUG = albu.Compose([
        albu.Resize(size, size),
        NORM_TO_TENSOR,
    ])

    LIGHT_AUG = albu.Compose([
        CROP_AUG,
        albu.Flip(),
        albu.RandomRotate90(),
        NORM_TO_TENSOR,
    ])

    MEDIUM_AUG = albu.Compose([
        CROP_AUG,
        albu.Flip(),
        albu.ShiftScaleRotate(),  # border_mode=cv2.BORDER_CONSTANT
        # occasional blur/sharpening
        albu.OneOf([albu.GaussianBlur(), albu.IAASharpen(), albu.NoOp()]),
        # spatial-preserving augmentations:
        # albu.OneOf([albu.CoarseDropout(), albu.MaskDropout(max_objects=5), albu.NoOp()]),
        albu.GaussNoise(),
        albu.OneOf([
            albu.RandomBrightnessContrast(),
            albu.CLAHE(),
            albu.HueSaturationValue(),
            albu.RGBShift(),
            albu.RandomGamma(),
        ]),
        # weather effects
        albu.RandomFog(fog_coef_lower=0.01, fog_coef_upper=0.3, p=0.1),
        NORM_TO_TENSOR,
    ])

    HARD_AUG = albu.Compose([
        CROP_AUG,
        # D4 augmentations
        albu.RandomRotate90(),
        albu.Transpose(),
        albu.RandomGridShuffle(p=0.2),
        albu.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.2),
        albu.ElasticTransform(alpha_affine=5, p=0.2),
        # occasional blur
        albu.OneOf([
            albu.GaussianBlur(),
            albu.GaussNoise(),
            albu.IAAAdditiveGaussianNoise(),
            albu.NoOp()
        ]),
        albu.OneOf([albu.CoarseDropout(), albu.NoOp()]),
        # spatial-preserving augmentations:
        albu.OneOf([
            albu.RandomBrightnessContrast(brightness_by_max=True),
            albu.CLAHE(),
            albu.HueSaturationValue(),
            albu.RGBShift(),
            albu.RandomGamma(),
            albu.NoOp(),
        ]),
        # weather effects
        albu.OneOf([
            albu.RandomFog(fog_coef_lower=0.01, fog_coef_upper=0.3, p=0.1),
            albu.NoOp()
        ]),
        NORM_TO_TENSOR,
    ])

    types = {
        "val": VAL_AUG,
        "test": TEST_AUG,
        "light": LIGHT_AUG,
        "medium": MEDIUM_AUG,
        "hard": HARD_AUG,
    }

    return types[aug_type]
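# Usage sketch: pick a policy by name, e.g. the "medium" recipe at 256 px.
# aug = get_aug("medium", size=256)
# sample = aug(image=img)  # {'image': normalized tensor}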
# Model params
width, height = int(os.environ.get("IMAGE_WIDTH")), int(os.environ.get("IMAGE_HEIGHT"))
n_classes = int(os.environ.get("N_CLASSES"))
# Learning params
batch_size = int(os.environ.get("BATCH_SIZE"))
lr = float(os.environ.get("LR"))
epoch = int(os.environ.get("EPOCH"))

label_names, class_index_dict = create_label_list_and_dict(label_file_path)
ignore_indices = [255, ]

# TODO Data augmentation
train_transforms = AlbumentationsDetectionWrapperTransform([
    A.HorizontalFlip(),
    A.RandomResizedCrop(width=width, height=height, scale=(0.8, 1.)),
    A.OneOf([
        A.Blur(blur_limit=5),
        A.RandomBrightnessContrast(),
        A.RandomGamma(),
    ]),
    ToTensorV2(),
])
test_transforms = AlbumentationsDetectionWrapperTransform([
    A.Resize(width=width, height=height),
    ToTensorV2(),
])

# dataset/dataloader
if test_images_dir == "":
    test_ratio = float(os.environ.get("TEST_RATIO"))
    root_dataset = VOCDataset.create(image_dir_path=train_images_dir,
                                     annotation_dir_path=train_annotations_dir,
            A.Normalize(mean=[0.485, 0.456, 0.406, 0],
                        std=[0.229, 0.224, 0.225, 1]),
            ToTensorV2()
        ])

    def __len__(self):
        # return 100
        return len(self.image_list)

    def __getitem__(self, idx):
        file_name = self.image_list[idx]
        I = np.asarray(Image.open(file_name))  # h x w x 4
        # print(I.shape, I.dtype)
        if self.transforms is not None:
            I = self.transforms(image=I)['image']  # h x w x 4
        I = self.to_tensor(image=I)['image']
        im = I[:3, :, :]    # RGB channels
        gt = I[[-1], :, :]  # 4th channel carries the ground-truth mask
        return im, gt


train_transform = A.Compose([
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.RandomRotate90(),
    A.RandomResizedCrop(224, 224, scale=(0.5, 1.0))
])
test_transform = A.Compose([A.Resize(224, 224)])
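# Side note (a sketch, not the author's code): instead of stacking the mask
# into a 4th image channel, albumentations can transform image and mask
# jointly via the built-in `mask` target; spatial transforms are applied to
# both, while pixel-level transforms such as Normalize touch only the image:
# out = train_transform(image=rgb, mask=gt)
# im, gt = out['image'], out['mask']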
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # faster training, but results are no longer fully deterministic
    torch.backends.cudnn.benchmark = True


seed_everything(cfg['seed'])

"""# b. augmentations"""

transforms_train = albumentations.Compose([
    albumentations.RandomResizedCrop(cfg['image_size'], cfg['image_size'],
                                     scale=(0.9, 1), p=1),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.ShiftScaleRotate(p=0.5),
    albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10,
                                      val_shift_limit=10, p=0.5),
    albumentations.RandomBrightnessContrast(brightness_limit=(-0.2, 0.2),
                                            contrast_limit=(-0.2, 0.2),
                                            p=0.5),
    albumentations.CLAHE(clip_limit=(1, 4), p=0.5),
    albumentations.OneOf([
        albumentations.OpticalDistortion(distort_limit=1.0),
        albumentations.GridDistortion(num_steps=5, distort_limit=1.),
        albumentations.ElasticTransform(alpha=3),
    ], p=0.2),
    albumentations.OneOf([
        albumentations.GaussNoise(var_limit=[10, 50]),
        albumentations.GaussianBlur(),
        albumentations.MotionBlur(),
        albumentations.MedianBlur(),
images.
"""

FACE_MP4_DIR = "E:/DFDC/faces2"
BATCH_SIZE = 16
N_WORKERS = 8
DEVICE = "cuda"
EPOCHS = 20

if __name__ == "__main__":
    train_transforms = A.Compose([
        A.GaussianBlur(p=0.1),
        A.GaussNoise(var_limit=5 / 255, p=0.1),
        A.RandomBrightnessContrast(p=0.25),
        A.RandomResizedCrop(height=112, width=112, scale=(0.33, 1),
                            ratio=(0.9, 1.1)),
        A.Normalize(),
        A.HorizontalFlip()
    ])
    test_transforms = A.Compose([
        A.Resize(128, 128),
        A.CenterCrop(height=112, width=112),
        A.Normalize()
    ])

    test_roots = ['45', '46', '47', '48', '49']

    dataset_train = RebalancedVideoDataset(FACE_MP4_DIR, "train",
                                           label_per_frame=False,
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(models.__dict__[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t, args.mlp)
    # print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = A.Compose([
            A.RandomResizedCrop(height=224, width=224, scale=(0.08, 1.0),
                                ratio=(0.75, 1.33333333), interpolation=2,
                                p=1),
            A.ColorJitter(0.4, 0.4, 0.4, 0.1, always_apply=False, p=0.8),
            A.HorizontalFlip(p=0.5),
            A.ToGray(p=0.2),
            A.Solarize(p=0.2),
            A.CLAHE(p=0.2),
            A.RandomBrightness(p=0.2),
            A.Normalize(mean=(0.4914, 0.4822, 0.4465),
                        std=(0.2471, 0.2435, 0.2616),
                        max_pixel_value=255.0, p=1.0),
            ToTensorV2()
        ])

    train_dataset = ImageFolder(traindir,
                                moco.loader2.TwoCropsTransform(augmentation))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename='checkpoint_{:04d}.pth.tar'.format(epoch))
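# `moco.loader2.TwoCropsTransform` is outside this excerpt; in the reference
# MoCo code, TwoCropsTransform simply applies the base augmentation twice to
# produce a query/key pair. A minimal albumentations-compatible variant, as a
# sketch assuming `base_transform` is an A.Compose (the torchvision list in
# the aug_plus branch would first need wrapping in transforms.Compose):
import numpy as np


class TwoCropsTransform:
    """Return two independently augmented views of the same image."""

    def __init__(self, base_transform):
        self.base_transform = base_transform

    def __call__(self, pil_image):
        arr = np.asarray(pil_image)  # ImageFolder yields PIL images
        q = self.base_transform(image=arr)["image"]
        k = self.base_transform(image=arr)["image"]
        return [q, k]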