예제 #1
0
    def __init__(self, config):
        """Build the training and validation augmentation pipelines.

        Two ``A.Compose`` pipelines are created and stored on
        ``self.train_transform`` and ``self.val_transform``. Both are
        configured for ``pascal_voc`` boxes with class ids carried in the
        ``labels`` field, and neither drops boxes by area or visibility
        (``min_area=0``, ``min_visibility=0``).

        Args:
            config: Configuration object. Only ``config.DATA.SCALE`` is read;
                it is used as both the target height and the target width
                (the original note mentions 300 / 512 as typical values).
        """
        height = width = config.DATA.SCALE

        def voc_bbox_params():
            # A fresh BboxParams per pipeline, exactly as the pipelines were
            # originally declared.
            return A.BboxParams(format='pascal_voc',
                                min_area=0,
                                min_visibility=0,
                                label_fields=['labels'])

        # Branch 1: resize so the short side equals min(height, width), then
        # take a random height x width crop.
        resize_then_crop = A.Sequential(
            [
                A.SmallestMaxSize(min(height, width), p=1.0),
                A.RandomCrop(height, width, p=1.0),
            ],
            p=0.4,
        )
        # Branch 2: resize so the long side equals max(height, width).
        fit_long_side = A.LongestMaxSize(max(height, width), p=0.6)
        spatial_choice = A.OneOf([resize_then_crop, fit_long_side], p=1.0)

        # One of two photometric jitters.
        colour_choice = A.OneOf(
            [
                A.HueSaturationValue(hue_shift_limit=0.4,
                                     sat_shift_limit=0.4,
                                     val_shift_limit=0.4,
                                     p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.3,
                                           contrast_limit=0.3,
                                           p=0.9),
            ],
            p=0.9,
        )

        # Project-specific padding transform used in place of A.PadIfNeeded;
        # presumably pads up to height x width with the given fill colour.
        pad_to_target = C.RandomPad(min_height=height,
                                    min_width=width,
                                    border_mode=0,
                                    value=(123 / 255, 117 / 255, 104 / 255),
                                    p=1.0)

        train_steps = [
            # Project-specific resize; per the original note it adjusts the
            # aspect ratio within [1/1.3, 1.3]. A.RandomScale(scale_limit=0.3)
            # was tried here first and noted as problematic.
            C.RandomResize(scale_limit=0.3, p=1.0),
            spatial_choice,
            colour_choice,
            pad_to_target,
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ]
        self.train_transform = A.Compose(train_steps,
                                         p=1.0,
                                         bbox_params=voc_bbox_params())

        # Validation: a plain resize to the target size, no randomness.
        val_steps = [
            A.Resize(height=height, width=width, p=1.0),
            ToTensorV2(p=1.0),
        ]
        self.val_transform = A.Compose(val_steps,
                                       p=1.0,
                                       bbox_params=voc_bbox_params())
예제 #2
0
def make_coco_transforms(image_set='train', image_size=(640, 640)):
    """Return the ``A.Compose`` pipeline for the requested dataset split.

    Args:
        image_set: ``'train'`` or ``'val'``.
        image_size: Two values unpacked positionally into the size arguments
            of ``A.PadIfNeeded``, ``A.RandomResizedCrop`` and ``A.Resize``.

    Returns:
        For ``'train'``: pad (``border_mode=0``, zero padding per the original
        note), random resized crop, horizontal flip, then normalise + tensor.
        For ``'val'``: resize, then normalise + tensor.

    Raises:
        ValueError: If ``image_set`` is neither ``'train'`` nor ``'val'``.
    """
    # Shared tail of both pipelines. It is built before the split is
    # validated, matching the original evaluation order.
    to_normalized_tensor = A.Sequential([
        A.Normalize(),  # library defaults (ImageNet statistics per the original note)
        ToTensorV2(),
    ])

    if image_set == 'val':
        return A.Compose([
            A.Resize(*image_size),
            to_normalized_tensor,
        ])

    if image_set != 'train':
        raise ValueError(f'{image_set} not recognized!')

    return A.Compose([
        A.PadIfNeeded(*image_size, border_mode=0),
        A.RandomResizedCrop(*image_size),
        A.HorizontalFlip(),
        to_normalized_tensor,
    ])
예제 #3
0
def _print_replay_entry(entry):
    """Print one replay record: its class name, then each remaining parameter.

    Records whose class name contains ``'Flip'`` are not printed.
    """
    class_name = entry['__class_fullname__']
    if 'Flip' in class_name:
        return
    print(class_name)
    hidden_keys = ('__class_fullname__', 'applied', 'always_apply')
    for key, value in entry.items():
        if key not in hidden_keys:
            print(f"{key}: {value}")


def train_albu_augment(record):
    """Augment ``record['image']`` and ``record['mask']`` in place.

    Args:
        record: Mapping with keys ``'image'``, ``'mask'`` and ``'image_size'``
            (side length of the final square random crop). An optional truthy
            ``'verbose'`` key switches to ``albu.ReplayCompose`` and prints
            the transforms that were applied.

    Returns:
        The same ``record`` object, with ``'image'`` and ``'mask'`` replaced
        by the augmented versions.
    """
    verbose = record.get('verbose', False)
    image_size = record['image_size']
    image = record['image']
    mask = record['mask']

    # ReplayCompose records what was applied, which the verbose dump needs.
    pipeline = albu.ReplayCompose if verbose else albu.Compose

    blur_cls = albu.augmentations.transforms.Blur
    jitter_cls = albu.augmentations.transforms.ColorJitter

    # Rarely-applied noise followed by blur.
    noise_then_blur = albu.Sequential(
        [
            albu.GaussNoise(0.05, p=1),
            blur_cls(blur_limit=5, always_apply=False, p=1),
        ],
        p=0.001,
    )
    # Rarely-applied geometric distortion.
    distortion = albu.OneOf(
        [
            albu.ElasticTransform(p=0.5),
            albu.IAAPiecewiseAffine(p=0.5),
            albu.OpticalDistortion(p=1),
        ],
        p=0.001,
    )
    # One photometric change: brightness/contrast, colour jitter or gamma.
    photometric = albu.OneOf(
        [
            albu.RandomBrightnessContrast(brightness_limit=0.2,
                                          contrast_limit=0.2,
                                          brightness_by_max=True,
                                          always_apply=False,
                                          p=1),
            albu.RandomBrightnessContrast(brightness_limit=(-0.2, 0.6),
                                          contrast_limit=0.2,
                                          brightness_by_max=True,
                                          always_apply=False,
                                          p=1),
            jitter_cls(brightness=0.2,
                       contrast=0.2,
                       saturation=0.1,
                       hue=0.1,
                       always_apply=False,
                       p=1),
            albu.RandomGamma(p=0.5),
        ],
        p=0.5,
    )
    noise_or_affine = albu.OneOf(
        [
            albu.GaussNoise(0.02, p=0.5),
            albu.IAAAffine(p=0.5),
        ],
        p=0.25,
    )
    strong_or_mild_blur = albu.OneOf(
        [
            blur_cls(blur_limit=15, always_apply=False, p=0.25),
            blur_cls(blur_limit=3, always_apply=False, p=0.5),
        ],
        p=0.5,
    )

    aug = pipeline([
        noise_then_blur,
        distortion,
        photometric,
        noise_or_affine,
        strong_or_mild_blur,
        albu.RandomRotate90(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.RandomCrop(width=image_size, height=image_size),
    ])

    data = aug(image=image, mask=mask)
    record['image'] = data['image']
    record['mask'] = data['mask']

    if not verbose:
        return record

    # Dump the applied transforms from the replay log.
    for entry in data['replay']['transforms']:
        if not isinstance(entry, dict):
            print('not a dict')
            continue
        if not entry.get('applied', False):
            continue

        print(30 * '-')
        if 'OneOf' not in entry['__class_fullname__']:
            _print_replay_entry(entry)
            continue

        # For a OneOf, report only the child that actually ran.
        print(30 * '=')
        for child in entry['transforms']:
            if child.get('applied', False):
                _print_replay_entry(child)

    return record
예제 #4
0
def aug(source, images_output_path, size):
    """Generate augmented images plus annotations for every source image.

    For each distinct ``filename`` in ``source`` the image is read with
    ``cv2.imread`` and augmented repeatedly. The loop for one image ends once
    every class present in it has collected at least ``size`` distinct
    ``xmin`` values across the saved augmentations. Saved images go to
    ``<images_output_path>/JPEGImages/`` and their XML annotations to
    ``<images_output_path>/Annotations/``.

    Args:
        source: Table supporting ``groupby("filename")`` whose rows expose
            ``filename``, ``xmin``, ``ymin``, ``xmax``, ``ymax`` and
            ``class`` (presumably a pandas DataFrame).
        images_output_path: Root output directory; sub-directories are
            created if missing.
        size: Target number of distinct box positions per class per image.

    Returns:
        A ``pandas.DataFrame`` with one row per bounding box of every SAVED
        augmented image (columns: filename, width, height, class, xmin, ymin,
        xmax, ymax, imageindex).

    Note:
        The loop has no attempt limit. If the transforms cannot produce
        ``size`` distinct ``xmin`` values for some class, it does not end.
    """
    images_path = images_output_path + "/JPEGImages/"
    os.makedirs(images_path, exist_ok=True)

    xml_path = images_output_path + "/Annotations/"
    os.makedirs(xml_path, exist_ok=True)

    transform = A.Compose(
        [
            A.OneOf([
                A.Sequential(
                    [A.RandomCrop(width=800, height=600),
                     A.RandomRotate90()]),
                A.Sequential([
                    A.RandomScale(scale_limit=0.2),
                    A.Flip(),
                    A.RandomRotate90(),
                ]),
                A.Sequential(
                    [
                        A.Rotate(),
                    ],
                    p=0.3,
                ),
            ])
        ],
        bbox_params=A.BboxParams(format="pascal_voc",
                                 min_visibility=0.5,
                                 label_fields=["class_labels"]),
    )

    rows = []
    random.seed(42)

    images_index = 1
    for _, group in source.groupby("filename"):
        row = group.iloc[0]
        print(row["filename"])
        image = cv2.imread(row["filename"])

        bboxes = []
        class_labels = []

        # Per class: the distinct xmin values seen in saved augmentations.
        aleady_box = {}
        for _, vrow in group.iterrows():
            bboxes.append(
                [vrow["xmin"], vrow["ymin"], vrow["xmax"], vrow["ymax"]])
            class_labels.append(vrow["class"])
            aleady_box[vrow["class"]] = set()
        all_count = 0
        print(aleady_box)
        while int(all_count) < size:
            augmented = transform(
                image=image,
                bboxes=bboxes,
                class_labels=class_labels,
            )

            # Every box was filtered out of this sample; draw another one.
            if len(augmented["bboxes"]) < 1:
                continue

            file_name = f"{images_index}.jpg"
            out_height, out_width = augmented["image"].shape[:2]
            writer = Writer(file_name, out_width, out_height)

            # Rows are buffered and committed only if the image is saved.
            # Appending them straight to `rows` left stale boxes attached to
            # a file name that a later augmentation then reused.
            pending_rows = []
            findbox = False
            for bbox, label in zip(augmented["bboxes"],
                                   augmented["class_labels"]):
                x_min, y_min, x_max, y_max = map(int, bbox[:4])

                pending_rows.append({
                    "filename": f"{images_path}/{file_name}",
                    "width": out_width,
                    "height": out_height,
                    "class": label,
                    "xmin": x_min,
                    "ymin": y_min,
                    "xmax": x_max,
                    "ymax": y_max,
                    "imageindex": str(images_index),
                })
                writer.addObject(label, x_min, y_min, x_max, y_max)

                seen_positions = aleady_box[label]
                if len(seen_positions) >= size:
                    # This class already has enough samples.
                    continue
                seen_positions.add(x_min)
                findbox = True

            if findbox:
                cv2.imwrite(f"{images_path}/{file_name}", augmented["image"])
                writer.save(f"{xml_path}/{images_index}.xml")
                rows.extend(pending_rows)
                images_index += 1
                print(aleady_box)

            # Mean per-class progress, capped at `size`; it reaches `size`
            # only when every class has enough samples.
            all_count = sum(
                min(len(v), size) for v in aleady_box.values()
            ) / len(aleady_box)

    return pd.DataFrame(rows)
예제 #5
0
bbox_params = A.BboxParams(format='pascal_voc', min_visibility=0.3)
train_transforms = A.Compose([
    A.Resize(height=img_size[0], width=img_size[1], p=1.0),
    A.HorizontalFlip(p=0.5),
    # A.OneOf([
    #     A.Sequential([
    #         A.Resize(height=img_size[0], width=img_size[1], p=1.0),
    #     ], p=1.0),
    #     A.Sequential([
    #         A.RandomSizedBBoxSafeCrop(height=img_size[0], width=img_size[1], p=1.0),
    #     ], p=1.0)
    # ], p=1.0),

    A.OneOf([
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5)
        ], p=1.0),
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=0, p=0.5),
        ], p=1.0),
        A.Sequential([
            A.GaussNoise(var_limit=(100, 150), p=0.5),
            A.MotionBlur(blur_limit=17, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=0, p=0.5),
            A.ChannelShuffle(p=0.5),
            # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=(-0.15, 0.15), rotate_limit=30, p=0.5,
            #                    border_mode=cv2.BORDER_CONSTANT, value=0),
def train_function(gpu, world_size, node_rank, gpus, fold_number, group_name):
    """Run training for one fold inside a single distributed worker process.

    The worker joins an NCCL process group, builds the augmented training set
    (all rows whose ``fold_group`` differs from ``fold_number``) and the
    held-out set (rows whose ``fold_group`` equals ``fold_number``), wraps an
    ``EfficientNetArcFace`` model in ``DistributedDataParallel`` and trains
    for a fixed number of epochs. Rank 0 additionally logs to wandb, scores
    embeddings on the held-out fold and writes a checkpoint after every
    epoch.

    Args:
        gpu: Local GPU index of this process on its node.
        world_size: Total number of processes in the group.
        node_rank: Index of this node.
        gpus: Number of GPUs per node; the global rank is
            ``node_rank * gpus + gpu``.
        fold_number: Value of ``fold_group`` held out for evaluation.
        group_name: wandb group name; also part of the checkpoint directory
            name.
    """
    import torch.multiprocessing
    torch.multiprocessing.set_sharing_strategy('file_system')

    torch.manual_seed(25)
    np.random.seed(25)

    rank = node_rank * gpus + gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=world_size,
        rank=rank
    )

    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")

    # Hyper-parameters.
    batch_size = 64  # global batch; each process gets batch_size // world_size
    width_size = 416
    init_lr = 1e-4
    end_lr = 1e-6
    n_epochs = 20
    emb_size = 512
    margin = 0.5
    dropout = 0.0
    iters_to_accumulate = 1

    if rank == 0:
        # Only rank 0 talks to wandb and owns the checkpoint directory.
        wandb.init(project='shopee_effnet0', group=group_name, job_type=str(fold_number))

        checkpoints_dir_name = 'effnet0_{}_{}_{}'.format(width_size, dropout, group_name)
        os.makedirs(checkpoints_dir_name, exist_ok=True)

        wandb.config.model_name = checkpoints_dir_name
        wandb.config.batch_size = batch_size
        wandb.config.width_size = width_size
        wandb.config.init_lr = init_lr
        wandb.config.n_epochs = n_epochs
        wandb.config.emb_size = emb_size
        wandb.config.dropout = dropout
        wandb.config.iters_to_accumulate = iters_to_accumulate
        wandb.config.optimizer = 'adam'
        wandb.config.scheduler = 'ShopeeScheduler'

    df = pd.read_csv('../../dataset/reliable_validation_tm.csv')
    train_df = df[df['fold_group'] != fold_number]
    train_transforms = alb.Compose([
        alb.RandomResizedCrop(width_size, width_size),
        alb.ShiftScaleRotate(shift_limit=0.1, rotate_limit=30),
        alb.HorizontalFlip(),
        alb.OneOf([
            alb.Sequential([
                alb.HueSaturationValue(hue_shift_limit=50),
                alb.RandomBrightnessContrast(),
            ]),
            alb.FancyPCA(),
            alb.ChannelDropout(),
            alb.ChannelShuffle(),
            alb.RGBShift()
        ]),
        alb.CoarseDropout(max_height=int(width_size*0.1), max_width=int(width_size*0.1)),
        alb.OneOf([
            alb.ElasticTransform(),
            alb.OpticalDistortion(),
            alb.GridDistortion()
        ]),
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])
    train_set = ImageDataset(train_df, train_df, '../../dataset/train_images', train_transforms)
    sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True)
    # shuffle stays False here because ordering is delegated to the sampler.
    train_dataloader = DataLoader(train_set, batch_size=batch_size // world_size, shuffle=False, num_workers=4,
                                  sampler=sampler)

    # Deterministic preprocessing for the held-out fold.
    valid_transforms = alb.Compose([
        alb.Resize(width_size, width_size),
        alb.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ])

    test_df = df[df['fold_group'] == fold_number]
    test_set = ImageDataset(test_df, test_df, '../../dataset/train_images', valid_transforms)
    test_dataloader = DataLoader(test_set, batch_size=batch_size // world_size, shuffle=False, num_workers=4)

    model = EfficientNetArcFace(emb_size, train_df['label_group'].nunique(), device, dropout=dropout,
                                backbone='tf_efficientnet_b0_ns', pretrained=True, margin=margin, is_amp=True)
    model = SyncBatchNorm.convert_sync_batchnorm(model)
    model.to(device)
    model = DistributedDataParallel(model, device_ids=[gpu])

    scaler = GradScaler()
    criterion = CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    scheduler = ShopeeScheduler(optimizer, lr_start=init_lr,
                                lr_max=init_lr*batch_size, lr_min=end_lr)

    for epoch in range(n_epochs):
        # DistributedSampler derives its shuffle from the epoch number.
        # Without this call every epoch iterates the data in the same order.
        sampler.set_epoch(epoch)

        # The scheduler is stepped once per epoch below, so it is not handed
        # to the per-iteration training loop.
        train_loss, train_duration, train_f1 = train_one_epoch(
            model, train_dataloader, optimizer, criterion, device, scaler,
            scheduler=None, iters_to_accumulate=iters_to_accumulate)
        scheduler.step()

        if rank == 0:
            embeddings = get_embeddings(model, test_dataloader, device)
            embeddings_f1 = validate_embeddings_f1(embeddings, test_df)

            wandb.log({'train_loss': train_loss, 'train_f1': train_f1,
                       'embeddings_f1': embeddings_f1, 'epoch': epoch})

            filename = '{}_foldnum{}_epoch{}_train_loss{}_f1{}'.format(
                checkpoints_dir_name, fold_number+1, epoch+1,
                round(train_loss, 3), round(embeddings_f1, 3))
            # Save the unwrapped module so the checkpoint loads without DDP.
            torch.save(model.module.state_dict(), os.path.join(checkpoints_dir_name, '{}.pth'.format(filename)))

            print('FOLD NUMBER %d\tEPOCH %d:\t'
                  'TRAIN [duration %.3f sec, loss: %.3f, avg f1: %.3f]\t'
                  'VALID EMBEDDINGS [avg f1: %.3f]\tCurrent time %s' %
                  (fold_number + 1, epoch + 1, train_duration,
                   train_loss, train_f1, embeddings_f1,
                   str(datetime.now(timezone('Europe/Moscow')))))

    if rank == 0:
        wandb.finish()
예제 #7
0
 def create_transform(self, input_dtype, p):
     """Chain the transforms of all stages into one ``A.Sequential``.

     Each entry of ``self.stages`` is asked for its own transform via
     ``stage.create_transform(input_dtype)``; the results are wrapped, in
     stage order, in an ``A.Sequential`` constructed with ``p=p``.
     """
     stage_transforms = []
     for stage in self.stages:
         stage_transforms.append(stage.create_transform(input_dtype))
     return A.Sequential(stage_transforms, p=p)