def augmentation(mode, target_size, prob=0.5, aug_m=2):
    '''
    description: augmentation
    mode: 'train' 'test'
    target_size: int or list, the shape of image ,
    aug_m: Strength of transform
    '''
    high_p = prob
    low_p = high_p / 2.0
    M = aug_m
    first_size = [int(x / 0.7) for x in target_size]

    if mode == 'train':
        return composition.Compose([
            transforms.Resize(first_size[0], first_size[1], interpolation=3),
            transforms.Flip(p=0.5),
            composition.OneOf([
                RandomCenterCut(scale=0.1 * M),
                transforms.ShiftScaleRotate(shift_limit=0.05 * M,
                                            scale_limit=0.1 * M,
                                            rotate_limit=180,
                                            border_mode=cv2.BORDER_CONSTANT,
                                            value=0),
                albumentations.imgaug.transforms.IAAAffine(
                    shear=(-10 * M, 10 * M), mode='constant')
            ],
                              p=high_p),
            transforms.RandomBrightnessContrast(
                brightness_limit=0.1 * M, contrast_limit=0.03 * M, p=high_p),
            transforms.HueSaturationValue(hue_shift_limit=5 * M,
                                          sat_shift_limit=15 * M,
                                          val_shift_limit=10 * M,
                                          p=high_p),
            transforms.OpticalDistortion(distort_limit=0.03 * M,
                                         shift_limit=0,
                                         border_mode=cv2.BORDER_CONSTANT,
                                         value=0,
                                         p=low_p),
            composition.OneOf([
                transforms.Blur(blur_limit=7),
                albumentations.imgaug.transforms.IAASharpen(),
                transforms.GaussNoise(var_limit=(2.0, 10.0), mean=0),
                transforms.ISONoise()
            ],
                              p=low_p),
            transforms.Resize(target_size[0], target_size[1], interpolation=3),
            transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                 std=(0.5, 0.5, 0.5),
                                 max_pixel_value=255.0)
        ],
                                   p=1)

    else:
        return composition.Compose([
            transforms.Resize(target_size[0], target_size[1], interpolation=3),
            transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                 std=(0.5, 0.5, 0.5),
                                 max_pixel_value=255.0)
        ],
                                   p=1)
Example #2
0
def get_presize_combine_transforms_V4():
    transforms_presize = A.Compose([
        transforms.PadIfNeeded(600, 800),
        geometric.Perspective(
            scale=[0, .1],
            pad_mode=cv2.BORDER_REFLECT,
            interpolation=cv2.INTER_AREA, p = .3),
        transforms.Flip(),
        geometric.ShiftScaleRotate(interpolation=cv2.INTER_LANCZOS4, p = 0.95, scale_limit=0.0),
        crops.RandomResizedCrop(
            TARGET_SIZE, TARGET_SIZE,
            scale=(config['rrc_scale_min'], config['rrc_scale_max']),
            ratio=(.70, 1.4),
            interpolation=cv2.INTER_CUBIC,
            p=1.0),
        transforms.Transpose()
        #rotate.Rotate(interpolation=cv2.INTER_LANCZOS4, p = 0.99),
    ])
    
    transforms_postsize = A.Compose([
        #imgaug.IAAPiecewiseAffine(),

        transforms.CoarseDropout(),
        transforms.CLAHE(p=.1),
        transforms.RandomToneCurve(scale=.1, p=0.2),
        transforms.RandomBrightnessContrast(
            brightness_limit=.1, 
            contrast_limit=0.4,
            p=.8),
        transforms.HueSaturationValue(
            hue_shift_limit=20, 
            sat_shift_limit=50,
            val_shift_limit=0, 
            p=0.5),
        transforms.Equalize(p=0.05),
        transforms.FancyPCA(p=0.05),
        transforms.RandomGridShuffle(p=0.1),
        A.OneOf([
                transforms.MotionBlur(blur_limit=(3, 9)),
                transforms.GaussianBlur(),
                transforms.MedianBlur()
            ], p=0.1),
        transforms.ISONoise(p=.2),
        transforms.GaussNoise(var_limit=127., p=.3),
        A.OneOf([
            transforms.GridDistortion(interpolation=cv2.INTER_AREA, distort_limit=[0.7, 0.7], p=0.5),
            transforms.OpticalDistortion(interpolation=cv2.INTER_AREA, p=.3),
        ], p=.3),
        geometric.ElasticTransform(alpha=4, sigma=4, alpha_affine=4, interpolation=cv2.INTER_AREA, p=0.3),
        transforms.CoarseDropout(),
        transforms.Normalize(),
        ToTensorV2()
    ])
    return transforms_presize, transforms_postsize
Example #3
0
def get_presize_combine_tune_transforms():
    transforms_presize = A.Compose([
        transforms.Transpose(),
        transforms.Flip(),
        #transforms.PadIfNeeded(600, 800),
        crops.RandomResizedCrop(
            TARGET_SIZE, TARGET_SIZE,
            scale=(.75, 1),
            interpolation=cv2.INTER_CUBIC,
            p=1.0),
        rotate.Rotate(interpolation=cv2.INTER_LANCZOS4, p = 0.99),
    ])
    
    transforms_postsize = A.Compose([
        transforms.CoarseDropout(),
        # transforms.CLAHE(p=.1),
        transforms.RandomToneCurve(scale=.1),
        transforms.RandomBrightnessContrast(
            brightness_limit=.1, 
            contrast_limit=0.2,
            p=.7),
        transforms.HueSaturationValue(
            hue_shift_limit=20, 
            sat_shift_limit=60,
            val_shift_limit=0, 
            p=0.6),
        #transforms.Equalize(p=0.1),
        #transforms.FancyPCA(p=0.05),
        #transforms.RandomGridShuffle(p=0.1),
        #A.OneOf([
        #        transforms.MotionBlur(blur_limit=(3, 9)),
        #        transforms.GaussianBlur(),
        #        transforms.MedianBlur()
        #    ], p=0.2),
        transforms.ISONoise(p=.3),
        transforms.GaussNoise(var_limit=255., p=.3),
        #A.OneOf([
        #     transforms.GridDistortion(interpolation=cv2.INTER_AREA, distort_limit=[0.7, 0.7], p=0.5),
        #     transforms.OpticalDistortion(interpolation=cv2.INTER_AREA, p=.3),
        # ], p=.3),
        geometric.ElasticTransform(alpha=4, sigma=100, alpha_affine=100, interpolation=cv2.INTER_AREA, p=0.3),
        transforms.CoarseDropout(),
        transforms.Normalize(),
        ToTensorV2()
    ])
    return transforms_presize, transforms_postsize
Example #4
0
def get_train_transforms():
    return A.Compose([
        transforms.PadIfNeeded(600, 800),
        geometric.ShiftScaleRotate(interpolation=cv2.INTER_LANCZOS4, p = 0.99, scale_limit=0.8),
        geometric.Perspective(pad_mode=cv2.BORDER_REFLECT,interpolation=cv2.INTER_AREA),
        crops.RandomResizedCrop(
            TARGET_SIZE, TARGET_SIZE,
            scale=(config['rrc_scale_min'], config['rrc_scale_max']),
            interpolation=cv2.INTER_CUBIC,
            p=1.0),
        transforms.Transpose(),
        transforms.Flip(),
        transforms.CoarseDropout(),
        transforms.CLAHE(p=.1),
        transforms.RandomToneCurve(scale=.1),
        transforms.RandomBrightnessContrast(
            brightness_limit=.1, 
            contrast_limit=0.3,
            p=.7),
        transforms.HueSaturationValue(
            hue_shift_limit=20, 
            sat_shift_limit=60,
            val_shift_limit=0, 
            p=0.6),
        transforms.RandomGridShuffle(p=0.1),
        A.OneOf([
                transforms.MotionBlur(blur_limit=(3, 9)),
                transforms.GaussianBlur(),
                transforms.MedianBlur()
            ], p=0.2),
        transforms.ISONoise(p=.3),
        transforms.GaussNoise(var_limit=255., p=.3),
        A.OneOf([
            transforms.GridDistortion(interpolation=cv2.INTER_AREA, distort_limit=[0.7, 0.7], p=0.5),
            transforms.OpticalDistortion(interpolation=cv2.INTER_AREA, p=.3),
        ], p=.3),
        geometric.ElasticTransform(alpha=4, sigma=100, alpha_affine=100, interpolation=cv2.INTER_AREA, p=0.3),
        transforms.CoarseDropout(),
        transforms.Normalize(),
        ToTensorV2()
    ])
def main():
    config = vars(parse_args())

    if config['name'] is None:
        config['name'] = '%s_%s' % (config['arch'],
                                    datetime.now().strftime('%m%d%H'))

    config['num_filters'] = [int(n) for n in config['num_filters'].split(',')]

    if not os.path.exists('models/detection/%s' % config['name']):
        os.makedirs('models/detection/%s' % config['name'])

    if config['resume']:
        with open('models/detection/%s/config.yml' % config['name'], 'r') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config['resume'] = True

    with open('models/detection/%s/config.yml' % config['name'], 'w') as f:
        yaml.dump(config, f)

    print('-' * 20)
    for key in config.keys():
        print('- %s: %s' % (key, str(config[key])))
    print('-' * 20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_paths = np.array('inputs/train_images/' + df['ImageId'].values +
                         '.jpg')
    mask_paths = np.array('inputs/train_masks/' + df['ImageId'].values +
                          '.jpg')
    labels = np.array(
        [convert_str_to_labels(s) for s in df['PredictionString']])

    test_img_paths = None
    test_mask_paths = None
    test_outputs = None
    if config['pseudo_label'] is not None:
        test_df = pd.read_csv('inputs/sample_submission.csv')
        test_img_paths = np.array('inputs/test_images/' +
                                  test_df['ImageId'].values + '.jpg')
        test_mask_paths = np.array('inputs/test_masks/' +
                                   test_df['ImageId'].values + '.jpg')
        ext = os.path.splitext(config['pseudo_label'])[1]
        if ext == '.pth':
            test_outputs = torch.load('outputs/raw/test/%s' %
                                      config['pseudo_label'])
        elif ext == '.csv':
            test_labels = pd.read_csv('outputs/submissions/test/%s' %
                                      config['pseudo_label'])
            null_idx = test_labels.isnull().any(axis=1)
            test_img_paths = test_img_paths[~null_idx]
            test_mask_paths = test_mask_paths[~null_idx]
            test_labels = test_labels.dropna()
            test_labels = np.array([
                convert_str_to_labels(
                    s, names=['pitch', 'yaw', 'roll', 'x', 'y', 'z', 'score'])
                for s in test_labels['PredictionString']
            ])
            print(test_labels)
        else:
            raise NotImplementedError

    if config['resume']:
        checkpoint = torch.load('models/detection/%s/checkpoint.pth.tar' %
                                config['name'])

    heads = OrderedDict([
        ('hm', 1),
        ('reg', 2),
        ('depth', 1),
    ])

    if config['rot'] == 'eular':
        heads['eular'] = 3
    elif config['rot'] == 'trig':
        heads['trig'] = 6
    elif config['rot'] == 'quat':
        heads['quat'] = 4
    else:
        raise NotImplementedError

    if config['wh']:
        heads['wh'] = 2

    criterion = OrderedDict()
    for head in heads.keys():
        criterion[head] = losses.__dict__[config[head + '_loss']]().cuda()

    train_transform = Compose([
        transforms.ShiftScaleRotate(shift_limit=config['shift_limit'],
                                    scale_limit=0,
                                    rotate_limit=0,
                                    border_mode=cv2.BORDER_CONSTANT,
                                    value=0,
                                    p=config['shift_p'])
        if config['shift'] else NoOp(),
        OneOf([
            transforms.HueSaturationValue(hue_shift_limit=config['hue_limit'],
                                          sat_shift_limit=config['sat_limit'],
                                          val_shift_limit=config['val_limit'],
                                          p=config['hsv_p'])
            if config['hsv'] else NoOp(),
            transforms.RandomBrightness(
                limit=config['brightness_limit'],
                p=config['brightness_p'],
            ) if config['brightness'] else NoOp(),
            transforms.RandomContrast(
                limit=config['contrast_limit'],
                p=config['contrast_p'],
            ) if config['contrast'] else NoOp(),
        ],
              p=1),
        transforms.ISONoise(p=config['iso_noise_p'], )
        if config['iso_noise'] else NoOp(),
        transforms.CLAHE(p=config['clahe_p'], ) if config['clahe'] else NoOp(),
    ],
                              keypoint_params=KeypointParams(
                                  format='xy', remove_invisible=False))

    val_transform = None

    folds = []
    best_losses = []
    # best_scores = []

    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_paths)):
        print('Fold [%d/%d]' % (fold + 1, config['n_splits']))

        if (config['resume'] and fold < checkpoint['fold'] - 1) or (
                not config['resume']
                and os.path.exists('models/%s/model_%d.pth' %
                                   (config['name'], fold + 1))):
            log = pd.read_csv('models/detection/%s/log_%d.csv' %
                              (config['name'], fold + 1))
            best_loss = log.loc[log['val_loss'].values.argmin(), 'val_loss']
            # best_loss, best_score = log.loc[log['val_loss'].values.argmin(), ['val_loss', 'val_score']].values
            folds.append(str(fold + 1))
            best_losses.append(best_loss)
            # best_scores.append(best_score)
            continue

        train_img_paths, val_img_paths = img_paths[train_idx], img_paths[
            val_idx]
        train_mask_paths, val_mask_paths = mask_paths[train_idx], mask_paths[
            val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        if config['pseudo_label'] is not None:
            train_img_paths = np.hstack((train_img_paths, test_img_paths))
            train_mask_paths = np.hstack((train_mask_paths, test_mask_paths))
            train_labels = np.hstack((train_labels, test_labels))

        # train
        train_set = Dataset(
            train_img_paths,
            train_mask_paths,
            train_labels,
            input_w=config['input_w'],
            input_h=config['input_h'],
            transform=train_transform,
            lhalf=config['lhalf'],
            hflip=config['hflip_p'] if config['hflip'] else 0,
            scale=config['scale_p'] if config['scale'] else 0,
            scale_limit=config['scale_limit'],
            # test_img_paths=test_img_paths,
            # test_mask_paths=test_mask_paths,
            # test_outputs=test_outputs,
        )
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=config['batch_size'],
            shuffle=True,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        val_set = Dataset(val_img_paths,
                          val_mask_paths,
                          val_labels,
                          input_w=config['input_w'],
                          input_h=config['input_h'],
                          transform=val_transform,
                          lhalf=config['lhalf'])
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        # create model
        model = get_model(config['arch'],
                          heads=heads,
                          head_conv=config['head_conv'],
                          num_filters=config['num_filters'],
                          dcn=config['dcn'],
                          gn=config['gn'],
                          ws=config['ws'],
                          freeze_bn=config['freeze_bn'])
        model = model.cuda()

        if config['load_model'] is not None:
            model.load_state_dict(
                torch.load('models/detection/%s/model_%d.pth' %
                           (config['load_model'], fold + 1)))

        params = filter(lambda p: p.requires_grad, model.parameters())
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(params,
                                   lr=config['lr'],
                                   weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'AdamW':
            optimizer = optim.AdamW(params,
                                    lr=config['lr'],
                                    weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'RAdam':
            optimizer = RAdam(params,
                              lr=config['lr'],
                              weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(params,
                                  lr=config['lr'],
                                  momentum=config['momentum'],
                                  nesterov=config['nesterov'],
                                  weight_decay=config['weight_decay'])
        else:
            raise NotImplementedError

        if config['apex']:
            amp.initialize(model, optimizer, opt_level='O1')

        if config['scheduler'] == 'CosineAnnealingLR':
            scheduler = lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=config['epochs'], eta_min=config['min_lr'])
        elif config['scheduler'] == 'ReduceLROnPlateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(
                optimizer,
                factor=config['factor'],
                patience=config['patience'],
                verbose=1,
                min_lr=config['min_lr'])
        elif config['scheduler'] == 'MultiStepLR':
            scheduler = lr_scheduler.MultiStepLR(
                optimizer,
                milestones=[int(e) for e in config['milestones'].split(',')],
                gamma=config['gamma'])
        else:
            raise NotImplementedError

        log = {
            'epoch': [],
            'loss': [],
            # 'score': [],
            'val_loss': [],
            # 'val_score': [],
        }

        best_loss = float('inf')
        # best_score = float('inf')

        start_epoch = 0

        if config['resume'] and fold == checkpoint['fold'] - 1:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            start_epoch = checkpoint['epoch']
            log = pd.read_csv(
                'models/detection/%s/log_%d.csv' %
                (config['name'], fold + 1)).to_dict(orient='list')
            best_loss = checkpoint['best_loss']

        for epoch in range(start_epoch, config['epochs']):
            print('Epoch [%d/%d]' % (epoch + 1, config['epochs']))

            # train for one epoch
            train_loss = train(config, heads, train_loader, model, criterion,
                               optimizer, epoch)
            # evaluate on validation set
            val_loss = validate(config, heads, val_loader, model, criterion)

            if config['scheduler'] == 'CosineAnnealingLR':
                scheduler.step()
            elif config['scheduler'] == 'ReduceLROnPlateau':
                scheduler.step(val_loss)

            print('loss %.4f - val_loss %.4f' % (train_loss, val_loss))
            # print('loss %.4f - score %.4f - val_loss %.4f - val_score %.4f'
            #       % (train_loss, train_score, val_loss, val_score))

            log['epoch'].append(epoch)
            log['loss'].append(train_loss)
            # log['score'].append(train_score)
            log['val_loss'].append(val_loss)
            # log['val_score'].append(val_score)

            pd.DataFrame(log).to_csv('models/detection/%s/log_%d.csv' %
                                     (config['name'], fold + 1),
                                     index=False)

            if val_loss < best_loss:
                torch.save(
                    model.state_dict(), 'models/detection/%s/model_%d.pth' %
                    (config['name'], fold + 1))
                best_loss = val_loss
                # best_score = val_score
                print("=> saved best model")

            state = {
                'fold': fold + 1,
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
            }
            torch.save(
                state,
                'models/detection/%s/checkpoint.pth.tar' % config['name'])

        print('val_loss:  %f' % best_loss)
        # print('val_score: %f' % best_score)

        folds.append(str(fold + 1))
        best_losses.append(best_loss)
        # best_scores.append(best_score)

        results = pd.DataFrame({
            'fold':
            folds + ['mean'],
            'best_loss':
            best_losses + [np.mean(best_losses)],
            # 'best_score': best_scores + [np.mean(best_scores)],
        })

        print(results)
        results.to_csv('models/detection/%s/results.csv' % config['name'],
                       index=False)

        del model
        torch.cuda.empty_cache()

        del train_set, train_loader
        del val_set, val_loader
        gc.collect()

        if not config['cv']:
            break
Example #6
0
def main():
    config = vars(parse_args())

    if config['name'] is None:
        config['name'] = '%s_%s' % (config['arch'], datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/pose/%s' % config['name']):
        os.makedirs('models/pose/%s' % config['name'])

    if config['resume']:
        with open('models/pose/%s/config.yml' % config['name'], 'r') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config['resume'] = True

    with open('models/pose/%s/config.yml' % config['name'], 'w') as f:
        yaml.dump(config, f)

    print('-'*20)
    for key in config.keys():
        print('- %s: %s' % (key, str(config[key])))
    print('-'*20)

    cudnn.benchmark = True

    df = pd.read_csv('inputs/train.csv')
    img_ids = df['ImageId'].values
    pose_df = pd.read_csv('processed/pose_train.csv')
    pose_df['img_path'] = 'processed/pose_images/train/' + pose_df['img_path']

    if config['resume']:
        checkpoint = torch.load('models/pose/%s/checkpoint.pth.tar' % config['name'])

    if config['rot'] == 'eular':
        num_outputs = 3
    elif config['rot'] == 'trig':
        num_outputs = 6
    elif config['rot'] == 'quat':
        num_outputs = 4
    else:
        raise NotImplementedError

    if config['loss'] == 'L1Loss':
        criterion = nn.L1Loss().cuda()
    elif config['loss'] == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    else:
        raise NotImplementedError

    train_transform = Compose([
        transforms.ShiftScaleRotate(
            shift_limit=config['shift_limit'],
            scale_limit=0,
            rotate_limit=0,
            border_mode=cv2.BORDER_CONSTANT,
            value=0,
            p=config['shift_p']
        ) if config['shift'] else NoOp(),
        OneOf([
            transforms.HueSaturationValue(
                hue_shift_limit=config['hue_limit'],
                sat_shift_limit=config['sat_limit'],
                val_shift_limit=config['val_limit'],
                p=config['hsv_p']
            ) if config['hsv'] else NoOp(),
            transforms.RandomBrightness(
                limit=config['brightness_limit'],
                p=config['brightness_p'],
            ) if config['brightness'] else NoOp(),
            transforms.RandomContrast(
                limit=config['contrast_limit'],
                p=config['contrast_p'],
            ) if config['contrast'] else NoOp(),
        ], p=1),
        transforms.ISONoise(
            p=config['iso_noise_p'],
        ) if config['iso_noise'] else NoOp(),
        transforms.CLAHE(
            p=config['clahe_p'],
        ) if config['clahe'] else NoOp(),
        transforms.Resize(config['input_w'], config['input_h']),
        transforms.Normalize(),
        ToTensor(),
    ])

    val_transform = Compose([
        transforms.Resize(config['input_w'], config['input_h']),
        transforms.Normalize(),
        ToTensor(),
    ])

    folds = []
    best_losses = []

    kf = KFold(n_splits=config['n_splits'], shuffle=True, random_state=41)
    for fold, (train_idx, val_idx) in enumerate(kf.split(img_ids)):
        print('Fold [%d/%d]' %(fold + 1, config['n_splits']))

        if (config['resume'] and fold < checkpoint['fold'] - 1) or (not config['resume'] and os.path.exists('pose_models/%s/model_%d.pth' % (config['name'], fold+1))):
            log = pd.read_csv('models/pose/%s/log_%d.csv' %(config['name'], fold+1))
            best_loss = log.loc[log['val_loss'].values.argmin(), 'val_loss']
            # best_loss, best_score = log.loc[log['val_loss'].values.argmin(), ['val_loss', 'val_score']].values
            folds.append(str(fold + 1))
            best_losses.append(best_loss)
            # best_scores.append(best_score)
            continue

        train_img_ids, val_img_ids = img_ids[train_idx], img_ids[val_idx]

        train_img_paths = []
        train_labels = []
        for img_id in train_img_ids:
            tmp = pose_df.loc[pose_df.ImageId == img_id]

            img_path = tmp['img_path'].values
            train_img_paths.append(img_path)

            yaw = tmp['yaw'].values
            pitch = tmp['pitch'].values
            roll = tmp['roll'].values
            roll = rotate(roll, np.pi)

            if config['rot'] == 'eular':
                label = np.array([
                    yaw,
                    pitch,
                    roll
                ]).T
            elif config['rot'] == 'trig':
                label = np.array([
                    np.cos(yaw),
                    np.sin(yaw),
                    np.cos(pitch),
                    np.sin(pitch),
                    np.cos(roll),
                    np.sin(roll),
                ]).T
            elif config['rot'] == 'quat':
                raise NotImplementedError
            else:
                raise NotImplementedError

            train_labels.append(label)
        train_img_paths = np.hstack(train_img_paths)
        train_labels = np.vstack(train_labels)

        val_img_paths = []
        val_labels = []
        for img_id in val_img_ids:
            tmp = pose_df.loc[pose_df.ImageId == img_id]

            img_path = tmp['img_path'].values
            val_img_paths.append(img_path)

            yaw = tmp['yaw'].values
            pitch = tmp['pitch'].values
            roll = tmp['roll'].values
            roll = rotate(roll, np.pi)

            if config['rot'] == 'eular':
                label = np.array([
                    yaw,
                    pitch,
                    roll
                ]).T
            elif config['rot'] == 'trig':
                label = np.array([
                    np.cos(yaw),
                    np.sin(yaw),
                    np.cos(pitch),
                    np.sin(pitch),
                    np.cos(roll),
                    np.sin(roll),
                ]).T
            elif config['rot'] == 'quat':
                raise NotImplementedError
            else:
                raise NotImplementedError

            val_labels.append(label)
        val_img_paths = np.hstack(val_img_paths)
        val_labels = np.vstack(val_labels)

        # train
        train_set = PoseDataset(
            train_img_paths,
            train_labels,
            transform=train_transform,
        )
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=config['batch_size'],
            shuffle=True,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        val_set = PoseDataset(
            val_img_paths,
            val_labels,
            transform=val_transform,
        )
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=config['batch_size'],
            shuffle=False,
            num_workers=config['num_workers'],
            # pin_memory=True,
        )

        # create model
        model = get_pose_model(config['arch'],
                          num_outputs=num_outputs,
                          freeze_bn=config['freeze_bn'])
        model = model.cuda()

        params = filter(lambda p: p.requires_grad, model.parameters())
        if config['optimizer'] == 'Adam':
            optimizer = optim.Adam(params, lr=config['lr'], weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'AdamW':
            optimizer = optim.AdamW(params, lr=config['lr'], weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'RAdam':
            optimizer = RAdam(params, lr=config['lr'], weight_decay=config['weight_decay'])
        elif config['optimizer'] == 'SGD':
            optimizer = optim.SGD(params, lr=config['lr'], momentum=config['momentum'],
                                  nesterov=config['nesterov'], weight_decay=config['weight_decay'])
        else:
            raise NotImplementedError

        if config['scheduler'] == 'CosineAnnealingLR':
            scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['epochs'], eta_min=config['min_lr'])
        elif config['scheduler'] == 'ReduceLROnPlateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=config['factor'], patience=config['patience'],
                                                       verbose=1, min_lr=config['min_lr'])
        elif config['scheduler'] == 'MultiStepLR':
            scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[int(e) for e in config['milestones'].split(',')], gamma=config['gamma'])
        else:
            raise NotImplementedError

        log = {
            'epoch': [],
            'loss': [],
            # 'score': [],
            'val_loss': [],
            # 'val_score': [],
        }

        best_loss = float('inf')
        # best_score = float('inf')

        start_epoch = 0

        if config['resume'] and fold == checkpoint['fold'] - 1:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            start_epoch = checkpoint['epoch']
            log = pd.read_csv('models/pose/%s/log_%d.csv' % (config['name'], fold+1)).to_dict(orient='list')
            best_loss = checkpoint['best_loss']

        for epoch in range(start_epoch, config['epochs']):
            print('Epoch [%d/%d]' % (epoch + 1, config['epochs']))

            # train for one epoch
            train_loss = train(config, train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            val_loss = validate(config, val_loader, model, criterion)

            if config['scheduler'] == 'CosineAnnealingLR':
                scheduler.step()
            elif config['scheduler'] == 'ReduceLROnPlateau':
                scheduler.step(val_loss)

            print('loss %.4f - val_loss %.4f' % (train_loss, val_loss))
            # print('loss %.4f - score %.4f - val_loss %.4f - val_score %.4f'
            #       % (train_loss, train_score, val_loss, val_score))

            log['epoch'].append(epoch)
            log['loss'].append(train_loss)
            # log['score'].append(train_score)
            log['val_loss'].append(val_loss)
            # log['val_score'].append(val_score)

            pd.DataFrame(log).to_csv('models/pose/%s/log_%d.csv' % (config['name'], fold+1), index=False)

            if val_loss < best_loss:
                torch.save(model.state_dict(), 'models/pose/%s/model_%d.pth' % (config['name'], fold+1))
                best_loss = val_loss
                # best_score = val_score
                print("=> saved best model")

            state = {
                'fold': fold + 1,
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
            }
            torch.save(state, 'models/pose/%s/checkpoint.pth.tar' % config['name'])

        print('val_loss:  %f' % best_loss)
        # print('val_score: %f' % best_score)

        folds.append(str(fold + 1))
        best_losses.append(best_loss)
        # best_scores.append(best_score)

        results = pd.DataFrame({
            'fold': folds + ['mean'],
            'best_loss': best_losses + [np.mean(best_losses)],
            # 'best_score': best_scores + [np.mean(best_scores)],
        })

        print(results)
        results.to_csv('models/pose/%s/results.csv' % config['name'], index=False)

        del model
        torch.cuda.empty_cache()

        del train_set, train_loader
        del val_set, val_loader
        gc.collect()

        if not config['cv']:
            break