Example #1
def generate_model_submission(snapshot_name: str, config_name: str, postprocess=morphology_postprocess, mine_on_val=True, export_png=False):
    print('Generating model submission for session', snapshot_name)

    snapshot_basename = os.path.splitext(os.path.basename(snapshot_name))[0]
    config_file = auto_file(config_name)
    save_file = auto_file(snapshot_name)
    working_dir = os.path.dirname(config_file)

    # OOF
    # stratify = config['stratify']
    # fold = config['fold']
    #
    # train_ids = D.all_train_ids()
    # train_indexes, test_indexes = D.get_train_test_split_for_fold(stratify, fold, train_ids)
    # train_predictions = np.load(train_predictions)

    # Predictions for train dataset
    train_predictions, train_dataset = predict_masks_auto(config_file, save_file, test_or_train='train')
    train_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_train_predictions.npz')
    np.savez_compressed(train_predictions_file, **dict(zip(train_dataset.ids, train_predictions)))

    # Predictions for test dataset
    test_predictions, test_dataset = predict_masks_auto(config_file, save_file, test_or_train='test')
    test_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_test_predictions.npz')
    np.savez_compressed(test_predictions_file, **dict(zip(test_dataset.ids, test_predictions)))

    # Save predictions as uint8 masks
    if export_png:
        convert_predictions_to_images(train_predictions_file, os.path.join(working_dir, 'train_predictions'))
        convert_predictions_to_images(test_predictions_file, os.path.join(working_dir, 'test_predictions'))

    # Threshold mining
    if mine_on_val:
        with open(config_file) as f:
            config = json.load(f)
        valid_ids = np.array(config['valid_set'])
        valid_mask = D.get_selection_mask(train_dataset.ids, valid_ids)
        true_masks = D.read_train_masks(valid_ids)
        threshold, lb_score = threshold_mining(train_predictions[valid_mask], true_masks, min_threshold=0.15, max_threshold=0.85, step=0.005)
    else:
        true_masks = D.read_train_masks(train_dataset.ids)
        threshold, lb_score = threshold_mining(train_predictions, true_masks, min_threshold=0.15, max_threshold=0.85, step=0.005)

    i = np.argmax(lb_score)
    threshold, lb_score = float(threshold[i]), float(lb_score[i])

    suffix = '_mine_on_val' if mine_on_val else ''
    submit_file = os.path.join(working_dir, '{}_LB{:.4f}_TH{:.4f}{}.csv.gz'.format(snapshot_basename, lb_score, threshold, suffix))

    test_predictions = test_predictions > threshold

    if postprocess is not None:
        final_masks = []
        for image, mask in zip(D.read_test_images(test_dataset.ids), test_predictions):
            mask = postprocess(image, mask)
            final_masks.append(mask)
        test_predictions = np.array(final_masks)

    create_submission(test_dataset.ids, test_predictions).to_csv(submit_file, compression='gzip', index=False)
    print('Saved submission to', working_dir)
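# Usage sketch (hypothetical file names): auto_file resolves both arguments,
# a binarization threshold is mined on the validation split, and a gzipped
# RLE submission is written next to the config file.
generate_model_submission('my_model_val_lb.pth',
                          'my_session.json',
                          mine_on_val=True,
                          export_png=False)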
Example #2
def main():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--snapshot', required=True, type=str, help='Path to the model snapshot (.pth)')
    parser.add_argument('-c', '--config', required=True, type=str, help='Path to the training session config (.json)')
    parser.add_argument('-mov', '--mine-on-val', action='store_true')

    args = parser.parse_args()

    snapshot_file = auto_file(args.snapshot)
    config_file = auto_file(args.config)

    generate_model_submission(snapshot_file, config_file, mine_on_val=args.mine_on_val)
def test_rle_encode_decode():
    train_pred = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    train_pred = np.load(train_pred)

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)
    pred_masks = np.array([train_pred[id] for id in train_ids])
    pred_masks = (pred_masks > 0.45).astype(np.uint8)

    submit = create_submission(train_ids, pred_masks)
    submit.to_csv('test_rle_encode_decode.csv.gz',
                  compression='gzip',
                  index=False)

    decoded_ids, decoded_masks = decode_submission(
        'test_rle_encode_decode.csv.gz')
    decoded_masks = dict(zip(decoded_ids, decoded_masks))
    assert set(decoded_ids) == set(train_ids)

    decoded_masks = np.array([decoded_masks[id] for id in train_ids])

    p1, r1, _ = do_kaggle_metric(pred_masks, true_masks)
    p2, r2, _ = do_kaggle_metric(decoded_masks, true_masks)

    assert np.array_equal(p1, p2)
    assert np.array_equal(r1, r2)
    print(np.mean(p1), np.mean(p2))
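# rle_encode/rle_decode themselves are not shown in this file. Below is a
# minimal sketch of the usual Kaggle convention (column-major order,
# 1-indexed run starts) that round-trips binary masks; the project's own
# helpers may differ in details such as mask shape handling.
def rle_encode_sketch(mask):
    pixels = mask.flatten(order='F')              # column-major, per Kaggle
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]                       # (start, length) pairs
    return ' '.join(str(x) for x in runs)


def rle_decode_sketch(rle, shape=(101, 101)):     # 101x101 is the TGS tile size
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if isinstance(rle, str) and rle.strip():
        s = np.asarray(rle.split(), dtype=int)
        starts, lengths = s[0::2] - 1, s[1::2]    # back to 0-indexed starts
        for start, length in zip(starts, lengths):
            mask[start:start + length] = 1
    return mask.reshape(shape, order='F')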
Example #4
def extract_oof_predictions(model) -> dict:
    test_predictions = auto_file(f'{model}_test_predictions.npz')
    train_predictions = auto_file(f'{model}_train_predictions.npz')

    experiment_dir = os.path.dirname(test_predictions)

    json_config = [fname for fname in sorted(os.listdir(experiment_dir)) if os.path.splitext(fname)[1] == '.json']
    json_config = auto_file(json_config[0])

    with open(json_config) as f:
        config = json.load(f)
    stratify = config['stratify']
    fold = config['fold']

    train_ids = D.all_train_ids()
    train_indexes, test_indexes = D.get_train_test_split_for_fold(stratify, fold, train_ids)
    train_predictions = np.load(train_predictions)

    valid_ids = train_ids[test_indexes]
    valid_predictions = np.array([train_predictions[id] for id in valid_ids])
    oof_predictions = dict(zip(valid_ids, valid_predictions))

    np.savez_compressed(os.path.join(experiment_dir, f'{model}_oof_predictions.npz'), **oof_predictions)
    return oof_predictions
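# Usage sketch: the 'model' argument is the snapshot prefix shared by the
# saved *_train_predictions.npz / *_test_predictions.npz pair, e.g. the
# wonderful_goldberg snapshot referenced in the tests above.
oof = extract_oof_predictions(
    'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth')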
Example #5
def make_cv_submit(inputs, prefix, output_dir='submits'):
    os.makedirs(output_dir, exist_ok=True)

    test_predictions = [auto_file(f'{model}_test_predictions.npz') for model in inputs]
    oof_predictions = [auto_file(f'{model}_oof_predictions.npz') for model in inputs]

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)
    test_ids = D.all_test_ids()

    pred_masks = merge_oof(oof_predictions, train_ids)
    threshold, lb_score = threshold_mining(pred_masks, true_masks, min_threshold=0.1, max_threshold=0.9, step=0.001)

    i = np.argmax(lb_score)
    threshold, lb_score = float(threshold[i]), float(lb_score[i])
    print('Threshold', threshold, 'CV score', lb_score)

    # Arithmetic-mean ensemble of test predictions
    ensembled_test_pred = ensemble(test_predictions, test_ids, averaging=ArithmeticMean)
    ensembled_test_pred = ensembled_test_pred > threshold

    submit_file = f'{prefix}_a_mean_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
    create_submission(test_ids, ensembled_test_pred).to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
    print('Saved submission', submit_file)

    postprocess = morphology_postprocess
    if postprocess is not None:
        final_masks = []
        for image, mask in zip(D.read_test_images(test_ids), ensembled_test_pred):
            mask = postprocess(image, mask)
            final_masks.append(mask)
        ensembled_test_pred = np.array(final_masks)

        submit_file = f'{prefix}_a_mean_PPC_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
        create_submission(test_ids, ensembled_test_pred).to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
        print('Saved submission', submit_file)
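# Usage sketch (hypothetical snapshot prefixes): every input needs matching
# *_oof_predictions.npz and *_test_predictions.npz files that auto_file can
# resolve; the prefix only names the output submission files.
make_cv_submit(['unet_fold0.pth', 'unet_fold1.pth', 'unet_fold2.pth'],
               prefix='unet_3folds')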
Example #6
def decode_submission(submission):
    if isinstance(submission, str):
        submission = pd.read_csv(auto_file(submission))

    submission = submission.sort_values('id')
    images = []
    ids = []

    for index, row in submission.iterrows():
        rle_mask = row['rle_mask']
        images.append(rle_decode(rle_mask))
        ids.append(row['id'])

    return ids, images
def average_predictions(predictions, dst_file=None):
    predictions = [auto_file(p) for p in predictions]
    dst_dir = os.path.dirname(predictions[0])

    scale = 1.0 / len(predictions)

    avg = {}

    pred = np.load(predictions[0])
    ids = list(pred.keys())
    for id in ids:
        avg[id] = pred[id] * scale

    for pred_file in predictions[1:]:
        pred = np.load(pred_file)

        for id in ids:
            avg[id] += pred[id] * scale

    if dst_file is not None:
        np.savez_compressed(os.path.join(dst_dir, dst_file), **avg)
        print(f'Saved {dst_file}')
    return avg
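# Usage sketch (hypothetical file names): blends several snapshots' test
# predictions into one .npz with equal weights.
avg = average_predictions(['snapshot_a_test_predictions.npz',
                           'snapshot_b_test_predictions.npz'],
                          dst_file='blend_test_predictions.npz')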
Example #8
def main():
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    # Here we can exclude some images from training, but keep in validation
    train_mask = D.drop_some(img_train,
                             mask_train,
                             drop_black=True,
                             drop_vstrips=args.drop_vstrips,
                             drop_few=args.drop_few)
    ids_train = ids_train[train_mask]
    img_train = img_train[train_mask]
    mask_train = mask_train[train_mask]
    depth_train = depth_train[train_mask]

    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test_sorted is not sorted")

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This is valid only if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE
    # target_size = D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(ids_train), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(ids_test), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Optimizer      :', args.optimizer, 'wd', args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    weights = {
        'mask': 1.0,
        'class': 0.05,
        'dsv': 0.1,
    }

    bce = U.get_loss('bce')
    bce_lovasz = U.get_loss('bce_lovasz')
    bce_jaccard = U.get_loss('bce_jaccard')

    losses = {
        'warmup': {
            'mask': bce,
            'class': bce,
            'dsv': bce,
        },
        'main': {
            'mask': bce_jaccard,
            'class': bce,
            'dsv': bce,
        },
        'annealing': {
            'mask': bce_lovasz,
            'class': bce,
            'dsv': bce,
        }
    }

    epochs = {'warmup': 50, 'main': 250, 'annealing': 50}

    if args.fast:
        for key in epochs.keys():
            epochs[key] = 1

    learning_rates = {
        'warmup': args.learning_rate,
        'main': 1e-3,
        'annealing': 1e-2
    }

    # Warmup phase
    if epochs['warmup']:
        print('CUDA memory (allocated/cached):',
              torch.cuda.max_memory_allocated(),
              torch.cuda.max_memory_cached())
        trainable_parameters = filter(lambda p: p.requires_grad,
                                      model.parameters())
        optimizer = U.get_optimizer(args.optimizer,
                                    trainable_parameters,
                                    learning_rates['warmup'],
                                    weight_decay=args.weight_decay)
        scheduler = None  # StepLR(optimizer, gamma=0.5, step_size=50)

        train_history, best_metric_val, start_epoch = train(
            model,
            losses['warmup'],
            weights,
            optimizer,
            scheduler,
            trainloader,
            validloader,
            writer,
            start_epoch,
            epochs=epochs['warmup'],
            early_stopping=args.early_stopping,
            train_history=train_history,
            experiment_dir=exp_dir,
            target_metric=target_metric,
            best_metric_val=best_metric_val,
            target_metric_mode=target_metric_mode,
            checkpoint_filename=best_lb_checkpoint)
        U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_warmup.pth'),
                          model,
                          start_epoch,
                          train_history,
                          metric_name=target_metric,
                          metric_score=best_metric_val)

        del trainable_parameters, optimizer, scheduler
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        print('Finished warmup phase. Starting main training loop.')

    # Main training phase
    print('CUDA memory (allocated/cached):',
          torch.cuda.max_memory_allocated(),
          torch.cuda.max_memory_cached())
    trainable_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
    optimizer = U.get_optimizer(args.optimizer,
                                trainable_parameters,
                                learning_rates['main'],
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='max',
                                  patience=50,
                                  factor=0.5,
                                  min_lr=1e-5)

    train_history, best_metric_val, start_epoch = train(
        model,
        losses['main'],
        weights,
        optimizer,
        scheduler,
        trainloader,
        validloader,
        writer,
        start_epoch,
        epochs=epochs['main'],
        early_stopping=args.early_stopping,
        train_history=train_history,
        experiment_dir=exp_dir,
        target_metric=target_metric,
        best_metric_val=best_metric_val,
        target_metric_mode=target_metric_mode,
        checkpoint_filename=best_lb_checkpoint)
    del trainable_parameters, optimizer, scheduler
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    snapshots = [best_lb_checkpoint]

    U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_main.pth'),
                      model,
                      start_epoch,
                      train_history,
                      metric_name=target_metric,
                      metric_score=best_metric_val)

    print('Finished train phase.')

    # Cosine annealing
    if epochs['annealing']:

        for snapshot in range(5):
            print(f'Starting annealing phase {snapshot}')
            print('CUDA memory (allocated/cached):',
                  torch.cuda.max_memory_allocated(),
                  torch.cuda.max_memory_cached())
            # model.set_fine_tune(True)
            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            optimizer = U.get_optimizer('sgd',
                                        trainable_parameters,
                                        learning_rates['annealing'],
                                        weight_decay=args.weight_decay)
            scheduler = CosineAnnealingLR(optimizer,
                                          epochs['annealing'],
                                          eta_min=1e-7)

            snapshot_name = os.path.join(
                exp_dir, f'{prefix}_{target_metric}_snapshot_{snapshot}.pth')
            snapshots.append(snapshot_name)
            train_history, best_metric_val, start_epoch = train(
                model,
                losses['annealing'],
                weights,
                optimizer,
                scheduler,
                trainloader,
                validloader,
                writer,
                start_epoch,
                epochs=epochs['annealing'],
                early_stopping=args.early_stopping,
                train_history=train_history,
                experiment_dir=exp_dir,
                target_metric=target_metric,
                best_metric_val=0,
                target_metric_mode=target_metric_mode,
                checkpoint_filename=snapshot_name)
            del trainable_parameters, optimizer, scheduler
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    print('Training finished')
    train_history.to_csv(os.path.join(exp_dir, 'train_history.csv'),
                         index=False)

    for snapshot_file in snapshots:
        generate_model_submission(snapshot_file,
                                  config_fname,
                                  mine_on_val=True)
Example #9
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

from lib.train_utils import find_optimal_lr, auto_file
import torch_train as TT

if __name__ == '__main__':
    dd = 'e:/datasets/inria/train'

    model = TT.get_model('linknet34', patch_size=512, num_channels=3).cuda()
    loss = TT.get_loss('bce').cuda()
    optimizer = TT.get_optimizer('sgd', model.parameters(), 1e-4)
    trainset, validset, num_classes = TT.get_dataset('inria', dd, grayscale=False, patch_size=512)

    TT.restore_snapshot(model, None, auto_file('linknet34_checkpoint.pth'))
    trainloader = DataLoader(trainset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)

    lrs, losses = find_optimal_lr(model, loss, optimizer, trainloader)

    # Smooth the loss curve with a 4-point moving average
    losses = np.convolve(losses, np.ones(4) / 4, mode='same')

    fig, ax = plt.subplots(figsize=(16, 12))
    ax.plot(lrs, losses)
    ax.set(xlabel='lr', ylabel='loss', title='LR range test')
    ax.set_xscale("log", nonposx='clip')
    ax.grid()
    fig.show()

    plt.savefig('loss_plot.png')
    print(lrs, losses)
Example #10
def main():
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.get_train_ids(drop_black=True,
                                drop_vstrips=args.drop_vstrips,
                                drop_empty=args.drop_empty,
                                drop_few=args.drop_few,
                                fast=args.fast)
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test_sorted is not sorted")

    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This is valid only if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE
    # target_size = D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()
    scheduler = None
    optimizer = None

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(trainset), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(validset), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Criterion      :', args.loss)
    print('Optimizer      :', args.optimizer, args.learning_rate,
          args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    segmentation_loss = U.get_loss(args.loss)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    # Start training loop
    no_improvement_epochs = 0

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # On Epoch begin
        if U.should_quit(exp_dir) or (
                args.early_stopping is not None
                and no_improvement_epochs > args.early_stopping):
            break

        epochs_trained = epoch - start_epoch
        should_restart_optimizer = (
            (args.restart_every > 0
             and epochs_trained % args.restart_every == 0)
            or epochs_trained == args.freeze_encoder
            or optimizer is None)

        if should_restart_optimizer:
            del optimizer
            if args.fine_tune:
                model.set_fine_tune(args.fine_tune)
            else:
                model.set_encoder_training_enabled(
                    epochs_trained >= args.freeze_encoder)

            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            optimizer = U.get_optimizer(args.optimizer,
                                        trainable_parameters,
                                        args.learning_rate,
                                        weight_decay=args.weight_decay)

            print('Restarting optimizer state', epoch, count_parameters(model))

            if args.lr_scheduler:
                scheduler = U.get_lr_scheduler(args.lr_scheduler, optimizer,
                                               args.epochs)

        if scheduler is not None and not isinstance(scheduler,
                                                    ReduceLROnPlateau):
            scheduler.step(epochs_trained)

        U.log_learning_rate(writer, optimizer, epoch)

        # Epoch
        # Guard: prepare_fn may be None (it is treated as optional above)
        mask_postprocess = prepare_fn.backward if prepare_fn is not None else (lambda x: x)
        train_metrics = process_epoch(model,
                                      segmentation_loss,
                                      optimizer,
                                      trainloader,
                                      epoch,
                                      True,
                                      writer,
                                      mask_postprocess=mask_postprocess)
        valid_metrics = process_epoch(model,
                                      segmentation_loss,
                                      None,
                                      validloader,
                                      epoch,
                                      False,
                                      writer,
                                      mask_postprocess=mask_postprocess)

        all_metrics = {}
        all_metrics.update(train_metrics)
        all_metrics.update(valid_metrics)

        # On Epoch End
        summary = {
            'epoch': [int(epoch)],
            'lr': [float(optimizer.param_groups[0]['lr'])]
        }
        for k, v in all_metrics.items():
            summary[k] = [v]

        train_history = train_history.append(pd.DataFrame.from_dict(summary),
                                             ignore_index=True)
        print(epoch, summary)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(all_metrics[target_metric], epochs_trained)

        if U.is_better(all_metrics[target_metric], best_metric_val,
                       target_metric_mode):
            best_metric_val = all_metrics[target_metric]
            U.save_checkpoint(best_lb_checkpoint,
                              model,
                              epoch,
                              train_history,
                              metric_name=target_metric,
                              metric_score=best_metric_val)
            print('Checkpoint saved', epoch, best_metric_val,
                  best_lb_checkpoint)
            no_improvement_epochs = 0
        else:
            no_improvement_epochs += 1

    print('Training finished')

    generate_model_submission(best_lb_checkpoint,
                              config_fname,
                              mine_on_val=True)
Example #11
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-g',
                        '--grayscale',
                        action='store_true',
                        help='Whether to use grayscale image instead of RGB')
    parser.add_argument('-m',
                        '--model',
                        required=True,
                        type=str,
                        help='Name of the model')
    parser.add_argument('-p', '--patch-size', type=int, default=224)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=1,
                        help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-lr',
                        '--learning-rate',
                        type=float,
                        default=1e-3,
                        help='Initial learning rate')
    parser.add_argument('-l',
                        '--loss',
                        type=str,
                        default='bce',
                        help='Target loss')
    parser.add_argument('-o',
                        '--optimizer',
                        default='SGD',
                        help='Name of the optimizer')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=100,
                        help='Epoch to run')
    parser.add_argument('-d',
                        '--dataset',
                        type=str,
                        help='Name of the dataset to use for training.')
    parser.add_argument('-dd',
                        '--data-dir',
                        type=str,
                        default='data',
                        help='Root directory where datasets are located.')
    parser.add_argument('-s',
                        '--steps',
                        type=int,
                        default=128,
                        help='Steps per epoch')
    parser.add_argument('-x',
                        '--experiment',
                        type=str,
                        help='Name of the experiment')
    parser.add_argument('-w',
                        '--workers',
                        default=0,
                        type=int,
                        help='Num workers')
    parser.add_argument('-r', '--resume', action='store_true')
    parser.add_argument('-mem', '--memory', action='store_true')

    args = parser.parse_args()
    cudnn.benchmark = True

    if args.experiment is None:
        args.experiment = 'torch_%s_%s_afterburn_%d_%s_%s' % (
            args.dataset, args.model, args.patch_size,
            'gray' if args.grayscale else 'rgb', args.loss)

    experiment_dir = os.path.join('experiments', args.dataset, args.loss,
                                  args.experiment)
    os.makedirs(experiment_dir, exist_ok=True)

    writer = SummaryWriter(comment=args.experiment)

    with open(os.path.join(experiment_dir, 'arguments.txt'), 'w') as f:
        f.write(' '.join(sys.argv[1:]))

    trainset, validset, num_classes = TT.get_dataset(
        args.dataset,
        args.data_dir,
        grayscale=args.grayscale,
        patch_size=args.patch_size,
        keep_in_mem=args.memory)
    print('Train set size', len(trainset))
    print('Valid set size', len(validset))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True)
    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True)

    head_model = TT.get_model(args.model,
                              patch_size=args.patch_size,
                              num_channels=1 if args.grayscale else 3).cuda()
    TT.restore_snapshot(head_model, None,
                        auto_file('linknet34_checkpoint.pth'))

    # Freeze model training
    for param in head_model.parameters():
        param.requires_grad = False

    afterburner = Afterburner()
    model = nn.Sequential(head_model, nn.Sigmoid(), afterburner).cuda()
    optimizer = TT.get_optimizer(args.optimizer, afterburner.parameters(),
                                 args.learning_rate)

    loss = TT.get_loss(args.loss).cuda()
    metrics = {
        'iou': JaccardScore().cuda(),
        'accuracy': PixelAccuracy().cuda()
    }

    start_epoch = 0
    best_loss = np.inf
    train_history = pd.DataFrame()

    checkpoint_filename = os.path.join(experiment_dir,
                                       f'{args.model}_checkpoint.pth')
    if args.resume:
        start_epoch, train_history, best_loss = restore_snapshot(
            model, optimizer, checkpoint_filename)
        print('Resuming training from epoch', start_epoch, ' and loss',
              best_loss)
        print(train_history)

    print('Head       :', count_parameters(head_model))
    print('Afterburner:', count_parameters(afterburner))

    for epoch in range(start_epoch, args.epochs):
        train_loss, train_scores = train(model,
                                         loss,
                                         optimizer,
                                         trainloader,
                                         epoch,
                                         metrics,
                                         summary_writer=writer)
        valid_loss, valid_scores = validate(model,
                                            loss,
                                            validloader,
                                            epoch,
                                            metrics,
                                            summary_writer=writer)

        summary = {
            'epoch': [epoch],
            'loss': [train_loss.avg],
            'val_loss': [valid_loss.avg]
        }

        for key, value in train_scores.items():
            summary[key] = [value.avg]

        for key, value in valid_scores.items():
            summary['val_' + key] = [value.avg]

        train_history = train_history.append(pd.DataFrame.from_dict(summary),
                                             ignore_index=True)

        print(epoch, summary)

        if valid_loss.avg < best_loss:
            save_snapshot(model, optimizer, valid_loss.avg, epoch,
                          train_history, checkpoint_filename)
            best_loss = valid_loss.avg
            print('Checkpoint saved', epoch, best_loss)

    print('Training is finished...')

    train_history.to_csv(os.path.join(experiment_dir,
                                      args.experiment + '.csv'),
                         index=False,
                         mode='a' if args.resume else 'w',
                         header=not args.resume)
Example #12
def main():
    cudnn.benchmark = True

    parser = argparse.ArgumentParser()

    parser.add_argument('-g',
                        '--grayscale',
                        action='store_true',
                        help='Whether to use grayscale image instead of RGB')
    parser.add_argument('-m',
                        '--model',
                        required=True,
                        type=str,
                        help='Name of the model')
    parser.add_argument('-c',
                        '--checkpoint',
                        required=True,
                        type=str,
                        help='Name of the model checkpoint')
    parser.add_argument('-p', '--patch-size', type=int, default=224)
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=1,
                        help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-dd',
                        '--data-dir',
                        type=str,
                        default='data',
                        help='Root directory where datasets are located.')
    parser.add_argument('-x',
                        '--experiment',
                        type=str,
                        help='Name of the experiment')
    parser.add_argument('-f', '--full', action='store_true')

    args = parser.parse_args()

    if args.experiment is None:
        args.experiment = 'inria_%s_%d_%s' % (
            args.model, args.patch_size, 'gray' if args.grayscale else 'rgb')

    experiment_dir = os.path.join('submits', args.experiment)
    os.makedirs(experiment_dir, exist_ok=True)

    model = TT.get_model(args.model,
                         patch_size=args.patch_size,
                         num_channels=1 if args.grayscale else 3).cuda()
    start_epoch, train_history, best_loss = TT.restore_snapshot(
        model, None, auto_file(args.checkpoint))
    print('Using weights from epoch', start_epoch - 1, best_loss)

    test_transform = aug.Sequential([
        aug.ImageOnly(aug.NormalizeImage(mean=INRIA_MEAN, std=INRIA_STD)),
    ])

    x = sorted(find_in_dir(os.path.join(args.data_dir, 'images')))
    # x = x[:10]

    model.eval()
    with torch.no_grad():

        for test_fname in tqdm(x, total=len(x)):
            image = read_rgb(test_fname)
            basename = os.path.splitext(os.path.basename(test_fname))[0]

            if args.full:
                mask = predict_full(image, model, test_transform)
            else:
                mask = predict_tiled(image, model, test_transform,
                                     args.patch_size, args.batch_size)

            mask = ((mask > 0.5) * 255).astype(np.uint8)
            cv2.imwrite(os.path.join(experiment_dir, basename + '.tif'), mask)
def test_prediction_pipeline_tta_pre():
    from lib import tta

    device = 'cuda'
    config = auto_file('infallible_lamport.json')
    snapshot = auto_file(
        'Oct09_23_17_wider_unet_224pad_medium_infallible_lamport_val_lb.pth')

    with open(config) as f:
        config = json.load(f)
    snapshot = torch.load(snapshot)

    prepare_fn = D.get_prepare_fn(config['prepare'], **config)
    dataset = get_test_dataset(dataset=config['dataset'],
                               prepare=prepare_fn,
                               test_or_train='train')

    model = get_model(config['model'],
                      num_classes=config['num_classes'],
                      num_channels=dataset.channels(),
                      pretrained=False).to(device)

    if device == 'cpu':
        warnings.warn('Using CPU for prediction. It will be SLOW.')

    model.load_state_dict(snapshot['model'])
    model.eval()

    batch_size = config['batch_size']
    collate_fn = tta.tta_fliplr_collate
    batch_size = max(1, batch_size // 2)

    pred_masks = []
    with torch.no_grad():
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            pin_memory=True,
                            collate_fn=collate_fn)
        for images, image_ids in tqdm(loader,
                                      total=len(loader),
                                      desc='Predicting'):
            images = images.to(device, non_blocking=True)

            output = model(images)
            is_raw_mask = isinstance(output, torch.Tensor)
            is_mask_and_class = isinstance(output, tuple) and len(output) == 2

            if is_raw_mask:
                masks = output
            elif is_mask_and_class:
                masks, presence = output
            else:
                raise RuntimeError('Unknown output type')

            masks = dataset.resize_fn.backward(masks)
            masks = np.array([np.squeeze(x) for x in masks.cpu().numpy()])
            masks = tta.tta_fliplr_deaug(masks)
            masks = sigmoid(masks)

            if is_mask_and_class:
                presence = presence.softmax(dim=1).cpu().numpy()
                presence = tta.average_classes(presence, 2)
                presence = np.argmax(presence, axis=1)
                masks = zero_masks_inplace(masks, presence == 0)

            for mask, image_id in zip(masks, image_ids):
                mask = cv2.resize(mask, (D.ORIGINAL_SIZE, D.ORIGINAL_SIZE),
                                  interpolation=cv2.INTER_LANCZOS4)
                pred_masks.append(mask)

    del model, loader

    pred_masks = np.array(pred_masks)
    true_masks = D.read_train_masks(dataset.ids)

    plt.figure()
    binarization_thresholds, scores = threshold_mining(pred_masks,
                                                       true_masks,
                                                       min_threshold=0,
                                                       max_threshold=1)
    plt.plot(binarization_thresholds, scores)
    plt.title("test_prediction_pipeline_tta_pre")
    plt.show()
    return pred_masks, dataset
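# lib.tta is not shown in this file. Below is a minimal sketch of a matching
# horizontal-flip TTA pair, assuming the collate interleaves each image with
# its mirrored copy and the de-augmentation averages the two predictions.
def tta_fliplr_collate_sketch(batch):
    images, ids = zip(*batch)
    doubled = []
    for image in images:
        doubled.append(image)
        doubled.append(torch.flip(image, dims=[-1]))  # mirror the width axis
    return torch.stack(doubled), list(ids)


def tta_fliplr_deaug_sketch(masks):
    # masks: (2 * N, H, W) interleaved original/flipped predictions
    original = masks[0::2]
    restored = masks[1::2][..., ::-1]  # undo the horizontal flip
    return (original + restored) / 2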
def test_inspect_train_predictions():
    train_ids = D.all_train_ids()
    train_images = D.read_train_images(train_ids)
    train_masks = D.read_train_masks(train_ids)
    print(train_ids.shape, train_images.shape, train_masks.shape)

    CONFIG = auto_file('wonderful_goldberg.json')
    WEIGHT_TRAIN = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    WEIGHT_TEST = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_test_predictions.npz'
    )

    convert_predictions_to_images(WEIGHT_TEST,
                                  os.path.join('test', 'test_predictions'))
    convert_predictions_to_images(WEIGHT_TRAIN,
                                  os.path.join('test', 'train_predictions'))

    train_predictions = np.load(WEIGHT_TRAIN)

    # image = train_predictions['0aab0afa9c']

    train_predictions = np.array([train_predictions[id] for id in train_ids])
    print(train_predictions.shape)

    threshold, lb_score = threshold_mining(train_predictions,
                                           train_masks,
                                           min_threshold=0.15,
                                           max_threshold=0.85,
                                           step=0.005)

    plt.figure()
    plt.plot(threshold, lb_score)
    plt.tight_layout()

    i = np.argmax(lb_score)
    best_threshold, best_lb_score = float(threshold[i]), float(lb_score[i])
    print(best_threshold, best_lb_score)

    config_file = auto_file(CONFIG)

    with open(config_file) as f:
        config = json.load(f)
    valid_ids = np.array(config['valid_set'])
    valid_mask = D.get_selection_mask(train_ids, valid_ids)
    val_threshold, val_lb_score = threshold_mining(
        train_predictions[valid_mask],
        train_masks[valid_mask],
        min_threshold=0.15,
        max_threshold=0.85,
        step=0.005)

    plt.figure()
    plt.plot(val_threshold, val_lb_score)
    plt.tight_layout()
    plt.show()

    val_i = np.argmax(val_lb_score)
    val_th = val_threshold[val_i]
    print(val_threshold[val_i], val_lb_score[val_i])

    precision, _, _ = do_kaggle_metric(train_predictions,
                                       train_masks, val_th)

    x = []
    y = []
    for prec, true_mask in zip(precision, train_masks):
        x.append(prec)
        y.append(cv2.countNonZero(true_mask))

    plt.figure()
    plt.scatter(x, y)
    plt.tight_layout()
    plt.show()
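# threshold_mining is exercised throughout but not defined here. Below is a
# minimal sketch consistent with its usage above, assuming do_kaggle_metric
# returns per-image precisions for masks binarized at the given threshold.
def threshold_mining_sketch(pred_masks, true_masks, min_threshold=0.15,
                            max_threshold=0.85, step=0.005):
    thresholds = np.arange(min_threshold, max_threshold + step, step)
    scores = []
    for th in thresholds:
        precision, _, _ = do_kaggle_metric(pred_masks, true_masks, th)
        scores.append(np.mean(precision))
    return thresholds, np.array(scores)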