Example #1
0
def generate_model_submission(snapshot_name: str, config_name: str, postprocess=morphology_postprocess, mine_on_val=True, export_png=False):
    """Predict train/test masks for a snapshot, mine a threshold and write a submission.

    Steps performed:
      1. Run ``predict_masks_auto`` on the train and test datasets and store
         both prediction sets as compressed ``.npz`` files next to the config.
      2. Optionally export both prediction sets as images.
      3. Run ``threshold_mining`` on train predictions (validation subset or
         the whole train set) and pick the threshold with the highest score.
      4. Binarize test predictions, optionally post-process every mask and
         save the gzip-compressed submission CSV into the config directory.

    Args:
        snapshot_name: Snapshot file name; resolved through ``auto_file``.
        config_name: Config file name; resolved through ``auto_file``. Its
            directory is used as the output directory.
        postprocess: Callable invoked as ``postprocess(image, mask)`` for every
            thresholded test mask, or ``None`` to skip post-processing.
        mine_on_val: If true, the threshold is mined only on the ids stored
            under ``'valid_set'`` in the config; otherwise on all train ids.
        export_png: If true, predictions are additionally exported through
            ``convert_predictions_to_images``.
    """
    print('Generating model submission for session', snapshot_name)

    snapshot_basename = os.path.splitext(os.path.basename(snapshot_name))[0]
    config_file = auto_file(config_name)
    save_file = auto_file(snapshot_name)
    working_dir = os.path.dirname(config_file)

    # Predictions for train dataset
    train_predictions, train_dataset = predict_masks_auto(config_file, save_file, test_or_train='train')
    train_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_train_predictions.npz')
    np.savez_compressed(train_predictions_file, **dict(zip(train_dataset.ids, train_predictions)))

    # Predictions for test dataset
    test_predictions, test_dataset = predict_masks_auto(config_file, save_file, test_or_train='test')
    test_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_test_predictions.npz')
    np.savez_compressed(test_predictions_file, **dict(zip(test_dataset.ids, test_predictions)))

    # Save predictions as uint8 masks
    if export_png:
        convert_predictions_to_images(train_predictions_file, os.path.join(working_dir, 'train_predictions'))
        convert_predictions_to_images(test_predictions_file, os.path.join(working_dir, 'test_predictions'))

    # Threshold mining: choose which predictions / ground truth to mine on
    if mine_on_val:
        # Context manager closes the config file deterministically
        with open(config_file) as config_fp:
            config = json.load(config_fp)
        valid_ids = np.array(config['valid_set'])
        valid_mask = D.get_selection_mask(train_dataset.ids, valid_ids)
        mining_predictions = train_predictions[valid_mask]
        true_masks = D.read_train_masks(valid_ids)
    else:
        mining_predictions = train_predictions
        true_masks = D.read_train_masks(train_dataset.ids)

    thresholds, lb_scores = threshold_mining(mining_predictions, true_masks, min_threshold=0.15, max_threshold=0.85, step=0.005)

    # Keep the single threshold that produced the highest score
    best = np.argmax(lb_scores)
    threshold, lb_score = float(thresholds[best]), float(lb_scores[best])

    suffix = '_mine_on_val' if mine_on_val else ''
    submit_file = os.path.join(working_dir, '{}_LB{:.4f}_TH{:.4f}{}.csv.gz'.format(snapshot_basename, lb_score, threshold, suffix))

    test_predictions = test_predictions > threshold

    if postprocess is not None:
        test_images = D.read_test_images(test_dataset.ids)
        test_predictions = np.array([postprocess(image, mask) for image, mask in zip(test_images, test_predictions)])

    create_submission(test_dataset.ids, test_predictions).to_csv(submit_file, compression='gzip', index=False)
    print('Saved submission to ', working_dir)
Example #2
0
def test_rle_encode_decode():
    """Check that a submission survives a write / decode round trip.

    Stored train predictions are thresholded at 0.45, written with
    ``create_submission`` and read back with ``decode_submission``. The
    decoded masks must cover the same ids and yield the same
    ``do_kaggle_metric`` outputs as the masks that were written.
    """
    submission_path = 'test_rle_encode_decode.csv.gz'
    stored_predictions = np.load(auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    ))

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)

    # Binarize the stored predictions in the order given by train_ids
    raw_predictions = np.array([stored_predictions[sample_id] for sample_id in train_ids])
    pred_masks = (raw_predictions > 0.45).astype(np.uint8)

    create_submission(train_ids, pred_masks).to_csv(submission_path, compression='gzip', index=False)

    decoded_ids, decoded_list = decode_submission(submission_path)
    mask_by_id = dict(zip(decoded_ids, decoded_list))
    assert set(decoded_ids) == set(train_ids)

    # Re-order decoded masks so they line up with pred_masks / true_masks
    decoded_masks = np.array([mask_by_id[sample_id] for sample_id in train_ids])

    precision_written, result_written, _ = do_kaggle_metric(pred_masks, true_masks)
    precision_decoded, result_decoded, _ = do_kaggle_metric(decoded_masks, true_masks)

    assert np.array_equal(precision_written, precision_decoded)
    assert np.array_equal(result_written, result_decoded)
    print(np.mean(precision_written), np.mean(precision_decoded))
Example #3
0
def test_pixel_acc():
    """Feed stored train predictions and true masks to ``M.PixelAccuracy`` and print its value."""
    predictions_path = 'experiments/Sep14_18_14_ternaus_netv3_naughty_roentgen/Sep14_18_14_ternaus_netv3_naughty_roentgen_best_lb.pth_train_predictions.npz'

    train_ids = D.all_train_ids()
    y_true = D.read_train_masks(train_ids)

    # Collect predictions in the same order as the ground-truth masks
    stored_predictions = np.load(predictions_path)
    y_pred = np.array([stored_predictions[sample_id] for sample_id in train_ids])

    metric = M.PixelAccuracy()
    metric.update(torch.from_numpy(y_pred), torch.from_numpy(y_true))
    print(metric.value())
Example #4
0
def test_kaggle_metric():
    """Run ``do_kaggle_metric`` on the true masks against themselves and print the mean precision."""
    train_ids = D.all_train_ids()
    ground_truth = D.read_train_masks(train_ids)

    # The ground truth is deliberately passed as both arguments; 0.5 is the
    # third positional argument of do_kaggle_metric.
    precision, _result, _threshold = do_kaggle_metric(ground_truth, ground_truth, 0.5)
    print(np.mean(precision))
Example #5
0
def test_fix_masks():
    """Dump before/after images for every mask changed by ``D.fix_masks``.

    Prints the number of changed ids and writes one PNG per changed id into
    ``test/out/test_fix_masks``: the old and new mask joined along axis 1,
    upscaled 2x with nearest-neighbour interpolation and multiplied by 255.
    """
    output_dir = 'test/out/test_fix_masks'

    train_ids = D.all_train_ids()
    masks = D.read_train_masks(train_ids)
    new_masks, changed_ids = D.fix_masks(masks, train_ids)
    print(len(changed_ids))

    os.makedirs(output_dir, exist_ok=True)

    changed = D.get_selection_mask(train_ids, changed_ids)
    before_after = zip(changed_ids, masks[changed], new_masks[changed])

    for sample_id, old_mask, new_mask in before_after:
        side_by_side = np.concatenate((old_mask, new_mask), axis=1)
        height, width = side_by_side.shape[0], side_by_side.shape[1]
        # cv2.resize takes the target size as (width, height)
        enlarged = cv2.resize(side_by_side, (width * 2, height * 2),
                              interpolation=cv2.INTER_NEAREST)
        target_path = os.path.join(output_dir, f'{sample_id}.png')
        cv2.imwrite(target_path, enlarged * 255)
Example #6
0
def make_cv_submit(inputs, prefix, output_dir='submits'):
    """Build ensemble submissions from per-model test and OOF prediction files.

    For every model name in ``inputs`` the files ``<model>_test_predictions.npz``
    and ``<model>_oof_predictions.npz`` are located with ``auto_file``. The
    threshold is mined on the merged OOF predictions; test predictions are
    ensembled with ``ArithmeticMean`` and thresholded. Two gzip-compressed
    CSV files are written into ``output_dir``: the plain ensemble and the
    ensemble after ``morphology_postprocess``.

    Args:
        inputs: Iterable of model name prefixes.
        prefix: Prefix of the generated submission file names.
        output_dir: Directory for the submissions (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    test_prediction_files = [auto_file(f'{model}_test_predictions.npz') for model in inputs]
    oof_prediction_files = [auto_file(f'{model}_oof_predictions.npz') for model in inputs]

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)
    test_ids = D.all_test_ids()

    def save_submission(masks, file_name):
        # Write one submission file into output_dir and report its name
        target = os.path.join(output_dir, file_name)
        create_submission(test_ids, masks).to_csv(target, compression='gzip', index=False)
        print('Saved submission', file_name)

    oof_masks = merge_oof(oof_prediction_files, train_ids)
    thresholds, scores = threshold_mining(oof_masks, true_masks, min_threshold=0.1, max_threshold=0.9, step=0.001)

    # Keep the threshold with the highest mined score
    best = np.argmax(scores)
    threshold = float(thresholds[best])
    lb_score = float(scores[best])
    print('Threshold', threshold, 'CV score', lb_score)

    # Arithmetic mean ensemble, binarized with the mined threshold
    ensembled_test_pred = ensemble(test_prediction_files, test_ids, averaging=ArithmeticMean) > threshold
    save_submission(ensembled_test_pred, f'{prefix}_a_mean_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz')

    postprocess = morphology_postprocess
    if postprocess is None:
        return

    # Same ensemble, with every mask passed through the post-processing step
    test_images = D.read_test_images(test_ids)
    postprocessed = np.array([postprocess(image, mask) for image, mask in zip(test_images, ensembled_test_pred)])
    save_submission(postprocessed, f'{prefix}_a_mean_PPC_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz')
Example #7
0
def test_folds_coverage():
    """Plot per-fold histograms of mask coverage for a 10-fold 'coverage' split.

    Coverage is the number of non-zero pixels of each train mask. For every
    fold the coverage of the samples outside the fold is drawn on the left
    axis and of the samples inside the fold on the right axis.
    """
    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    n_folds = 10
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed from
    # NumPy; the resulting dtype is the same.
    coverage = np.array([cv2.countNonZero(x) for x in masks], dtype=int)
    folds_d = D.get_folds_vector('coverage',
                                 images,
                                 masks,
                                 depths,
                                 n_folds=n_folds)

    f, ax = plt.subplots(1, 2)

    for fold in range(n_folds):
        train = coverage[folds_d != fold]
        val = coverage[folds_d == fold]

        ax[0].hist(train, label=f'Fold {fold}')
        ax[1].hist(val, label=f'Fold {fold}')

    f.show()
Example #8
0
def main():
    """Train a model in warmup / main / annealing phases and build submissions.

    Parses the arguments from ``U.get_argparser()``, splits the train data
    into train and validation parts, stores the session configuration as JSON
    in the experiment directory and then calls ``train``:

      * once for the warmup phase (skipped if its epoch count is 0),
      * once for the main phase,
      * five times for the annealing phase (skipped if its epoch count is 0),
        each run writing to its own snapshot file.

    Finally ``generate_model_submission`` is called for the checkpoint shared
    by the warmup and main phases and for every annealing snapshot.

    Raises:
        RuntimeError: If the train or validation ids are not sorted.
        ValueError: If fine-tuning is combined with encoder freezing.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates to
    # train_session_args are also visible through args.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.all_train_ids()
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        # Keep a record of which samples had their masks altered
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    # Here we can exclude some images from training, but keep in validation
    train_mask = D.drop_some(img_train,
                             mask_train,
                             drop_black=True,
                             drop_vstrips=args.drop_vstrips,
                             drop_few=args.drop_few)
    ids_train = ids_train[train_mask]
    img_train = img_train[train_mask]
    mask_train = mask_train[train_mask]
    depth_train = depth_train[train_mask]

    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test is not sorted")

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    # Validation gets only the prepare transform; training also gets augmentation
    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    print('Train set size :', len(ids_train), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(ids_test), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Optimizer      :', args.optimizer, 'wd', args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): the restored best_score is only printed; it does not
        # seed best_metric_val — confirm this is intended.
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    # The config (including the train/valid split) is what
    # generate_model_submission receives at the end of the run.
    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    weights = {
        'mask': 1.0,
        'class': 0.05,
        'dsv': 0.1,
    }

    bce = U.get_loss('bce')
    bce_lovasz = U.get_loss('bce_lovasz')
    bce_jaccard = U.get_loss('bce_jaccard')

    # Only the 'mask' loss differs between the phases
    losses = {
        'warmup': {
            'mask': bce,
            'class': bce,
            'dsv': bce,
        },
        'main': {
            'mask': bce_jaccard,
            'class': bce,
            'dsv': bce,
        },
        'annealing': {
            'mask': bce_lovasz,
            'class': bce,
            'dsv': bce,
        }
    }

    epochs = {'warmup': 50, 'main': 250, 'annealing': 50}

    if args.fast:
        # Fast mode: a single epoch per phase
        epochs = dict.fromkeys(epochs, 1)

    learning_rates = {
        'warmup': args.learning_rate,
        'main': 1e-3,
        'annealing': 1e-2
    }

    # Warmup phase
    if epochs['warmup']:
        print(torch.cuda.max_memory_allocated(),
              torch.cuda.max_memory_cached())
        trainable_parameters = filter(lambda p: p.requires_grad,
                                      model.parameters())
        optimizer = U.get_optimizer(args.optimizer,
                                    trainable_parameters,
                                    learning_rates['warmup'],
                                    weight_decay=args.weight_decay)
        scheduler = None  # StepLR(optimizer, gamma=0.5, step_size=50)

        train_history, best_metric_val, start_epoch = train(
            model,
            losses['warmup'],
            weights,
            optimizer,
            scheduler,
            trainloader,
            validloader,
            writer,
            start_epoch,
            epochs=epochs['warmup'],
            early_stopping=args.early_stopping,
            train_history=train_history,
            experiment_dir=exp_dir,
            target_metric=target_metric,
            best_metric_val=best_metric_val,
            target_metric_mode=target_metric_mode,
            checkpoint_filename=best_lb_checkpoint)
        U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_warmup.pth'),
                          model,
                          start_epoch,
                          train_history,
                          metric_name=target_metric,
                          metric_score=best_metric_val)

        # Drop optimizer state before the next phase allocates its own
        del trainable_parameters, optimizer, scheduler
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        print('Finished warmup phase. Main train loop.')

    # Main training phase
    print(torch.cuda.max_memory_allocated(), torch.cuda.max_memory_cached())
    trainable_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
    optimizer = U.get_optimizer(args.optimizer,
                                trainable_parameters,
                                learning_rates['main'],
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='max',
                                  patience=50,
                                  factor=0.5,
                                  min_lr=1e-5)

    train_history, best_metric_val, start_epoch = train(
        model,
        losses['main'],
        weights,
        optimizer,
        scheduler,
        trainloader,
        validloader,
        writer,
        start_epoch,
        epochs=epochs['main'],
        early_stopping=args.early_stopping,
        train_history=train_history,
        experiment_dir=exp_dir,
        target_metric=target_metric,
        best_metric_val=best_metric_val,
        target_metric_mode=target_metric_mode,
        checkpoint_filename=best_lb_checkpoint)
    del trainable_parameters, optimizer, scheduler
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    # Checkpoints that will each get their own submission at the end
    snapshots = [best_lb_checkpoint]

    U.save_checkpoint(os.path.join(exp_dir, f'{prefix}_main.pth'),
                      model,
                      start_epoch,
                      train_history,
                      metric_name=target_metric,
                      metric_score=best_metric_val)

    print('Finished train phase.')

    # Cosine annealing
    if epochs['annealing']:

        for snapshot in range(5):
            print(f'Starting annealing phase {snapshot}')
            print(torch.cuda.max_memory_allocated(),
                  torch.cuda.max_memory_cached())
            # model.set_fine_tune(True)
            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            # Every annealing cycle uses a fresh SGD optimizer and scheduler
            optimizer = U.get_optimizer('sgd',
                                        trainable_parameters,
                                        learning_rates['annealing'],
                                        weight_decay=args.weight_decay)
            scheduler = CosineAnnealingLR(optimizer,
                                          epochs['annealing'],
                                          eta_min=1e-7)

            snapshot_name = os.path.join(
                exp_dir, f'{prefix}_{target_metric}_snapshot_{snapshot}.pth')
            snapshots.append(snapshot_name)
            # best_metric_val=0 is passed so each cycle tracks its best
            # score independently of earlier phases.
            train_history, best_metric_val, start_epoch = train(
                model,
                losses['annealing'],
                weights,
                optimizer,
                scheduler,
                trainloader,
                validloader,
                writer,
                start_epoch,
                epochs=epochs['annealing'],
                early_stopping=args.early_stopping,
                train_history=train_history,
                experiment_dir=exp_dir,
                target_metric=target_metric,
                best_metric_val=0,
                target_metric_mode=target_metric_mode,
                checkpoint_filename=snapshot_name)
            del trainable_parameters, optimizer, scheduler
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    print('Training finished')
    train_history.to_csv(os.path.join(exp_dir, 'train_history.csv'),
                         index=False)

    for snapshot_file in snapshots:
        generate_model_submission(snapshot_file,
                                  config_fname,
                                  mine_on_val=True)
Example #9
0
def main():
    """Train a model with a single epoch loop and build a submission.

    Parses the arguments from ``U.get_argparser()``, splits the (already
    filtered) train ids into train and validation parts, stores the session
    configuration as JSON in the experiment directory and runs up to
    ``args.epochs`` epochs of ``process_epoch`` on the train and validation
    loaders. The optimizer (and LR scheduler, if configured) is re-created on
    the first epoch, every ``args.restart_every`` epochs and when the encoder
    freeze period ends. The model is checkpointed whenever the target metric
    improves, and ``generate_model_submission`` is called for that checkpoint
    after training.

    Raises:
        RuntimeError: If the train or validation ids are not sorted.
        ValueError: If fine-tuning is combined with encoder freezing.
    """
    parser = U.get_argparser()
    args = parser.parse_args()
    U.set_manual_seed(args.seed)

    # vars() returns the namespace's own dict, so later updates to
    # train_session_args are also visible through args.
    train_session_args = vars(args)
    train_session = U.get_random_name()
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.prepare}_{args.augmentation}_{train_session}'
    if args.fold is not None:
        prefix += f'_fold_{args.stratify}_{args.fold}'

    log_dir = os.path.join('runs', prefix)
    exp_dir = os.path.join('experiments', args.model, args.prepare,
                           args.augmentation, prefix)
    os.makedirs(exp_dir, exist_ok=True)

    train_ids = D.get_train_ids(drop_black=True,
                                drop_vstrips=args.drop_vstrips,
                                drop_empty=args.drop_empty,
                                drop_few=args.drop_few,
                                fast=args.fast)
    depths = D.read_depths(train_ids)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)

    if args.fix_masks:
        masks, changed_ids = D.fix_masks(masks, train_ids)
        # Keep a record of which samples had their masks altered
        with open(os.path.join(exp_dir, 'fixed_masks.txt'), 'w') as f:
            for sample_id in changed_ids:
                f.write(sample_id)
                f.write('\n')
        print(f'Fixed {len(changed_ids)} masks')

    if args.fold is not None:
        train_indexes, test_indexes = D.get_train_test_split_for_fold(
            args.stratify, args.fold, train_ids)
    else:
        train_indexes, test_indexes = train_test_split(
            np.arange(len(train_ids)),
            shuffle=False,
            random_state=args.split_seed,
            test_size=0.2)

    ids_train, ids_test = train_ids[train_indexes], train_ids[test_indexes]
    if not is_sorted(ids_train):
        raise RuntimeError("ids_train is not sorted")
    if not is_sorted(ids_test):
        raise RuntimeError("ids_test is not sorted")

    img_train, img_test = images[train_indexes], images[test_indexes]
    mask_train, mask_test = masks[train_indexes], masks[test_indexes]
    depth_train, depth_test = depths[train_indexes], depths[test_indexes]

    prepare_fn = D.get_prepare_fn(args.prepare, **train_session_args)

    # This line valid if we apply prepare_fn first and then do augmentation
    target_size = prepare_fn.target_size if prepare_fn is not None else D.ORIGINAL_SIZE
    # prepare_fn may be None (see the checks around it), so its ``backward``
    # attribute must not be dereferenced unconditionally.
    # NOTE(review): assumes process_epoch accepts mask_postprocess=None as
    # "no post-processing" — confirm against its implementation.
    mask_postprocess = prepare_fn.backward if prepare_fn is not None else None

    build_augmentation_fn = D.AUGMENTATION_MODES[args.augmentation]
    aug = build_augmentation_fn(target_size, border_mode=args.border_mode)

    # Validation gets only the prepare transform; training also gets augmentation
    train_transform_list = []
    valid_transform_list = []
    if prepare_fn is not None:
        train_transform_list.append(prepare_fn.t_forward)
        valid_transform_list.append(prepare_fn.t_forward)

    train_transform_list.append(aug)

    trainset = D.ImageAndMaskDataset(ids_train,
                                     img_train,
                                     mask_train,
                                     depth_train,
                                     augment=A.Compose(train_transform_list))

    validset = D.ImageAndMaskDataset(ids_test,
                                     img_test,
                                     mask_test,
                                     depth_test,
                                     augment=A.Compose(valid_transform_list))

    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             num_workers=args.workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=True)

    validloader = DataLoader(validset,
                             batch_size=args.batch_size,
                             pin_memory=True,
                             drop_last=False,
                             shuffle=False)

    # Save train/val split for future use
    train_session_args.update({
        'train_set': list(ids_train),
        'valid_set': list(ids_test)
    })

    # Declare variables we will use during training
    start_epoch = 0
    train_history = pd.DataFrame()
    scheduler = None
    optimizer = None

    target_metric = args.target_metric
    target_metric_mode = 'max'
    best_metric_val = 0
    best_lb_checkpoint = os.path.join(exp_dir, f'{prefix}_{target_metric}.pth')

    model = U.get_model(args.model,
                        num_classes=args.num_classes,
                        num_channels=trainset.channels(),
                        abn=args.abn,
                        use_dropout=not args.no_dropout,
                        pretrained=not args.no_pretrain).cuda()

    # Report the number of samples (len of a DataLoader is the batch count)
    print('Train set size :', len(ids_train), 'batch size',
          trainloader.batch_size)
    print('Valid set size :', len(ids_test), 'batch size',
          validloader.batch_size)
    print('Tile transform :', prepare_fn if prepare_fn is not None else "None")
    print('Model          :', args.model, count_parameters(model))
    print('Augmentations  :', args.augmentation, args.border_mode)
    print('Input channels :', trainset.channels())
    print('Output classes :', args.num_classes)
    print('Criterion      :', args.loss)
    print('Optimizer      :', args.optimizer, args.learning_rate,
          args.weight_decay)
    print('Use of dropout :', not args.no_dropout)
    print('Train session  :', train_session)
    print('Freeze encoder :', args.freeze_encoder)
    print('Seed           :', args.seed, args.split_seed)
    print('Restart every  :', args.restart_every)
    print('Fold           :', args.fold, args.stratify)
    print('Fine-tune      :', args.fine_tune)
    print('ABN Mode       :', args.abn)
    print('Fix masks      :', args.fix_masks)

    if args.resume:
        fname = U.auto_file(args.resume)
        # NOTE(review): the restored best_score is only printed; it does not
        # seed best_metric_val — confirm this is intended.
        start_epoch, train_history, best_score = U.restore_checkpoint(
            fname, model)
        print(train_history)
        print('Resuming training from epoch', start_epoch, ' and score',
              best_score, args.resume)

    segmentation_loss = U.get_loss(args.loss)

    if args.fine_tune and args.freeze_encoder > 0:
        raise ValueError(
            'Incompatible options --fine-tune and --freeze-encoder')

    writer = SummaryWriter(log_dir)
    writer.add_text('train/params',
                    '```' + json.dumps(train_session_args, indent=2) + '```',
                    0)

    # The config (including the train/valid split) is what
    # generate_model_submission receives at the end of the run.
    config_fname = os.path.join(exp_dir, f'{train_session}.json')
    with open(config_fname, 'w') as f:
        f.write(json.dumps(train_session_args, indent=2))

    # Start training loop
    no_improvement_epochs = 0

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # On Epoch begin
        if U.should_quit(exp_dir) or (
                args.early_stopping is not None
                and no_improvement_epochs > args.early_stopping):
            break

        epochs_trained = epoch - start_epoch
        # Re-create the optimizer on the first epoch, on every periodic
        # restart and at the epoch where the encoder freeze period ends.
        should_restart_optimizer = (
            args.restart_every > 0 and epochs_trained % args.restart_every
            == 0) or (epochs_trained
                      == args.freeze_encoder) or optimizer is None

        if should_restart_optimizer:
            del optimizer
            if args.fine_tune:
                model.set_fine_tune(args.fine_tune)
            else:
                model.set_encoder_training_enabled(
                    epochs_trained >= args.freeze_encoder)

            # Only parameters that currently require gradients are optimized
            trainable_parameters = filter(lambda p: p.requires_grad,
                                          model.parameters())
            optimizer = U.get_optimizer(args.optimizer,
                                        trainable_parameters,
                                        args.learning_rate,
                                        weight_decay=args.weight_decay)

            print('Restarting optimizer state', epoch, count_parameters(model))

            if args.lr_scheduler:
                scheduler = U.get_lr_scheduler(args.lr_scheduler, optimizer,
                                               args.epochs)

        # ReduceLROnPlateau needs the metric, so it is stepped after validation
        if scheduler is not None and not isinstance(scheduler,
                                                    ReduceLROnPlateau):
            scheduler.step(epochs_trained)

        U.log_learning_rate(writer, optimizer, epoch)

        # Epoch
        train_metrics = process_epoch(model,
                                      segmentation_loss,
                                      optimizer,
                                      trainloader,
                                      epoch,
                                      True,
                                      writer,
                                      mask_postprocess=mask_postprocess)
        valid_metrics = process_epoch(model,
                                      segmentation_loss,
                                      None,
                                      validloader,
                                      epoch,
                                      False,
                                      writer,
                                      mask_postprocess=mask_postprocess)

        all_metrics = {}
        all_metrics.update(train_metrics)
        all_metrics.update(valid_metrics)

        # On Epoch End
        summary = {
            'epoch': [int(epoch)],
            'lr': [float(optimizer.param_groups[0]['lr'])]
        }
        for k, v in all_metrics.items():
            summary[k] = [v]

        # pd.concat replaces DataFrame.append, which newer pandas no longer has
        train_history = pd.concat(
            [train_history, pd.DataFrame.from_dict(summary)],
            ignore_index=True)
        print(epoch, summary)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(all_metrics[target_metric], epochs_trained)

        if U.is_better(all_metrics[target_metric], best_metric_val,
                       target_metric_mode):
            best_metric_val = all_metrics[target_metric]
            U.save_checkpoint(best_lb_checkpoint,
                              model,
                              epoch,
                              train_history,
                              metric_name=target_metric,
                              metric_score=best_metric_val)
            print('Checkpoint saved', epoch, best_metric_val,
                  best_lb_checkpoint)
            no_improvement_epochs = 0
        else:
            no_improvement_epochs += 1

    print('Training finished')

    generate_model_submission(best_lb_checkpoint,
                              config_fname,
                              mine_on_val=True)
Example #10
0
from lib import dataset as D
import pandas as pd

# Script entry point: builds several 5-fold assignment vectors for the train
# set. NOTE(review): this fragment is cut off inside the final DataFrame
# literal, so the remaining columns and the output destination are not visible.
if __name__ == '__main__':
    # Request the train ids with every drop_* filter switched off.
    train_ids = D.get_train_ids(drop_black=False,
                                drop_vstrips=False,
                                drop_empty=False,
                                drop_few=False)
    images = D.read_train_images(train_ids)
    masks = D.read_train_masks(train_ids)
    depths = D.read_depths(train_ids)

    # Fold vector using the 'coverage' strategy (presumably stratified by
    # salt coverage, going by the variable name -- confirm in D.get_folds_vector).
    folds_by_salt = D.get_folds_vector('coverage',
                                       images,
                                       masks,
                                       depths,
                                       n_folds=5)
    # Fold vector using the 'depth' strategy.
    folds_by_depth = D.get_folds_vector('depth',
                                        images,
                                        masks,
                                        depths,
                                        n_folds=5)
    # Fold vector with no strategy name (None) and a fixed random_state;
    # presumably a plain random split -- confirm in D.get_folds_vector.
    folds_by_rnd = D.get_folds_vector(None,
                                      images,
                                      masks,
                                      depths,
                                      n_folds=5,
                                      random_state=42)

    # Collect the ids (and, presumably, the fold vectors above) into a table.
    pd.DataFrame.from_dict({
        'id': train_ids,
Example #11
0
def test_prediction_pipeline_tta_pre():
    """Predict train-set masks with left-right flip TTA and plot threshold scores.

    Loads a fixed config/snapshot pair, runs the model over the dataset built
    with ``test_or_train='train'`` using ``tta.tta_fliplr_collate`` batches,
    undoes the augmentation, applies a sigmoid and resizes every mask to
    ``D.ORIGINAL_SIZE``. When the model returns a ``(masks, presence)`` pair,
    masks whose arg-max presence class is 0 are zeroed. Finally the
    predictions are passed to ``threshold_mining`` against the true train
    masks and the resulting curve is shown with matplotlib.

    Returns:
        tuple: ``(pred_masks, dataset)`` -- the stacked predicted masks as a
        numpy array and the dataset they were computed on.

    Raises:
        RuntimeError: if the model output is neither a tensor nor a 2-tuple.
    """
    from lib import tta

    device = 'cuda'
    config_path = auto_file('infallible_lamport.json')
    snapshot_path = auto_file(
        'Oct09_23_17_wider_unet_224pad_medium_infallible_lamport_val_lb.pth')

    # Use a context manager so the config file handle is closed right away
    # instead of being left for the garbage collector.
    with open(config_path) as config_fp:
        config = json.load(config_fp)
    snapshot = torch.load(snapshot_path)

    prepare_fn = D.get_prepare_fn(config['prepare'], **config)
    dataset = get_test_dataset(dataset=config['dataset'],
                               prepare=prepare_fn,
                               test_or_train='train')

    model = get_model(config['model'],
                      num_classes=config['num_classes'],
                      num_channels=dataset.channels(),
                      pretrained=False).to(device)

    if device == 'cpu':
        warnings.warn('Using CPU for prediction. It will be SLOW.')

    model.load_state_dict(snapshot['model'])
    model.eval()

    collate_fn = tta.tta_fliplr_collate
    # Half the configured batch size, never below 1. Presumably the TTA
    # collate adds a flipped copy of every image -- confirm in lib.tta.
    batch_size = max(1, config['batch_size'] // 2)

    pred_masks = []
    with torch.no_grad():
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            pin_memory=True,
                            collate_fn=collate_fn)
        for images, image_ids in tqdm(loader,
                                      total=len(loader),
                                      desc='Predicting'):
            images = images.to(device, non_blocking=True)

            output = model(images)
            is_raw_mask = isinstance(output, torch.Tensor)
            is_mask_and_class = isinstance(output, tuple) and len(output) == 2

            if is_raw_mask:
                masks = output
            elif is_mask_and_class:
                masks, presence = output
            else:
                raise RuntimeError('Unknown output type')

            # Undo the dataset resize, drop singleton axes per sample, then
            # merge the TTA predictions before squashing logits to [0, 1].
            masks = dataset.resize_fn.backward(masks)
            masks = np.array([np.squeeze(x) for x in masks.cpu().numpy()])
            masks = tta.tta_fliplr_deaug(masks)
            masks = sigmoid(masks)

            if is_mask_and_class:
                # Blank out masks for samples whose predicted class is 0.
                presence = presence.softmax(dim=1).cpu().numpy()
                presence = tta.average_classes(presence, 2)
                presence = np.argmax(presence, axis=1)
                masks = zero_masks_inplace(masks, presence == 0)

            for mask, image_id in zip(masks, image_ids):
                mask = cv2.resize(mask, (D.ORIGINAL_SIZE, D.ORIGINAL_SIZE),
                                  interpolation=cv2.INTER_LANCZOS4)
                pred_masks.append(mask)

    # Drop the references to the model and loader before scoring.
    del model, loader

    pred_masks = np.array(pred_masks)
    true_masks = D.read_train_masks(dataset.ids)

    plt.figure()
    binarization_thresholds, scores = threshold_mining(pred_masks,
                                                       true_masks,
                                                       min_threshold=0,
                                                       max_threshold=1)
    plt.plot(binarization_thresholds, scores)
    plt.title("test_prediction_pipeline_tta_pre")
    plt.show()
    return pred_masks, dataset
Example #12
0
def test_map():
    """Run threshold mining with the train masks as both inputs and print it."""
    all_ids = D.all_train_ids()
    ground_truth = D.read_train_masks(all_ids)
    mining_result = M.threshold_mining(ground_truth, ground_truth)
    print(mining_result)
def test_inspect_train_predictions():
    """Inspect stored train predictions: threshold curves and per-image precision.

    Steps performed:
      1. Read all train ids, images and masks and print their shapes.
      2. Export the stored train/test prediction archives as images under
         ``test/train_predictions`` and ``test/test_predictions``.
      3. Run ``threshold_mining`` over the whole train set, plot the curve
         and print the best threshold with its score.
      4. Repeat the mining on the ``valid_set`` ids from the config, show
         the plots and print the best validation threshold with its score.
      5. Call ``do_kaggle_metric`` on the whole train set at the validation
         threshold and scatter-plot the per-image precision against the
         number of non-zero pixels in the true mask.
    """
    train_ids = D.all_train_ids()
    train_images = D.read_train_images(train_ids)
    train_masks = D.read_train_masks(train_ids)
    print(train_ids.shape, train_images.shape, train_masks.shape)

    config_path = auto_file('wonderful_goldberg.json')
    train_npz_path = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    test_npz_path = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_test_predictions.npz'
    )

    convert_predictions_to_images(test_npz_path,
                                  os.path.join('test', 'test_predictions'))
    convert_predictions_to_images(train_npz_path,
                                  os.path.join('test', 'train_predictions'))

    # NOTE(review): the path was already resolved by auto_file above; the
    # second call looks redundant -- confirm before removing it.
    # The .npz archive holds an open file handle, so read every array inside
    # a context manager and let it close the archive afterwards.
    with np.load(auto_file(train_npz_path)) as archive:
        train_predictions = np.array(
            [archive[image_id] for image_id in train_ids])
    print(train_predictions.shape)

    threshold, lb_score = threshold_mining(train_predictions,
                                           train_masks,
                                           min_threshold=0.15,
                                           max_threshold=0.85,
                                           step=0.005)

    plt.figure()
    plt.plot(threshold, lb_score)
    plt.tight_layout()

    i = np.argmax(lb_score)
    best_threshold, best_lb_score = float(threshold[i]), float(lb_score[i])
    print(best_threshold, best_lb_score)

    # NOTE(review): same redundant-looking re-resolution as for the archive.
    config_file = auto_file(config_path)

    # Close the config file deterministically instead of leaking the handle.
    with open(config_file) as config_fp:
        config = json.load(config_fp)
    valid_ids = np.array(config['valid_set'])
    valid_mask = D.get_selection_mask(train_ids, valid_ids)
    val_threshold, val_lb_score = threshold_mining(
        train_predictions[valid_mask],
        train_masks[valid_mask],
        min_threshold=0.15,
        max_threshold=0.85,
        step=0.005)

    plt.figure()
    plt.plot(val_threshold, val_lb_score)
    plt.tight_layout()
    plt.show()

    val_i = np.argmax(val_lb_score)
    val_th = val_threshold[val_i]
    print(val_threshold[val_i], val_lb_score[val_i])

    # Only the first returned value is used below; discarding the other two
    # keeps the mined `threshold` array from being overwritten.
    precision, _, _ = do_kaggle_metric(train_predictions, train_masks, val_th)

    x = []
    y = []
    for prec, true_mask in zip(precision, train_masks):
        x.append(prec)
        y.append(cv2.countNonZero(true_mask))

    plt.figure()
    plt.scatter(x, y)
    plt.tight_layout()
    plt.show()