Example 1
def generate_model_submission(snapshot_name: str, config_name: str, postprocess=morphology_postprocess, mine_on_val=True, export_png=False):
    print('Generating model submission for session', snapshot_name)

    snapshot_basename = os.path.splitext(os.path.basename(snapshot_name))[0]
    config_file = auto_file(config_name)
    save_file = auto_file(snapshot_name)
    working_dir = os.path.dirname(config_file)

    # OOF
    # stratify = config['stratify']
    # fold = config['fold']
    #
    # train_ids = D.all_train_ids()
    # train_indexes, test_indexes = D.get_train_test_split_for_fold(stratify, fold, train_ids)
    # train_predictions = np.load(train_predictions)

    # Predictions for train dataset
    train_predictions, train_dataset = predict_masks_auto(config_file, save_file, test_or_train='train')
    train_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_train_predictions.npz')
    np.savez_compressed(train_predictions_file, **{image_id: image for image_id, image in zip(train_dataset.ids, train_predictions)})

    # Predictions for test dataset
    test_predictions, test_dataset = predict_masks_auto(config_file, save_file, test_or_train='test')
    test_predictions_file = os.path.join(working_dir, f'{snapshot_basename}_test_predictions.npz')
    np.savez_compressed(test_predictions_file, **{image_id: image for image_id, image in zip(test_dataset.ids, test_predictions)})

    # Save predictions as uint8 masks
    if export_png:
        convert_predictions_to_images(train_predictions_file, os.path.join(working_dir, 'train_predictions'))
        convert_predictions_to_images(test_predictions_file, os.path.join(working_dir, 'test_predictions'))

    # Threshold mining
    if mine_on_val:
        with open(config_file) as f:
            config = json.load(f)
        valid_ids = np.array(config['valid_set'])
        valid_mask = D.get_selection_mask(train_dataset.ids, valid_ids)
        true_masks = D.read_train_masks(valid_ids)
        threshold, lb_score = threshold_mining(train_predictions[valid_mask], true_masks, min_threshold=0.15, max_threshold=0.85, step=0.005)
    else:
        true_masks = D.read_train_masks(train_dataset.ids)
        threshold, lb_score = threshold_mining(train_predictions, true_masks, min_threshold=0.15, max_threshold=0.85, step=0.005)

    i = np.argmax(lb_score)
    threshold, lb_score = float(threshold[i]), float(lb_score[i])

    suffix = '_mine_on_val' if mine_on_val else ''
    submit_file = os.path.join(working_dir, '{}_LB{:.4f}_TH{:.4f}{}.csv.gz'.format(snapshot_basename, lb_score, threshold, suffix))

    test_predictions = test_predictions > threshold

    if postprocess is not None:
        final_masks = []
        for image, mask in zip(D.read_test_images(test_dataset.ids), test_predictions):
            mask = postprocess(image, mask)
            final_masks.append(mask)
        test_predictions = np.array(final_masks)

    create_submission(test_dataset.ids, test_predictions).to_csv(submit_file, compression='gzip', index=False)
    print('Saved submission to', working_dir)
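
All six examples revolve around threshold_mining, whose implementation is not shown in this listing. From the call sites, it sweeps binarization thresholds over the predicted probability masks and returns parallel arrays of thresholds and scores, which the callers then reduce with np.argmax. The following is a minimal sketch of that contract; threshold_mining_sketch and its mean-IoU fallback scorer are illustrative assumptions, not the repository's code (the real scorer is presumably the competition metric that appears elsewhere as do_kaggle_metric).

import numpy as np

def threshold_mining_sketch(pred_masks, true_masks,
                            min_threshold=0.15, max_threshold=0.85, step=0.005,
                            score_fn=None):
    # Sweep binarization thresholds and return (thresholds, scores),
    # matching the shape the callers reduce with np.argmax.
    if score_fn is None:
        # Fallback scorer: mean IoU over the batch; a stand-in for the
        # competition metric the real code presumably uses.
        def score_fn(pred, true):
            inter = np.logical_and(pred, true).sum(axis=(-2, -1))
            union = np.logical_or(pred, true).sum(axis=(-2, -1))
            per_image = np.where(union > 0, inter / np.maximum(union, 1), 1.0)
            return float(per_image.mean())

    thresholds = np.arange(min_threshold, max_threshold + step, step)
    scores = np.array([score_fn(pred_masks > t, true_masks > 0.5)
                       for t in thresholds])
    return thresholds, scores
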
Example 2
def make_cv_submit(inputs, prefix, output_dir='submits'):
    os.makedirs(output_dir, exist_ok=True)

    test_predictions = [auto_file(f'{model}_test_predictions.npz') for model in inputs]
    oof_predictions = [auto_file(f'{model}_oof_predictions.npz') for model in inputs]

    train_ids = D.all_train_ids()
    true_masks = D.read_train_masks(train_ids)
    test_ids = D.all_test_ids()

    pred_masks = merge_oof(oof_predictions, train_ids)
    threshold, lb_score = threshold_mining(pred_masks, true_masks, min_threshold=0.1, max_threshold=0.9, step=0.001)

    i = np.argmax(lb_score)
    threshold, lb_score = float(threshold[i]), float(lb_score[i])
    print('Threshold', threshold, 'CV score', lb_score)

    # Arithmetic
    ensembled_test_pred = ensemble(test_predictions, test_ids, averaging=ArithmeticMean)
    ensembled_test_pred = ensembled_test_pred > threshold

    submit_file = f'{prefix}_a_mean_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
    create_submission(test_ids, ensembled_test_pred).to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
    print('Saved submission', submit_file)

    postprocess = morphology_postprocess
    if postprocess is not None:
        final_masks = []
        for image, mask in zip(D.read_test_images(test_ids), ensembled_test_pred):
            mask = postprocess(image, mask)
            final_masks.append(mask)
        # Use a fresh name: test_predictions still holds the npz file paths
        postprocessed_pred = np.array(final_masks)

        submit_file = f'{prefix}_a_mean_PPC_CV_{lb_score:.4f}_TH{threshold:.4f}.csv.gz'
        create_submission(test_ids, postprocessed_pred).to_csv(os.path.join(output_dir, submit_file), compression='gzip', index=False)
        print('Saved submission', submit_file)
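
merge_oof and ensemble are likewise external to this listing. Here is a plausible sketch of merge_oof, assuming each .npz file stores one prediction array per image id, the same layout the savez_compressed calls in Example 1 produce; merge_oof_sketch is a guess at the behavior, not the original source.

import numpy as np

def merge_oof_sketch(oof_prediction_files, train_ids):
    # Load each model's out-of-fold predictions (one npz per model,
    # keyed by image id) and average them per image, in train_ids order.
    per_model = []
    for path in oof_prediction_files:
        data = np.load(path)
        per_model.append(np.array([data[image_id] for image_id in train_ids]))
    return np.mean(per_model, axis=0)
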
Example 3
def process_epoch(model,
                  criterions: dict,
                  criterion_weights: dict,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  tag=None) -> dict:
    avg_loss = AverageMeter()

    metrics = {
        'iou': JaccardIndex(0.5),
        'acc': PixelAccuracy(),
    }

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_ids = []
    epoch_image = []
    epoch_mask_labels = []
    epoch_mask_pred = []
    epoch_mask_true = []
    epoch_losses = {key: [] for key in criterions}

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch {epoch}')

            batch = None
            total_loss = None

            for batch_index, batch in enumerate(dataloader):

                epoch_ids.extend(batch['id'])
                epoch_mask_true.extend(to_numpy(batch['mask']))
                epoch_mask_labels.extend(compute_mask_class(batch['mask']))
                epoch_image.extend(to_numpy(batch['image'])[:, 0:1, :, :])

                # Move all data to GPU
                for key, value in batch.items():
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.cuda(non_blocking=True)

                if is_train:
                    with torch.autograd.detect_anomaly():
                        optimizer.zero_grad()
                        predictions = model(batch)
                        losses = {key: criterions[key](predictions[key],
                                                       batch[target_for_dsv(key)])
                                  for key in predictions}
                        total_loss = compute_total_loss(
                            losses, criterion_weights)
                        total_loss.mean().backward()
                        optimizer.step()
                else:
                    predictions = model(batch)
                    losses = {key: criterions[key](predictions[key],
                                                   batch[target_for_dsv(key)])
                              for key in predictions}
                    total_loss = compute_total_loss(losses, criterion_weights)

                mask_pred_activate = logit_to_prob(predictions['mask'],
                                                   criterions['mask'])

                epoch_mask_pred.extend(to_numpy(mask_pred_activate))

                # Accumulate per-head losses
                for loss_name, loss_value in losses.items():
                    epoch_losses[loss_name].extend(to_numpy(loss_value))

                # Log metrics
                for name, metric in metrics.items():
                    metric.update(mask_pred_activate, batch['mask'])

                if is_train and batch_index == 0:
                    # Log gradients at the first batch of the epoch
                    for name, param in model.named_parameters():
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                f'{tag}/grad/{name}', to_numpy(param.grad),
                                epoch)

                avg_loss.extend(to_numpy(total_loss))
                tq.set_postfix(loss='{:.3f}'.format(avg_loss.avg))
                tq.update()

            del batch, total_loss

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    epoch_ids = np.array(epoch_ids)
    epoch_image = np.array(epoch_image)
    epoch_mask_true = np.array(epoch_mask_true)
    epoch_mask_pred = np.array(epoch_mask_pred)

    # End of train epoch

    # Log losses (use a distinct loop variable; reusing epoch_losses here
    # would shadow the dict being iterated)
    for loss_name, loss_values in epoch_losses.items():
        if len(loss_values):
            summary_writer.add_scalar(f'{tag}/loss/{loss_name}',
                                      np.mean(loss_values), epoch)
            summary_writer.add_histogram(f'{tag}/loss/{loss_name}/histogram',
                                         np.array(loss_values), epoch)

    # epoch_mask_labels = np.array(epoch_mask_labels)
    # for cls in np.unique(epoch_mask_labels):
    #     summary_writer.add_scalar(f'{tag}/epoch/seg_loss_class_{cls}', np.mean(epoch_losses[epoch_mask_labels == cls]), epoch)

    # Mine thresholds on val
    if not is_train:
        thresholds, scores = threshold_mining(epoch_mask_pred, epoch_mask_true)
        i = np.argmax(scores)
        optimal_threshold = float(thresholds[i])
        lb_at_optimal_threshold = float(scores[i])
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_threshold',
                                  optimal_threshold, epoch)
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_score',
                                  lb_at_optimal_threshold, epoch)

    # Compute LB metric
    precision, result, threshold = do_kaggle_metric(epoch_mask_pred,
                                                    epoch_mask_true, 0.50)
    lb_50 = np.mean(precision)
    summary_writer.add_scalar(f'{tag}/epoch/lb', lb_50, epoch)

    # Plot negative examples (LB)
    iou_metric_asc = np.argsort(precision)[:64]
    iou_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[iou_metric_asc],
        'iou_score': precision[iou_metric_asc]
    })

    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/image',
        make_grid(torch.from_numpy(epoch_image[iou_metric_asc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[iou_metric_asc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[iou_metric_asc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/lb/ids',
                            '```' + iou_negatives.to_csv(index=False) + '```',
                            epoch)

    if is_train:
        # Plot histogram of parameters after each epoch
        for name, param in model.named_parameters():
            if param.grad is not None:
                summary_writer.add_histogram('model/' + name,
                                             to_numpy(param.data), epoch)

    metric_scores = {f'{tag}_lb': lb_50, f'{tag}_loss': avg_loss.avg}

    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
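
compute_total_loss combines the per-head losses using criterion_weights. One reading consistent with the code above is a weighted sum that keeps the per-sample dimension, so that total_loss.mean().backward() and avg_loss.extend(to_numpy(total_loss)) both remain valid; compute_total_loss_sketch below is an assumption, not the repository's implementation.

def compute_total_loss_sketch(losses: dict, criterion_weights: dict):
    # Weighted sum of per-head losses; each value stays a per-sample
    # tensor, compatible with the .mean().backward() call above.
    total = None
    for key, loss in losses.items():
        weighted = loss * criterion_weights.get(key, 1.0)
        total = weighted if total is None else total + weighted
    return total
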
Example 4
def process_epoch(model,
                  seg_criterion,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  mask_postprocess=noop,
                  tag=None) -> dict:
    avg_seg_loss = AverageMeter()

    metrics = {
        'iou': JaccardIndex(0.5),
        'acc': PixelAccuracy(),
    }

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_ids = []

    epoch_seg_losses = []
    epoch_msk_labels = []

    epoch_image = []

    epoch_mask_pred = []
    epoch_mask_true = []

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch {epoch}')

            image = None
            mask_true = None
            msk_pred = None
            seg_loss = None

            for batch_index, (image, mask_true,
                              sample_ids) in enumerate(dataloader):
                mask_true = mask_postprocess(mask_true)

                epoch_ids.extend(sample_ids)
                epoch_image.extend(image.detach().numpy()[:, 0:1, :, :])
                epoch_mask_true.extend(mask_true.detach().numpy())

                mask_class_labels = compute_mask_class(mask_true)
                image, mask_true = image.cuda(
                    non_blocking=True), mask_true.cuda(non_blocking=True)

                if isinstance(seg_criterion, CELoss):
                    # Squeeze only the channel dim; a bare .squeeze() would
                    # also drop the batch dim when the batch size is 1
                    mask_true = mask_true.long().squeeze(1)

                if is_train:
                    with torch.autograd.detect_anomaly():
                        optimizer.zero_grad()
                        msk_pred = mask_postprocess(model(image))
                        seg_loss = seg_criterion(msk_pred, mask_true)
                        seg_loss.mean().backward()
                        optimizer.step()
                else:
                    msk_pred = mask_postprocess(model(image))
                    seg_loss = seg_criterion(msk_pred, mask_true)

                mask_pred_activate = logit_to_prob(msk_pred, seg_criterion)

                seg_loss_np = seg_loss.detach().cpu().numpy()
                epoch_mask_pred.extend(mask_pred_activate.cpu().numpy())
                epoch_seg_losses.extend(seg_loss_np)
                epoch_msk_labels.extend(mask_class_labels.numpy())

                # Log metrics
                for name, metric in metrics.items():
                    metric.update(mask_pred_activate, mask_true)

                if is_train and batch_index == 0:
                    # Log gradients at the first batch of the epoch
                    for name, param in model.named_parameters():
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                f'{tag}/grad/{name}', param.grad.cpu(), epoch)

                avg_seg_loss.extend(seg_loss_np)

                tq.set_postfix(seg_loss='{:.3f}'.format(avg_seg_loss.avg))
                tq.update()

            del image, mask_true, msk_pred, seg_loss

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    epoch_ids = np.array(epoch_ids)
    epoch_image = np.array(epoch_image)
    epoch_mask_true = np.array(epoch_mask_true)
    epoch_mask_pred = np.array(epoch_mask_pred)
    epoch_seg_losses = np.array(epoch_seg_losses)
    epoch_msk_labels = np.array(epoch_msk_labels)

    # End of train epoch

    # Mine thresholds on val
    if not is_train:
        thresholds, scores = threshold_mining(epoch_mask_pred, epoch_mask_true)
        i = np.argmax(scores)
        optimal_threshold = float(thresholds[i])
        lb_at_optimal_threshold = float(scores[i])
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_threshold',
                                  optimal_threshold, epoch)
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_score',
                                  lb_at_optimal_threshold, epoch)

    precision, result, threshold = do_kaggle_metric(epoch_mask_pred,
                                                    epoch_mask_true, 0.50)
    lb_50 = np.mean(precision)
    summary_writer.add_scalar(f'{tag}/epoch/lb', lb_50, epoch)

    # Log losses
    summary_writer.add_scalar(f'{tag}/epoch/seg_loss', epoch_seg_losses.mean(),
                              epoch)
    summary_writer.add_histogram(f'{tag}/epoch/seg_loss/histogram',
                                 epoch_seg_losses, epoch)

    for cls in np.unique(epoch_msk_labels):
        summary_writer.add_scalar(
            f'{tag}/epoch/seg_loss_class_{cls}',
            np.mean(epoch_seg_losses[epoch_msk_labels == cls]), epoch)

    # Plot segmentation negatives (loss)
    seg_losses_desc = np.argsort(-epoch_seg_losses)[:64]
    seg_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[seg_losses_desc],
        'seg_loss': epoch_seg_losses[seg_losses_desc]
    })

    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/image',
        make_grid(torch.from_numpy(epoch_image[seg_losses_desc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[seg_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[seg_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/loss/ids',
                            '```' + seg_negatives.to_csv(index=False) + '```',
                            epoch)

    # Plot negative examples (LB): lowest-precision samples first
    iou_losses_desc = np.argsort(precision)[:64]
    iou_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[iou_losses_desc],
        'iou_score': precision[iou_losses_desc]
    })
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/image',
        make_grid(torch.from_numpy(epoch_image[iou_losses_desc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[iou_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[iou_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/lb/ids',
                            '```' + iou_negatives.to_csv(index=False) + '```',
                            epoch)

    if is_train:
        # Plot histogram of parameters after each epoch
        for name, param in model.named_parameters():
            if param.grad is not None:
                param_data = param.data.cpu().numpy()
                summary_writer.add_histogram('model/' + name, param_data,
                                             epoch)

    metric_scores = {
        f'{tag}_seg_loss': epoch_seg_losses.mean(),
        f'{tag}_lb': lb_50
    }

    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
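
logit_to_prob converts raw model outputs to probabilities in a criterion-dependent way. Below is a sketch of the likely dispatch, keyed on the criterion's class name so the snippet stays self-contained; the CELoss branch (softmax over classes, keeping only the foreground channel) is an assumption.

import torch

def logit_to_prob_sketch(logits, criterion):
    # Binary criteria (BCE/Lovasz-style) imply a sigmoid over a single
    # channel; a CELoss criterion implies a softmax over classes, of
    # which the foreground probability is kept as the mask.
    if criterion.__class__.__name__ == 'CELoss':
        return logits.softmax(dim=1)[:, 1:2, ...]
    return torch.sigmoid(logits)
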
Example 5
def test_prediction_pipeline_tta_pre():
    from lib import tta

    device = 'cuda'
    config = auto_file('infallible_lamport.json')
    snapshot = auto_file(
        'Oct09_23_17_wider_unet_224pad_medium_infallible_lamport_val_lb.pth')

    with open(config) as f:
        config = json.load(f)
    snapshot = torch.load(snapshot)

    prepare_fn = D.get_prepare_fn(config['prepare'], **config)
    dataset = get_test_dataset(dataset=config['dataset'],
                               prepare=prepare_fn,
                               test_or_train='train')

    model = get_model(config['model'],
                      num_classes=config['num_classes'],
                      num_channels=dataset.channels(),
                      pretrained=False).to(device)

    if device == 'cpu':
        warnings.warn('Using CPU for prediction. It will be SLOW.')

    model.load_state_dict(snapshot['model'])
    model.eval()

    batch_size = config['batch_size']
    collate_fn = tta.tta_fliplr_collate
    # TTA doubles each batch (image plus mirrored copy), so halve the batch size
    batch_size = max(1, batch_size // 2)

    pred_masks = []
    with torch.no_grad():
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            pin_memory=True,
                            collate_fn=collate_fn)
        for images, image_ids in tqdm(loader, total=len(loader), desc='Predicting'):
            images = images.to(device, non_blocking=True)

            output = model(images)
            is_raw_mask = isinstance(output, torch.Tensor)
            is_mask_and_class = isinstance(output, tuple) and len(output) == 2

            if is_raw_mask:
                masks = output
            elif is_mask_and_class:
                masks, presence = output
            else:
                raise RuntimeError('Unknown output type')

            masks = dataset.resize_fn.backward(masks)
            masks = np.array([np.squeeze(x) for x in masks.cpu().numpy()])
            masks = tta.tta_fliplr_deaug(masks)
            masks = sigmoid(masks)

            if is_mask_and_class:
                presence = presence.softmax(dim=1).cpu().numpy()
                presence = tta.average_classes(presence, 2)
                presence = np.argmax(presence, axis=1)
                masks = zero_masks_inplace(masks, presence == 0)

            for mask, image_id in zip(masks, image_ids):
                mask = cv2.resize(mask, (D.ORIGINAL_SIZE, D.ORIGINAL_SIZE),
                                  interpolation=cv2.INTER_LANCZOS4)
                pred_masks.append(mask)

    del model, loader

    pred_masks = np.array(pred_masks)
    true_masks = D.read_train_masks(dataset.ids)

    plt.figure()
    binarization_thresholds, scores = threshold_mining(pred_masks,
                                                       true_masks,
                                                       min_threshold=0,
                                                       max_threshold=1)
    plt.plot(binarization_thresholds, scores)
    plt.title("test_prediction_pipeline_tta_pre")
    plt.show()
    return pred_masks, dataset
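
The flip-left-right TTA pair, tta.tta_fliplr_collate and tta.tta_fliplr_deaug, is what this test exercises: each image is scored together with its mirror (which is why the caller halves batch_size), and the two predictions are averaged after un-flipping. Here is a sketch of that contract; both _sketch functions are illustrative assumptions, not the library's code.

import numpy as np
import torch

def tta_fliplr_collate_sketch(batch):
    # Interleave every image with its horizontal flip so both views go
    # through the model in a single forward pass.
    images = torch.stack([image for image, _ in batch])
    flipped = torch.flip(images, dims=[-1])
    interleaved = torch.stack([images, flipped], dim=1).flatten(0, 1)
    image_ids = [image_id for _, image_id in batch]
    return interleaved, image_ids

def tta_fliplr_deaug_sketch(masks):
    # Un-flip every second mask and average each (original, mirror) pair.
    masks = masks.reshape(-1, 2, *masks.shape[1:])
    return (masks[:, 0] + masks[:, 1, ..., ::-1]) / 2
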
Example 6
def test_map():
    # Sanity check: mining thresholds with the ground truth as the
    # prediction should yield the maximum score at every threshold
    train_ids = D.all_train_ids()
    masks = D.read_train_masks(train_ids)
    print(M.threshold_mining(masks, masks))
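
Under the threshold_mining_sketch contract from Example 1, that expectation can be asserted directly; the random binary masks below merely stand in for real data, and test_map_expected is illustrative.

import numpy as np

def test_map_expected():
    # Ground truth scored against itself: the score curve should be
    # flat at its maximum across the whole threshold sweep.
    masks = (np.random.rand(8, 101, 101) > 0.5).astype(np.float32)
    thresholds, scores = threshold_mining_sketch(masks, masks)
    assert np.allclose(scores, scores.max())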