Example No. 1
0
def test_rle_encode_decode():
    """Round-trip check: encoding predictions into a submission CSV and
    decoding it back must leave the kaggle metric unchanged."""
    predictions_file = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    predictions = np.load(predictions_file)

    ids = D.all_train_ids()
    gt_masks = D.read_train_masks(ids)

    # Binarize the raw per-id predictions at a fixed threshold.
    binarized = np.array([predictions[sample_id] for sample_id in ids])
    binarized = (binarized > 0.45).astype(np.uint8)

    submission = create_submission(ids, binarized)
    submission.to_csv('test_rle_encode_decode.csv.gz',
                      compression='gzip',
                      index=False)

    roundtrip_ids, roundtrip_masks = decode_submission(
        'test_rle_encode_decode.csv.gz')
    mask_by_id = dict(zip(roundtrip_ids, roundtrip_masks))
    assert set(roundtrip_ids) == set(ids)

    roundtrip_masks = np.array([mask_by_id[sample_id] for sample_id in ids])

    # Metric computed before and after the round trip must match exactly.
    p_direct, r_direct, _ = do_kaggle_metric(binarized, gt_masks)
    p_roundtrip, r_roundtrip, _ = do_kaggle_metric(roundtrip_masks, gt_masks)

    assert np.array_equal(p_direct, p_roundtrip)
    assert np.array_equal(r_direct, r_roundtrip)
    print(np.mean(p_direct), np.mean(p_roundtrip))
Example No. 2
0
def test_kaggle_metric():
    """Sanity check: scoring the ground-truth masks against themselves."""
    ids = D.all_train_ids()
    y_true = D.read_train_masks(ids)

    precision, result, threshold = do_kaggle_metric(y_true, y_true, 0.5)
    print(np.mean(precision))
Example No. 3
0
def process_epoch(model,
                  criterions: dict,
                  criterion_weights: dict,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  tag=None) -> dict:
    """Run one train/val epoch over ``dataloader`` and log to tensorboard.

    Args:
        model: Network called as ``model(batch)``; returns a dict of logits
            that must contain at least a ``'mask'`` head.
        criterions: Loss function per prediction head name; must cover every
            key the model predicts.
        criterion_weights: Per-head weights consumed by ``compute_total_loss``.
        optimizer: Stepped once per batch when ``is_train`` is True.
        dataloader: Yields batch dicts with ``'id'``, ``'image'`` and
            ``'mask'`` entries.
        epoch: Epoch index used as the tensorboard global step.
        is_train: Enables gradients, backprop and optimizer updates.
        summary_writer: Tensorboard ``SummaryWriter`` instance.
        tag: Logging prefix; defaults to ``'train'``/``'val'`` from
            ``is_train``.

    Returns:
        dict: Scalar scores keyed ``f'{tag}_lb'``, ``f'{tag}_loss'`` plus one
        entry per streaming metric.
    """
    avg_loss = AverageMeter()

    metrics = {
        'iou': JaccardIndex(0.5),
        'acc': PixelAccuracy(),
    }

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_ids = []
    epoch_image = []
    epoch_mask_labels = []
    epoch_mask_pred = []
    epoch_mask_true = []
    # One list of per-sample loss values for every criterion head.
    epoch_losses = {key: [] for key in criterions}

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch {epoch}')

            batch = None
            total_loss = None

            for batch_index, batch in enumerate(dataloader):

                # Keep CPU copies for the end-of-epoch reports and plots.
                epoch_ids.extend(batch['id'])
                epoch_mask_true.extend(to_numpy(batch['mask']))
                epoch_mask_labels.extend(compute_mask_class(batch['mask']))
                epoch_image.extend(to_numpy(batch['image'])[:, 0:1, :, :])

                # Move all data to GPU
                for key, value in batch.items():
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.cuda(non_blocking=True)

                if is_train:
                    # NOTE(review): detect_anomaly() adds noticeable overhead;
                    # presumably left on deliberately to catch NaNs — confirm.
                    with torch.autograd.detect_anomaly():
                        optimizer.zero_grad()
                        predictions = model(batch)
                        losses = dict((key, criterions[key](
                            predictions[key], batch[target_for_dsv(key)]))
                                      for key in predictions.keys())
                        total_loss = compute_total_loss(
                            losses, criterion_weights)
                        total_loss.mean().backward()
                        optimizer.step()
                else:
                    predictions = model(batch)
                    losses = dict((key, criterions[key](
                        predictions[key], batch[target_for_dsv(key)]))
                                  for key in predictions.keys())
                    total_loss = compute_total_loss(losses, criterion_weights)

                # Turn the 'mask' logits into probabilities; the activation
                # depends on which loss was used for that head.
                mask_pred_activate = logit_to_prob(predictions['mask'],
                                                   criterions['mask'])

                epoch_mask_pred.extend(to_numpy(mask_pred_activate))

                # Accumulate per-sample losses per head
                for loss_name in predictions.keys():
                    epoch_losses[loss_name].extend(to_numpy(losses[loss_name]))

                # Update streaming metrics
                for metric in metrics.values():
                    metric.update(mask_pred_activate, batch['mask'])

                if is_train and batch_index == 0:
                    # Log gradients at the first batch of epoch
                    for name, param in model.named_parameters():
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                f'{tag}/grad/{name}', to_numpy(param.grad),
                                epoch)

                avg_loss.extend(to_numpy(total_loss))
                tq.set_postfix(loss='{:.3f}'.format(avg_loss.avg))
                tq.update()

            # Release references to the last batch before post-processing.
            del batch, total_loss

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    epoch_ids = np.array(epoch_ids)
    epoch_image = np.array(epoch_image)
    epoch_mask_true = np.array(epoch_mask_true)
    epoch_mask_pred = np.array(epoch_mask_pred)

    # End of train epoch

    # Log losses.  Fix: use a distinct loop variable instead of shadowing
    # (and rebinding) the `epoch_losses` dict itself.
    for loss_name, loss_values in epoch_losses.items():
        if len(loss_values):
            summary_writer.add_scalar(f'{tag}/loss/{loss_name}',
                                      np.mean(loss_values), epoch)
            summary_writer.add_histogram(f'{tag}/loss/{loss_name}/histogram',
                                         np.array(loss_values), epoch)

    # Mine thresholds on val
    if not is_train:
        thresholds, scores = threshold_mining(epoch_mask_pred, epoch_mask_true)
        i = np.argmax(scores)
        optimal_threshold = float(thresholds[i])
        lb_at_optimal_threshold = float(scores[i])
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_threshold',
                                  optimal_threshold, epoch)
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_score',
                                  lb_at_optimal_threshold, epoch)

    # Compute LB metric at the fixed 0.5 threshold
    precision, result, threshold = do_kaggle_metric(epoch_mask_pred,
                                                    epoch_mask_true, 0.50)
    lb_50 = np.mean(precision)
    summary_writer.add_scalar(f'{tag}/epoch/lb', lb_50, epoch)

    # Plot the 64 worst samples by LB precision (hard negatives)
    iou_metric_asc = np.argsort(precision)[:64]
    iou_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[iou_metric_asc],
        'iou_score': precision[iou_metric_asc],
    })

    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/image',
        make_grid(torch.from_numpy(epoch_image[iou_metric_asc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[iou_metric_asc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[iou_metric_asc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/lb/ids',
                            '```' + iou_negatives.to_csv(index=False) + '```',
                            epoch)

    if is_train:
        # Plot histogram of parameters after each epoch
        for name, param in model.named_parameters():
            if param.grad is not None:
                summary_writer.add_histogram('model/' + name,
                                             to_numpy(param.data), epoch)

    metric_scores = {f'{tag}_lb': lb_50, f'{tag}_loss': avg_loss.avg}

    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
Example No. 4
0
def process_epoch(model,
                  seg_criterion,
                  optimizer,
                  dataloader,
                  epoch: int,
                  is_train,
                  summary_writer,
                  mask_postprocess=noop,
                  tag=None) -> dict:
    """Run one train/val epoch of the single-head segmentation model.

    Args:
        model: Network called as ``model(image)``; returns mask logits.
        seg_criterion: Segmentation loss; a ``CELoss`` instance triggers
            long/squeezed targets.
        optimizer: Stepped once per batch when ``is_train`` is True.
        dataloader: Yields ``(image, mask_true, sample_ids)`` tuples.
        epoch: Epoch index used as the tensorboard global step.
        is_train: Enables gradients, backprop and optimizer updates.
        summary_writer: Tensorboard ``SummaryWriter`` instance.
        mask_postprocess: Applied to both target masks and raw predictions
            (defaults to ``noop``).
        tag: Logging prefix; defaults to ``'train'``/``'val'``.

    Returns:
        dict: Scalar scores keyed ``f'{tag}_seg_loss'``, ``f'{tag}_lb'`` plus
        one entry per streaming metric.
    """
    avg_seg_loss = AverageMeter()

    metrics = {
        'iou': JaccardIndex(0.5),
        'acc': PixelAccuracy(),
    }

    if tag is None:
        tag = 'train' if is_train else 'val'

    epoch_ids = []

    epoch_seg_losses = []
    epoch_msk_labels = []

    epoch_image = []

    epoch_mask_pred = []
    epoch_mask_true = []

    with torch.set_grad_enabled(is_train):
        if is_train:
            model.train()
        else:
            model.eval()

        n_batches = len(dataloader)
        with tqdm(total=n_batches) as tq:
            tq.set_description(f'{tag} epoch %d' % epoch)

            image = None
            mask_true = None
            msk_pred = None
            seg_loss = None

            for batch_index, (image, mask_true,
                              sample_ids) in enumerate(dataloader):
                mask_true = mask_postprocess(mask_true)

                # Keep CPU copies for the end-of-epoch reports and plots.
                epoch_ids.extend(sample_ids)
                epoch_image.extend(image.detach().numpy()[:, 0:1, :, :])
                epoch_mask_true.extend(mask_true.detach().numpy())

                mask_class_labels = compute_mask_class(mask_true)
                image, mask_true = image.cuda(
                    non_blocking=True), mask_true.cuda(non_blocking=True)

                if isinstance(seg_criterion, CELoss):
                    # NOTE(review): bare squeeze() also drops a batch
                    # dimension of size 1 — consider squeeze(1); confirm
                    # target shape before changing.
                    mask_true = mask_true.long().squeeze()

                if is_train:
                    # NOTE(review): detect_anomaly() adds noticeable overhead;
                    # presumably left on deliberately to catch NaNs — confirm.
                    with torch.autograd.detect_anomaly():
                        optimizer.zero_grad()
                        msk_pred = mask_postprocess(model(image))
                        seg_loss = seg_criterion(msk_pred, mask_true)
                        seg_loss.mean().backward()
                        optimizer.step()
                else:
                    msk_pred = mask_postprocess(model(image))
                    seg_loss = seg_criterion(msk_pred, mask_true)

                # Turn logits into probabilities; the activation depends on
                # the loss in use.
                mask_pred_activate = logit_to_prob(msk_pred, seg_criterion)

                seg_loss_np = seg_loss.detach().cpu().numpy()
                epoch_mask_pred.extend(mask_pred_activate.cpu().numpy())
                epoch_seg_losses.extend(seg_loss_np)
                epoch_msk_labels.extend(mask_class_labels.numpy())

                # Update streaming metrics
                for metric in metrics.values():
                    metric.update(mask_pred_activate, mask_true)

                if is_train and batch_index == 0:
                    # Log gradients at the first batch of epoch
                    for name, param in model.named_parameters():
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                f'{tag}/grad/{name}', param.grad.cpu(), epoch)

                avg_seg_loss.extend(seg_loss_np)

                tq.set_postfix(seg_loss='{:.3f}'.format(avg_seg_loss.avg))
                tq.update()

            # Release references to the last batch before post-processing.
            del image, mask_true, msk_pred, seg_loss

    for key, metric in metrics.items():
        metric.log_to_tensorboard(summary_writer, f'{tag}/epoch/' + key, epoch)

    epoch_ids = np.array(epoch_ids)
    epoch_image = np.array(epoch_image)
    epoch_mask_true = np.array(epoch_mask_true)
    epoch_mask_pred = np.array(epoch_mask_pred)
    epoch_seg_losses = np.array(epoch_seg_losses)
    epoch_msk_labels = np.array(epoch_msk_labels)

    # End of train epoch

    # Mine thresholds on val
    if not is_train:
        thresholds, scores = threshold_mining(epoch_mask_pred, epoch_mask_true)
        i = np.argmax(scores)
        optimal_threshold = float(thresholds[i])
        lb_at_optimal_threshold = float(scores[i])
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_threshold',
                                  optimal_threshold, epoch)
        summary_writer.add_scalar(f'{tag}/epoch/lb/optimal_score',
                                  lb_at_optimal_threshold, epoch)

    # Compute LB metric at the fixed 0.5 threshold
    precision, result, threshold = do_kaggle_metric(epoch_mask_pred,
                                                    epoch_mask_true, 0.50)
    lb_50 = np.mean(precision)
    summary_writer.add_scalar(f'{tag}/epoch/lb', lb_50, epoch)

    # Log losses
    summary_writer.add_scalar(f'{tag}/epoch/seg_loss', epoch_seg_losses.mean(),
                              epoch)
    summary_writer.add_histogram(f'{tag}/epoch/seg_loss/histogram',
                                 epoch_seg_losses, epoch)

    # Per-mask-class mean loss
    for cls in np.unique(epoch_msk_labels):
        summary_writer.add_scalar(
            f'{tag}/epoch/seg_loss_class_{cls}',
            np.mean(epoch_seg_losses[epoch_msk_labels == cls]), epoch)

    # Plot segmentation negatives (loss): 64 samples with the highest loss
    seg_losses_desc = np.argsort(-epoch_seg_losses)[:64]
    seg_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[seg_losses_desc],
        'seg_loss': epoch_seg_losses[seg_losses_desc],
    })

    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/image',
        make_grid(torch.from_numpy(epoch_image[seg_losses_desc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[seg_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/loss/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[seg_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/loss/ids',
                            '```' + seg_negatives.to_csv(index=False) + '```',
                            epoch)

    # Plot negative examples (LB): 64 samples with the lowest precision
    iou_losses_desc = np.argsort(precision)[:64]
    iou_negatives = pd.DataFrame.from_dict({
        'id': epoch_ids[iou_losses_desc],
        # Fix: the 'iou_score' column previously reported the segmentation
        # loss; report the LB precision the samples were ranked by.
        'iou_score': precision[iou_losses_desc],
    })
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/image',
        make_grid(torch.from_numpy(epoch_image[iou_losses_desc]),
                  nrow=4,
                  normalize=True), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_true',
        make_grid(torch.from_numpy(epoch_mask_true[iou_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_image(
        f'{tag}/hard_negatives/lb/y_pred',
        make_grid(torch.from_numpy(epoch_mask_pred[iou_losses_desc]),
                  normalize=False,
                  nrow=4), epoch)
    summary_writer.add_text(f'{tag}/hard_negatives/lb/ids',
                            '```' + iou_negatives.to_csv(index=False) + '```',
                            epoch)

    if is_train:
        # Plot histogram of parameters after each epoch
        for name, param in model.named_parameters():
            if param.grad is not None:
                param_data = param.data.cpu().numpy()
                summary_writer.add_histogram('model/' + name, param_data,
                                             epoch)

    metric_scores = {
        f'{tag}_seg_loss': epoch_seg_losses.mean(),
        f'{tag}_lb': lb_50
    }

    for key, metric in metrics.items():
        metric_scores[f'{tag}_{key}'] = metric.value()

    return metric_scores
def test_inspect_train_predictions():
    """Inspect saved train/test predictions: dump them to image files, mine
    the best binarization threshold (globally and on the validation split),
    and plot per-sample precision against ground-truth mask coverage."""
    train_ids = D.all_train_ids()
    train_images = D.read_train_images(train_ids)
    train_masks = D.read_train_masks(train_ids)
    print(train_ids.shape, train_images.shape, train_masks.shape)

    CONFIG = auto_file('wonderful_goldberg.json')
    WEIGHT_TRAIN = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_train_predictions.npz'
    )
    WEIGHT_TEST = auto_file(
        'Oct10_20_28_dpn_128_medium_wonderful_goldberg_val_lb.pth_test_predictions.npz'
    )

    # Dump raw prediction arrays as image files for visual inspection.
    convert_predictions_to_images(WEIGHT_TEST,
                                  os.path.join('test', 'test_predictions'))
    convert_predictions_to_images(WEIGHT_TRAIN,
                                  os.path.join('test', 'train_predictions'))

    train_predictions = auto_file(WEIGHT_TRAIN)
    train_predictions = np.load(train_predictions)

    # image = train_predictions['0aab0afa9c']

    # Re-order the npz entries to match the order of train_ids.
    train_predictions = np.array([train_predictions[id] for id in train_ids])
    print(train_predictions.shape)

    # Sweep binarization thresholds over the full train set.
    threshold, lb_score = threshold_mining(train_predictions,
                                           train_masks,
                                           min_threshold=0.15,
                                           max_threshold=0.85,
                                           step=0.005)

    plt.figure()
    plt.plot(threshold, lb_score)
    plt.tight_layout()

    i = np.argmax(lb_score)
    best_threshold, best_lb_score = float(threshold[i]), float(lb_score[i])
    print(best_threshold, best_lb_score)

    config_file = auto_file(CONFIG)

    # Repeat the sweep restricted to the validation split from the config.
    config = json.load(open(config_file))
    valid_ids = np.array(config['valid_set'])
    valid_mask = D.get_selection_mask(train_ids, valid_ids)
    val_threshold, val_lb_score = threshold_mining(
        train_predictions[valid_mask],
        train_masks[valid_mask],
        min_threshold=0.15,
        max_threshold=0.85,
        step=0.005)

    plt.figure()
    plt.plot(val_threshold, val_lb_score)
    plt.tight_layout()
    plt.show()

    val_i = np.argmax(val_lb_score)
    val_th = val_threshold[val_i]
    print(val_threshold[val_i], val_lb_score[val_i])

    # Score the whole train set with the validation-mined threshold.
    precision, result, threshold = do_kaggle_metric(train_predictions,
                                                    train_masks, val_th)

    # Scatter: per-sample precision vs. number of non-zero mask pixels.
    x = []
    y = []
    for prec, true_mask in zip(precision, train_masks):
        x.append(prec)
        y.append(cv2.countNonZero(true_mask))

    plt.figure()
    plt.scatter(x, y)
    plt.tight_layout()
    plt.show()