# Example #1
        cbs.append(OverSamplingCallback(learn))

    # Dispatch on the configured learning-rate schedule.
    if lr_sched == "one_cycle":
        # NOTE(review): this branch passes `callbacks=callbacks` while the
        # flat_and_anneal branch passes the locally-built `cbs` list — this
        # looks like it should be `callbacks=cbs`; confirm against the full
        # function (its header is outside this view).
        learn.fit_one_cycle(epochs,
                            max_lr=max_lr,
                            div_factor=100,
                            pct_start=0.0,
                            callbacks=callbacks)
    elif lr_sched == "flat_and_anneal":
        # flat LR followed by annealing (fastai fit_fc)
        learn.fit_fc(tot_epochs=epochs, lr=max_lr, callbacks=cbs)
    else:
        raise Exception("lr_sched", f'unknown schedule: {lr_sched}')


# Cross-validation training loop: builds one Learner per fold.
# NOTE(review): the loop body appears truncated here (no fit call visible);
# the remainder presumably follows in the original source.
for fold in range(n_folds):
    data = sampler.get_data(fold)
    model = model_func()

    # Optionally warm-start each fold from matching pretrained weights.
    if is_pretrained:
        state_dict = load_weights(pretrained_dir, fold)
        # strict=False tolerates missing/unexpected keys in the state dict
        # (e.g. a replaced classification head)
        model.load_state_dict(state_dict, strict=False)

    learn = Learner(data,
                    model,
                    loss_func=loss_func,
                    opt_func=opt_func,
                    metrics=default_metrics,
                    callback_fns=default_callback_fns()).to_fp16()

    # gradient clipping threshold, presumably read by fastai during training
    learn.clip_grad = 1.0
# Example #2
def evaluate_model_dir(model_dir,
                       sampler=None,
                       TRAIN=None,
                       LABELS=None,
                       **kwargs):
    """
    Evaluates CV models in out-of-fold fashion and saves some stats to the model dir.

    Provide either sampler or TRAIN and LABELS.

    model_dir: directory containing models and their ModelConfig
    sampler (FoldSampler): optional data sampler instance
    TRAIN: optional training images folder
    LABELS: optional train.csv path
    **kwargs: accepted for call-site compatibility; unused here

    Side effects (all written into model_dir):
      - losses_fold-<k>.png, cm_fold-<k>.png, cm_fold-<k>-norm.png plots
      - cm_fold-<k>.npy (normalized confusion-matrix values)
      - oof_preds.npy / oof_trues.npy out-of-fold predictions
      - eval.json with per-fold and aggregate QWK scores
    """
    # load config
    config = ModelConfig.fromDir(model_dir)
    # load models (one per fold)
    models = load_models_from_dir(model_dir)
    model_name = config.getField('model_name')
    # regression models are identified by naming convention
    regr = "regr" in model_name

    n_folds = len(models)
    sz = config.getField('sz')
    mean = torch.tensor(np.array(config.getField('mean')).astype(np.float32))
    std = torch.tensor(np.array(config.getField('std')).astype(np.float32))
    N = config.getField('N')
    is_ordinal = config.getMetaField('is_ordinal')

    if sampler is None:
        assert (TRAIN is not None and LABELS is not None
                ), "Either sampler or TRAIN + LABELS must be provided"

        sampler = FoldSampler(TRAIN,
                              LABELS,
                              mean,
                              std,
                              N,
                              tfms=[],
                              sz=sz,
                              bs=1,
                              n_folds=n_folds,
                              is_ordinal=is_ordinal,
                              model_name=model_name)

    # evaluate out of fold
    val_qwks = []
    karolinska_preds = []
    karolinska_targets = []
    radboud_preds = []
    radboud_targets = []
    all_preds = []
    all_targets = []
    score_dict = {}
    for fold, model in enumerate(models):
        data = sampler.get_data(fold)
        default_metrics, monitor_metric = get_default_metrics(
            model_name, data=data, is_ordinal=is_ordinal)
        learn = Learner(data,
                        model,
                        metrics=default_metrics,
                        opt_func=Over9000).to_fp16()
        # an optimizer must exist before get_preds on an fp16 learner
        learn.create_opt(1e-3, 0.9)

        # out-of-fold predictions with per-sample losses
        preds, targets, losses = learn.get_preds(with_loss=True)
        targets = targets.numpy()
        if is_ordinal:
            # ordinal encoding: collapse per-threshold targets to categories
            # and sum the per-threshold losses into one loss per sample
            targets = ordinalRegs2cat(targets)
            losses = torch.sum(losses.view(preds.shape[0], preds.shape[1]),
                               axis=1)

        if not regr:
            if is_ordinal:
                preds = ordinalRegs2cat((preds > 0.5).numpy())
            else:
                preds = np.argmax(preds.numpy(), axis=1)
        else:
            # convert regression outputs to categories
            preds = regrPreds2cat(preds)

        all_preds += list(preds)
        all_targets += list(targets)

        # fold qwk (quadratic weighted kappa)
        val_qwk = cohen_kappa_score(preds, targets, weights="quadratic")
        val_qwks.append(val_qwk)
        score_dict[f'{fold}_qwk'] = str(val_qwk)

        # look up each validation item's data provider ('karolinska'/'radboud')
        data_providers = [
            sampler.df[sampler.df.image_id == os.path.basename(
                _id)].data_provider.values[0] for _id in data.valid_ds.items
        ]
        for pred, target, provider in zip(preds, targets, data_providers):
            if provider == "karolinska":
                karolinska_preds.append(pred)
                karolinska_targets.append(target)
            else:
                radboud_preds.append(pred)
                radboud_targets.append(target)

        # plot top and min losses
        plot_samples(data, losses, preds,
                     sampler.df[sampler.df.split == fold].image_id.values)
        plt.savefig(os.path.join(model_dir,
                                 "losses_fold-{0}.png".format(fold)),
                    transparent=False)

        # confusion matrices (raw and normalized).
        # NOTE: the original had byte-identical `if not regr:`/`else:` branches
        # here; collapsed into a single path — behavior is unchanged.
        _ = plot_confusion_matrix_scipy(
            preds,
            targets,
            normalize=False,
            title='fold:{0} - qwk:{1:.3f}'.format(fold, val_qwk))
        plt.savefig(os.path.join(model_dir,
                                 "cm_fold-{0}.png".format(fold)),
                    transparent=False)

        cm = plot_confusion_matrix_scipy(
            preds,
            targets,
            normalize=True,
            title='Norm. fold:{0} - qwk:{1:.3f}'.format(fold, val_qwk))
        plt.savefig(os.path.join(model_dir,
                                 "cm_fold-{0}-norm.png".format(fold)),
                    transparent=False)

        # save (normalized) confusion matrix values
        np.save(os.path.join(model_dir, "cm_fold-{0}.npy".format(fold)), cm)

    # aggregate out-of-fold QWK, overall and per data provider
    cv_qwk = cohen_kappa_score(np.array(all_preds),
                               np.array(all_targets),
                               weights="quadratic")
    score_dict['cv_qwk'] = str(cv_qwk)
    score_dict['karolinska_qwk'] = str(
        cohen_kappa_score(karolinska_preds,
                          karolinska_targets,
                          weights="quadratic"))
    score_dict['radboud_qwk'] = str(
        cohen_kappa_score(radboud_preds, radboud_targets, weights="quadratic"))

    # save out-of-fold predictions
    np.save(os.path.join(model_dir, 'oof_preds.npy'), np.array(all_preds))
    np.save(os.path.join(model_dir, 'oof_trues.npy'), np.array(all_targets))

    with open(os.path.join(model_dir, 'eval.json'), 'w') as outfile:
        json.dump(score_dict, outfile, indent=4)

    # record for the notebook
    print(score_dict)
    plt.close('all')