Esempio n. 1
0
def predict(learn: Learner, name: str):
    """Dump test predictions, the loss history and training-set diagnostics, then export.

    Every output file name carries ``name`` as a suffix. In order, this writes
    ``submission_<name>.csv``, ``loss_<name>.csv``, ``train_pred_<name>.csv``
    and ``train_index_<name>.csv``, and finally exports the learner to
    ``model_<name>.pth`` with ``destroy=True``.

    Args:
        learn: Learner supplying ``get_preds``, ``recorder.losses`` and ``export``.
        name: Tag inserted into every output file name.

    Note:
        Writes a ``has_cactus`` column into the module-level ``test`` object
        (presumably a pandas DataFrame -- confirm against the caller).
    """
    # --- submission.csv: column 0 of the test-set predictions ---
    test_preds, _ = learn.get_preds(ds_type=DatasetType.Test)
    test['has_cactus'] = test_preds.numpy()[:, 0]
    test.to_csv('submission_{}.csv'.format(name), index=False)
    print('Finish creating submission_{}.csv'.format(name))

    # --- loss.csv: one row per recorded loss value ---
    recorded_losses = learn.recorder.losses
    loss_table = pd.DataFrame({
        'id': range(len(recorded_losses)),
        'loss': np.array(recorded_losses),
    })
    loss_table.to_csv('loss_{}.csv'.format(name), index=False)
    print('Finish creating loss_{}.csv'.format(name))

    # --- train_pred.csv: per-sample predictions on the training set ---
    train_preds, train_targets = learn.get_preds(ds_type=DatasetType.Train)
    preds_np = train_preds.numpy()
    targets_np = train_targets.numpy()
    predicted_labels = np.argmax(preds_np, axis=1)
    n_samples = len(train_preds)
    train_pred_table = pd.DataFrame({
        'id': range(n_samples),
        'preds': preds_np[:, 0],
        'preds_label': predicted_labels,
        'targets': targets_np,
    })
    # Unlike the two files above, this one is written with the DataFrame index.
    train_pred_table.to_csv('./train_pred_{}.csv'.format(name))
    print('Finish creating train_pred_{}.csv'.format(name))

    # --- train_index.csv: ROC points plus derived counts ---
    n_correct = (predicted_labels == targets_np).sum()
    n_incorrect = n_samples - n_correct
    fpr, tpr = metrics.roc_curve(train_preds[:, 0], train_targets)
    fpr = fpr.numpy()
    tpr = tpr.numpy()
    # NOTE(review): the counts below scale each rate by the total sample count
    # and subtract from the argmax-based correct/incorrect totals -- confirm
    # this is the intended definition of TP/TN/FP/FN.
    false_pos = np.floor(fpr * n_samples)
    true_pos = np.floor(tpr * n_samples)
    false_neg = n_incorrect - false_pos
    true_neg = n_correct - true_pos
    train_index_table = pd.DataFrame({
        'id': range(len(fpr)),
        'fpr': fpr,
        'tpr': tpr,
        'TP': true_pos,
        'TN': true_neg,
        'FP': false_pos,
        'FN': false_neg,
    })
    train_index_table.to_csv('./train_index_{}.csv'.format(name))
    print('Finish creating train_index_{}.csv'.format(name))

    # Save the model; destroy=True is passed so the learner is torn down afterwards.
    learn.export('./model_{}.pth'.format(name), destroy=True)
Esempio n. 2
0
def _select_id_column(df):
    """Return ``df`` reduced to its 'Study' column, or to 'Path' if 'Study' is absent.

    Raises:
        ValueError: if ``df`` has neither a 'Study' nor a 'Path' column.
    """
    # Explicit membership test instead of a bare ``except:`` so that unrelated
    # errors (and KeyboardInterrupt) are not swallowed.
    for column in ('Study', 'Path'):
        if column in df.columns:
            return df[[column]]
    raise ValueError('csv has no attribute for path/study.')


def _predict_with_weights(learn, weight_names):
    """Load each named weight set into ``learn`` in turn and collect its predictions."""
    collected = []
    for weight_name in weight_names:
        learn.load(weight_name)
        # With with_loss=True, get_preds yields three values; only the first
        # (the model outputs) is used for the ensemble.
        output, _, _ = learn.get_preds(ds_type=DatasetType.Valid,
                                       with_loss=True)
        collected.append(output)
    return collected


def save_preds(input_csv, output_csv):
    """Run the saved model ensemble over ``input_csv`` and write a submission CSV.

    The input CSV must have a 'Study' or a 'Path' column ('Study' wins when
    both exist). Eight weight sets from ``model_names`` are evaluated across
    five learners (densenet121, resnet152, resnext101_64x4d, vgg19_bn and
    densenet121 again), their outputs are combined by
    ``ensemble_method(..., mode='avg')`` and the result is written to
    ``output_csv`` without the index.

    Args:
        input_csv: Path of the CSV listing the items to predict on.
        output_csv: Path the submission CSV is written to.

    Raises:
        ValueError: if the input CSV has neither a 'Study' nor a 'Path' column.
    """
    df = _select_id_column(pd.read_csv(input_csv))

    # from_df is given label columns, so fill every label with zeros as a
    # placeholder.
    for lbl in ALL_LBLS:
        df[lbl] = np.zeros(len(df))

    # valid_pct=1 -- presumably so every row lands in the validation split,
    # which is what get_preds(DatasetType.Valid) reads below (confirm).
    # NOTE: no .normalize([IMG_MEAN, IMG_STD]) is applied to this data bunch.
    test = ImageDataBunch.from_df(
        path=folder_path,
        df=df,
        folder=chexpert_folder,
        seed=0,
        label_col=ALL_LBLS,
        suffix='',
        valid_pct=1,
        ds_tfms=data_tfms,
        bs=BS,
        size=IMG_SZ)

    IDs = test.valid_ds.x.items

    def build_cnn(arch):
        return cnn_learner(test,
                           arch,
                           model_dir=model_path,
                           pretrained=False)

    def build_resnext():
        model = resnext101_64x4d(pretrained=None)
        model.last_linear = nn.Sequential(nn.Linear(32768, 2048),
                                          nn.ReLU(True), nn.Dropout(),
                                          nn.Linear(2048, 14))
        return Learner(test, model, model_dir=model_path)

    # (learner factory, indices into model_names), in evaluation order.
    # Factories are called lazily so only one learner is alive at a time.
    ensemble_plan = (
        (lambda: build_cnn(models.densenet121), (0, 1)),
        (lambda: build_cnn(models.resnet152), (2, 3)),
        (build_resnext, (4,)),
        (lambda: build_cnn(models.vgg19_bn), (5, 6)),
        (lambda: build_cnn(models.densenet121), (7,)),
    )

    outputs = []
    for build_learner, weight_indices in ensemble_plan:
        learn = build_learner()
        weight_names = [model_names[i] for i in weight_indices]
        outputs.extend(_predict_with_weights(learn, weight_names))

    output = ensemble_method(outputs, mode='avg')
    if torch.cuda.is_available():
        output = output.cpu()
    output = output.numpy()

    # EVAL_LBLS[0..4] are taken from output columns 1..5 (column 0 is skipped).
    # The identifier column is always written as 'Path', even when the input
    # used 'Study'.
    columns = {'Path': IDs}
    for i in range(5):
        columns[EVAL_LBLS[i]] = output[:, i + 1]
    df = pd.DataFrame(columns)

    df.to_csv(output_csv, index=False)
    print('submission saved.')