Example #1
def auroc(model, show_plot=False):
    # Load data
    data = np.load(os.path.join('data', 'arrs_48_20.npy'), allow_pickle=True).item()
    models = [f for f in os.listdir('results') if f.startswith(model)]

    # Test set
    test_dataset = TensorDataset(torch.tensor(data['X_test']))
    test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True)

    base_fpr = np.linspace(0, 1, 101)
    tprs = np.zeros((len(models), 101))
    aucs = np.zeros(len(models))
    for i, model_name in enumerate(models):
        # Load model
        model_dir = os.path.join('results', model_name)
        model = load_model(model_dir)
        metadata = load_metadata(model_dir)

        # Predict
        preds = predict(test_loader, model)
        fpr, tpr, _ = metrics.roc_curve(data['Y_test'], preds[:, -1])
        aucs[i] = metrics.auc(fpr, tpr)

        # Interpolate onto the common FPR grid so curves can be averaged
        tpr = np.interp(base_fpr, fpr, tpr)
        tpr[0] = 0.0
        tprs[i] = tpr

    # Plot
    mean_tprs = tprs.mean(axis=0)
    std_tprs = tprs.std(axis=0)
    tprs_upper = np.minimum(mean_tprs + 2 * std_tprs, 1)
    tprs_lower = mean_tprs - 2 * std_tprs

    plt.plot(base_fpr, mean_tprs, 'k', label=f'Ours: {np.mean(aucs):.4f}')
    plt.fill_between(base_fpr,
                     tprs_lower,
                     tprs_upper,
                     color='red',
                     alpha=0.5,
                     label='95% CI')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.xlabel('False Positive Rate', fontsize=15)
    plt.ylabel('True Positive Rate', fontsize=15)
    plt.legend(loc='lower right')

    if show_plot:
        plt.show()
    else:
        np.save(os.path.join('figs', 'auroc_info'),
                np.stack((base_fpr, tprs_lower, mean_tprs, tprs_upper)))
        plt.savefig(os.path.join('figs', 'auroc_48_20bins.pdf'))
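These snippets call a project-level predict helper that is not shown here. A minimal sketch of what it presumably does, assuming each batch is a one-element tuple of inputs and the model returns per-timestep probabilities of shape (batch, 48):

import numpy as np
import torch

def predict(loader, model):
    """Run the model over a DataLoader and stack the per-sample outputs."""
    device = next(model.parameters()).device
    model.eval()
    outputs = []
    with torch.no_grad():
        for (x,) in loader:                       # TensorDataset yields 1-tuples
            outputs.append(model(x.to(device)).cpu().numpy())
    return np.concatenate(outputs, axis=0)        # shape: (n_samples, 48)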
Example #2
def auc_vs_time(model, show_plot=False):
    # Load data
    data = np.load(os.path.join('data', 'arrs_48_20.npy'), allow_pickle=True).item()
    models = [f for f in os.listdir('results') if f.startswith(model)]

    # Test set
    test_dataset = TensorDataset(torch.tensor(data['X_test']))
    test_loader = DataLoader(test_dataset, batch_size=128, pin_memory=True)

    aucs = np.zeros((len(models), 48))
    for i, model_name in enumerate(models):
        # Load model
        model_dir = os.path.join('results', model_name)
        model = load_model(model_dir)
        metadata = load_metadata(model_dir)

        # Predict
        preds = predict(test_loader, model)
        for j in range(48):
            fpr, tpr, _ = metrics.roc_curve(data['Y_test'], preds[:, j])
            aucs[i, j] = metrics.auc(fpr, tpr)

    # Bootstrap values
    mu, lb, ub = bootstrap2d(aucs, low=0.025, high=0.975, n_samples=10000)
    print(lb)
    print(mu)
    print(ub)

    # Plot
    # OASIS & SAPS II
    plt.plot([0, 48], [0.6631, 0.6631], '--b', markersize=5, lw=1)
    p1 = plt.scatter(24, 0.6631, c='b', marker='^', label='OASIS')
    plt.plot([0, 48], [0.7048, 0.7048], '--g', markersize=5, lw=1)
    p2 = plt.scatter(24, 0.7048, c='g', marker='s', label='SAPS II')

    p3 = plt.fill_between(range(1, len(mu)+1), lb, ub, color='r', alpha=0.5, label='95% CI')
    p4 = plt.plot(range(1, len(mu)+1), mu, 'k', lw=1, label='Ours')

    plt.xlim(0, 48)
    plt.xticks([0, 12, 24, 36, 48], fontsize=12)
    plt.xlabel('In-Hospital Hours After Admission', fontsize=15)

    plt.ylim(0.6, 0.9)
    plt.yticks(np.linspace(0.6, 0.9, 7), fontsize=12)
    plt.ylabel('AUROC', fontsize=15)

    leg = plt.legend()
    leg.get_lines()[0].set_linewidth(2)

    if show_plot:
        plt.show()
    else:
        plt.savefig(os.path.join('figs', 'aucvstime_48_20bins.pdf'))
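bootstrap2d comes from the same project and is not defined in these examples. A minimal sketch of the assumed behaviour (bootstrap resampling over the model axis of an (n_models, n_steps) array, returning the observed mean plus lower/upper percentile bounds per step):

import numpy as np

def bootstrap2d(values, low=0.025, high=0.975, n_samples=10000):
    """Bootstrap the column-wise mean of a 2-D array over its first axis."""
    n = values.shape[0]
    idx = np.random.randint(0, n, size=(n_samples, n))
    means = values[idx].mean(axis=1)          # (n_samples, n_steps)
    lb = np.quantile(means, low, axis=0)
    ub = np.quantile(means, high, axis=0)
    return values.mean(axis=0), lb, ub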
Example #3
def pmort_vs_time(model, idx, show_plot=True):
    # Load data
    data = np.load(os.path.join('data', 'arrs_48_20.npy'), allow_pickle=True).item()
    models = [f for f in os.listdir('results') if f.startswith(model)]

    # Print info
    print(idx, data['paths_train'][idx], data['Y_train'][idx])

    preds = np.zeros((len(models), 48))
    for i, model_name in enumerate(models):
        # Load model
        model_dir = os.path.join('results', model_name)
        model = load_model(model_dir)

        # Predict
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        x = data['X_train'][idx:idx+1]
        x = torch.tensor(x).to(device)
        preds[i] = array(model(x))

    # Bootstrap values
    mu, lb, ub = bootstrap2d(preds, low=0.025, high=0.975, n_samples=10000)

    # Plot
    plt.fill_between(range(1, len(mu)+1), lb, ub, color='r', alpha=0.5, label='95% CI')
    plt.plot(range(1, len(mu)+1), mu, ':ko', label='Mean')

    plt.xlim(0, 48)
    plt.xticks([0, 12, 24, 36, 48], fontsize=12)
    plt.xlabel('In-Hospital Hours After Admission', fontsize=15)

    plt.ylim(0, 1)
    plt.yticks(np.linspace(0, 1, 11), fontsize=12)
    plt.ylabel('p$_{mortality}$', fontsize=15)
    plt.legend()

    if show_plot:
        plt.show()
    else:
        plt.savefig(os.path.join('figs', f'pmortvstime_48_20bins_idx{idx}.pdf'))
        return array(x), model
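The array call is another undefined project helper; presumably it just detaches a tensor, moves it to the CPU and converts it to NumPy, along these lines:

import torch

def array(tensor):
    """Detach a (possibly CUDA) tensor and return it as a squeezed NumPy array."""
    return tensor.detach().cpu().numpy().squeeze()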
Example #4
def main(args):
    """Main train and evaluation function.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.
    """

    # Logging info
    formatter = logging.Formatter('%(asctime)s %(levelname)s - '
                                  '%(funcName)s: %(message)s',
                                  '%H:%M:%S')
    logger = logging.getLogger(__name__)
    logger.setLevel('INFO')
    stream = logging.StreamHandler()
    stream.setLevel('INFO')
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
    model_name = (f'{args.name}_lr{args.lr}_z{args.latent_dim}'
                  f'_h{args.hidden_dim}_p{args.p_dropout}')
    model_dir = os.path.join(args.results, model_name)
    logger.info(f'Directory for saving and loading models: {model_dir}')

    if not args.eval:
        # Model directory
        new_model_dir(model_dir, logger=logger)

        # Dataloaders
        train_loader, valid_loader = get_dataloaders(
            args.data, args.t_hours, args.n_bins,
            validation=True, dynamic=args.dynamic,
            batch_size=args.bs, logger=logger)
        logger.info(
            f'Train {args.model_type}-{args.t_hours} ' +
            f'with {len(train_loader.dataset)} samples')

        # Load model
        n_tokens = len(np.load(
            os.path.join(
                args.data, '_dicts', f'{args.t_hours}_{args.n_bins}.npy'),
            allow_pickle=True).item())
        model = init_model(
            args.model_type, n_tokens, args.latent_dim, args.hidden_dim,
            p_dropout=args.p_dropout, dt=args.dt,
            weighted=args.weighted, dynamic=args.dynamic)
        logger.info(f'#params in model: {get_n_param(model)}')

        # Optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        loss_f = BCE()
        model = model.to(device)

        # Training
        trainer = Trainer(
            model, loss_f, optimizer,
            device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar)
        trainer.train(
            train_loader, valid_loader,
            epochs=args.epochs, early_stopping=args.early_stopping)

        # Save model
        metadata = vars(args)
        metadata['n_tokens'] = n_tokens
        save_model(trainer.model, model_dir, metadata=metadata)

    if args.test:
        # Load model
        model = load_model(model_dir, is_gpu=args.cuda)
        metadata = load_metadata(model_dir)

        # Dataloader
        test_loader, _ = get_dataloaders(
            metadata['data'], metadata['t_hours'], metadata['n_bins'],
            validation=False, dynamic=metadata['dynamic'], batch_size=128,
            shuffle=False, logger=logger)

        # Evaluate
        loss_f = BCE()
        evaluator = Trainer(
            model, loss_f,
            device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar)
        evaluator._valid_epoch(test_loader)
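main expects an argparse.Namespace. A hypothetical entry point covering the attributes accessed above (argument names inferred from the code, defaults purely illustrative):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train and evaluate a mortality model')
    # Paths and naming
    parser.add_argument('--name', default='model')
    parser.add_argument('--data', default='data')
    parser.add_argument('--results', default='results')
    # Data / model configuration
    parser.add_argument('--model-type', dest='model_type', default='gru')
    parser.add_argument('--t-hours', dest='t_hours', type=int, default=48)
    parser.add_argument('--n-bins', dest='n_bins', type=int, default=20)
    parser.add_argument('--latent-dim', dest='latent_dim', type=int, default=32)
    parser.add_argument('--hidden-dim', dest='hidden_dim', type=int, default=256)
    parser.add_argument('--p-dropout', dest='p_dropout', type=float, default=0.0)
    parser.add_argument('--dt', type=float, default=1.0)
    parser.add_argument('--weighted', action='store_true')
    parser.add_argument('--dynamic', action='store_true')
    # Optimisation
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--bs', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--early-stopping', dest='early_stopping', type=int, default=5)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--p-bar', dest='p_bar', action='store_true')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--test', action='store_true')
    main(parser.parse_args())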