Example 1
    def compute_metrics(self, y_pred, y_true):
        """Compute metrics for predicted vs true labels."""
        if not isinstance(y_pred, np.ndarray):
            y_pred = array(y_pred)
        if not isinstance(y_true, np.ndarray):
            y_true = array(y_true)

        if y_pred.ndim == 2:
            y_pred = y_pred[:, -1]
            y_true = y_true[:, -1]

        metrics = {}
        metrics['auroc'] = [roc_auc_score(y_true, y_pred)]

        return metrics
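These snippets call an `array(...)` helper that is not shown here. A minimal sketch of what it presumably does, assuming it simply detaches tensors, moves them to the CPU, and converts them to NumPy (the body below is an assumption, not the original implementation):

import numpy as np
import torch


def array(x):
    # Hypothetical helper (assumed): convert a torch tensor, possibly on GPU,
    # or any array-like into a NumPy array.
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    return np.asarray(x)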
Example 2
    def _valid_epoch(self, data_loader, storer=defaultdict(list)):
        """Trains the model on the validation set for one epoch."""
        epoch_loss = 0.
        y_preds = []

        with trange(len(data_loader)) as t:
            for data, y_true in data_loader:
                data = data.to(self.device)
                y_true = y_true.to(self.device)

                y_pred = self.model(data)
                y_preds += [array(y_pred)]
                iter_loss = self.loss_f(y_pred, y_true, self.model.training,
                                        storer)
                epoch_loss += iter_loss.item()

                if self.p_bar:
                    t.set_postfix(loss=iter_loss.item())
                    t.update()

        y_preds = np.concatenate(y_preds)
        y_trues = data_loader.dataset.Y

        metrics = self.compute_metrics(y_preds, y_trues)
        storer.update(metrics)

        return epoch_loss / len(data_loader)
Example 3
def predict(test_loader, model):
    """Run the model over a test DataLoader and return predictions as a single NumPy array."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    preds = []
    with torch.no_grad():
        for X in test_loader:
            X = X[0].to(device)
            preds += [model(X)]

    return array(torch.cat(preds))
Example 4
def pmort_vs_time(model, idx, show_plot=True):
    # Load data
    data = np.load(os.path.join('data', 'arrs_48_20.npy'),
                   allow_pickle=True).item()
    models = [f for f in os.listdir('results') if f.startswith(model)]

    # Print info
    print(idx, data['paths_train'][idx], data['Y_train'][idx])

    preds = np.zeros((len(models), 48))
    for i, model_name in enumerate(models):
        # Load model (loop variable renamed so it no longer shadows the `model` argument)
        model_dir = os.path.join('results', model_name)
        model = load_model(model_dir)

        # Predict
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        x = data['X_train'][idx:idx+1]
        x = torch.tensor(x).to(device)
        preds[i] = array(model(x))

    # Bootstrap values
    mu, lb, ub = bootstrap2d(preds, low=0.025, high=0.975, n_samples=10000)

    # Plot
    plt.fill_between(range(1, len(mu)+1), lb, ub, color='r', alpha=0.5, label='95% CI')
    plt.plot(range(1, len(mu)+1), mu, ':ko', label='Mean')

    plt.xlim(0, 48)
    plt.xticks([0, 12, 24, 36, 48], fontsize=12)
    plt.xlabel('In-Hospital Hours After Admission', fontsize=15)

    plt.ylim(0, 1)
    plt.yticks(np.linspace(0, 1, 11), fontsize=12)
    plt.ylabel('p$_{mortality}$', fontsize=15)
    plt.legend()

    if show_plot:
        plt.show()
    else:
        plt.savefig(os.path.join('figs', f'pmortvstime_48_20bins_idx{idx}.pdf'))
        return array(x), model
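Example 4 uses a `bootstrap2d` function that is not shown. Judging from the call `bootstrap2d(preds, low=0.025, high=0.975, n_samples=10000)` on an array of shape (n_models, 48) and the returned mean/lower/upper curves, it likely resamples the model axis and returns per-hour statistics; a hedged sketch under that assumption:

import numpy as np


def bootstrap2d(arr, low=0.025, high=0.975, n_samples=10000):
    # Hypothetical sketch (assumed, not the original implementation):
    # bootstrap the rows of a 2-D array and return the observed per-column
    # mean plus percentile bounds of the bootstrapped column means.
    n_rows = arr.shape[0]
    idx = np.random.randint(0, n_rows, size=(n_samples, n_rows))
    boot_means = arr[idx].mean(axis=1)        # shape: (n_samples, n_cols)
    mu = arr.mean(axis=0)                     # observed per-column mean
    lb = np.quantile(boot_means, low, axis=0)
    ub = np.quantile(boot_means, high, axis=0)
    return mu, lb, ub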
Example 5
def test(model, device, test_loader):
    """Evaluate the model on the test set; return mean loss, AUROC, and AUPRC."""
    model.eval()
    test_loss = 0

    outputs = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.binary_cross_entropy(output, target).item()
            outputs += [array(output)]

    mean_test_loss = test_loss / len(test_loader)

    outputs = np.concatenate(outputs)[:, -1]
    targets = test_loader.dataset.Y[:, -1]
    test_auroc = roc_auc_score(targets, outputs)
    test_auprc = average_precision_score(targets, outputs)

    return mean_test_loss, test_auroc, test_auprc
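A self-contained usage sketch for Example 5's `test` on random data; every name below (the toy tensors, batch size, and the tiny model) is a placeholder, not from the original code, and Example 5's own imports (torch.nn.functional as F and the sklearn metrics) are assumed to be in scope:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Placeholder data: 128 examples, 10 features, 3 binary outputs.
X = torch.rand(128, 10)
Y = (torch.rand(128, 3) > 0.5).float()

dataset = TensorDataset(X, Y)
dataset.Y = Y.numpy()  # `test` reads labels from test_loader.dataset.Y
loader = DataLoader(dataset, batch_size=32)

# Placeholder model with sigmoid outputs so binary_cross_entropy applies directly.
model = nn.Sequential(nn.Linear(10, 3), nn.Sigmoid())
device = torch.device('cpu')

loss, auroc, auprc = test(model, device, loader)
print(f'loss={loss:.4f}  AUROC={auroc:.3f}  AUPRC={auprc:.3f}')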
Example 6
def case_study(model, idx):
    """Plot p_mortality over time for one training example and write its hourly tokens and weights to CSV files."""
    case_study_dir = os.path.join('results', f'case_study_{idx}')

    # Plot
    x, model = pmort_vs_time(model, idx, show_plot=False)

    # Reverse dict
    token2index = np.load(os.path.join('data', 'token2index_48_20.npy'),
                          allow_pickle=True).item()
    index2token = {v: k for k, v in token2index.items()}
    index2token[0] = ''

    # Hourly tokens and weights
    weight = np.exp(array(model.embedder.embedW.weights)[:, 0])
    token_inds = []
    tokens = []
    W = []
    for t in np.arange(0, 48):
        x_t = x[0, (t < x[0, :, 0]) & (x[0, :, 0] < (t + 1)), 1]

        w = weight[x_t.astype(np.int64)]
        inds = np.argsort(-w)

        token_inds += [inds]
        tokens += [np.array([index2token[i] for i in x_t])[inds]]
        W += [w[inds]]

    if not os.path.exists(case_study_dir):
        os.makedirs(case_study_dir)

    # Table
    pd.DataFrame(token_inds).transpose().to_csv(
        os.path.join(case_study_dir, 'input_inds.csv'), index=False)
    pd.DataFrame(tokens).transpose().to_csv(
        os.path.join(case_study_dir, 'inputs.csv'), index=False)
    pd.DataFrame(W).transpose().to_csv(
        os.path.join(case_study_dir, 'weights.csv'), index=False)
Example 7
    def _valid_epoch(self, data_loader_pos, data_loader_neg, 
                     data_loader, storer=defaultdict(list)):
        """Trains the model on the validation set for one epoch."""
        epoch_loss = 0.
        
        coef_pos = 0.5
        coef_neg = 0.5
        
        y_preds_bal = []
        y_trues_bal = []
        y_preds_rand = []
        y_trues_rand = []
        
        loader_pos = iter(data_loader_pos)
        loader_neg = iter(data_loader_neg)

        with trange(len(data_loader)) as t:
            for data, y_true in data_loader:
                
                try:
                    X_pos, y_pos = next(loader_pos) 
                except StopIteration:
                    loader_pos = iter(data_loader_pos)
                    X_pos, y_pos = next(loader_pos) 
                try:
                    X_neg, y_neg = next(loader_neg) 
                except StopIteration:
                    loader_neg = iter(data_loader_neg)
                    X_neg, y_neg = next(loader_neg)
                
                
                data = data.to(self.device)
                y_true = y_true.to(self.device)
                X_pos = X_pos.to(self.device)
                y_pos = y_pos.to(self.device)
                X_neg = X_neg.to(self.device)
                y_neg = y_neg.to(self.device)

                if_main = False
                y_pred = self.model(data, if_main)
                
                iter_loss_rand = self.loss_f(
                    y_pred, y_true, self.model.training, if_main, storer)
                
                
                if_main = True
                y_pred_pos = self.model(X_pos, if_main)
                
                iter_loss_pos = self.loss_f(y_pred_pos, y_pos, 
                                             self.model.training, if_main, storer)
                
                y_pred_neg = self.model(X_neg, if_main)
                
                iter_loss_neg = self.loss_f(y_pred_neg, y_neg, 
                                             self.model.training, if_main, storer)
                
                iter_loss_balance = iter_loss_pos * coef_pos + iter_loss_neg * coef_neg
                
                iter_loss = (iter_loss_rand + iter_loss_balance) * 0.5
                
                epoch_loss += iter_loss.item()

                # Collect targets/predictions separately for the balanced (pos/neg) and random batches.
                y_trues_bal += [array(y_pos)]
                y_trues_bal += [array(y_neg)]
                
                y_preds_bal += [array(y_pred_pos)]
                y_preds_bal += [array(y_pred_neg)]
                
                y_trues_rand += [array(y_true)]
                y_preds_rand += [array(y_pred)]
                
                if self.p_bar:
                    t.set_postfix(loss=iter_loss.item())
                    t.update()

        y_preds_bal = np.concatenate(y_preds_bal)
        y_trues_bal = np.concatenate(y_trues_bal)
        
        y_trues_rand = np.concatenate(y_trues_rand)
        y_preds_rand = np.concatenate(y_preds_rand)
        
        metrics = self.compute_metrics(y_preds_bal, y_trues_bal, if_bal=True)
        storer.update(metrics)
        
        metrics = self.compute_metrics(y_preds_rand, y_trues_rand, if_bal=False)
        storer.update(metrics)

        return epoch_loss / len(data_loader)