def compute_metrics(self, y_pred, y_true):
    """Compute evaluation metrics for predicted vs. true labels.

    Inputs are coerced to numpy arrays; for 2-D (per-timestep) outputs only
    the final timestep is scored. Returns a dict mapping metric name to a
    one-element list (currently just AUROC).
    """
    y_pred = y_pred if isinstance(y_pred, np.ndarray) else array(y_pred)
    y_true = y_true if isinstance(y_true, np.ndarray) else array(y_true)
    if y_pred.ndim == 2:
        # Score only the last timestep of sequence outputs.
        y_pred, y_true = y_pred[:, -1], y_true[:, -1]
    return {'auroc': [roc_auc_score(y_true, y_pred)]}
def _valid_epoch(self, data_loader, storer=None):
    """Run the model over the validation set for one epoch.

    Parameters
    ----------
    data_loader : iterable yielding ``(data, y_true)`` batches; must expose
        ``dataset.Y`` with the full label array.
    storer : dict of lists, optional
        Accumulator for loss components and metrics. Defaults to a fresh
        ``defaultdict(list)`` per call — the previous mutable default
        argument was a single dict shared across all calls (a bug).

    Returns
    -------
    float
        Epoch loss averaged over the number of batches.
    """
    if storer is None:
        storer = defaultdict(list)
    epoch_loss = 0.
    y_preds = []
    with trange(len(data_loader)) as t:
        for data, y_true in data_loader:
            data = data.to(self.device)
            y_true = y_true.to(self.device)
            y_pred = self.model(data)
            y_preds += [array(y_pred)]
            iter_loss = self.loss_f(y_pred, y_true, self.model.training,
                                    storer)
            epoch_loss += iter_loss.item()
            if self.p_bar:
                t.set_postfix(loss=iter_loss.item())
            t.update()
    y_preds = np.concatenate(y_preds)
    # NOTE(review): assumes the loader iterates without shuffling, so
    # dataset.Y aligns row-for-row with accumulated predictions — confirm.
    y_trues = data_loader.dataset.Y
    metrics = self.compute_metrics(y_preds, y_trues)
    storer.update(metrics)
    return epoch_loss / len(data_loader)
def predict(test_loader, model):
    """Return the model's predictions over ``test_loader`` as one array.

    Batches are moved to GPU when available; inference runs under
    ``torch.no_grad`` with the model in eval mode.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(device)
            batch_outputs.append(model(inputs))
    return array(torch.cat(batch_outputs))
def pmort_vs_time(model, idx, show_plot=True):
    """Plot bootstrapped p(mortality) over 48 in-hospital hours for one stay.

    Parameters
    ----------
    model : str
        Prefix selecting model run directories under ``results/``.
    idx : int
        Index of the training example to analyse.
    show_plot : bool
        Show the figure interactively when True, otherwise save it to
        ``figs/``.

    Returns
    -------
    tuple
        ``(x_array, last_model)`` — the input sequence as a numpy array and
        the last loaded model (used downstream by ``case_study``).

    Fixes: the original shadowed the ``model`` parameter with the loop
    variable; loop-invariant ``device``/``x`` setup is hoisted out of the
    loop.
    """
    # Load data. NOTE(review): .item() on np.load implies a pickled dict;
    # newer numpy needs allow_pickle=True — confirm the pinned version.
    data = np.load(os.path.join('data', 'arrs_48_20.npy')).item()
    model_names = [f for f in os.listdir('results') if f.startswith(model)]
    # Print info about the selected example.
    print(idx, data['paths_train'][idx], data['Y_train'][idx])
    # Prepare the (invariant) input tensor once.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    x = torch.tensor(data['X_train'][idx:idx + 1]).to(device)
    preds = np.zeros((len(model_names), 48))
    loaded_model = None
    for i, name in enumerate(model_names):
        # Load each model run and predict hourly mortality probabilities.
        loaded_model = load_model(os.path.join('results', name))
        preds[i] = array(loaded_model(x))
    # Bootstrap mean and 95% CI across model runs.
    mu, lb, ub = bootstrap2d(preds, low=0.025, high=0.975, n_samples=10000)
    # Plot
    plt.fill_between(range(1, len(mu) + 1), lb, ub, color='r', alpha=0.5,
                     label='95% CI')
    plt.plot(range(1, len(mu) + 1), mu, ':ko', label='Mean')
    plt.xlim(0, 48)
    plt.xticks([0, 12, 24, 36, 48], fontsize=12)
    plt.xlabel('In-Hospital Hours After Admission', fontsize=15)
    plt.ylim(0, 1)
    plt.yticks(np.linspace(0, 1, 11), fontsize=12)
    plt.ylabel('p$_{mortality}$', fontsize=15)
    plt.legend()
    if show_plot:
        plt.show()
    else:
        plt.savefig(os.path.join('figs',
                                 f'pmortvstime_48_20bins_idx{idx}.pdf'))
    return array(x), loaded_model
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader``.

    Returns a tuple ``(mean_test_loss, test_auroc, test_auprc)`` computed on
    the final timestep of the outputs and labels.
    """
    model.eval()
    total_loss = 0
    collected = []
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            total_loss += F.binary_cross_entropy(output, target).item()
            collected.append(array(output))
    mean_test_loss = total_loss / len(test_loader)
    # Score only the last timestep of each sequence.
    final_outputs = np.concatenate(collected)[:, -1]
    final_targets = test_loader.dataset.Y[:, -1]
    test_auroc = roc_auc_score(final_targets, final_outputs)
    test_auprc = average_precision_score(final_targets, final_outputs)
    return mean_test_loss, test_auroc, test_auprc
def case_study(model, idx):
    """Export per-hour input tokens and their weights for one patient stay.

    Runs ``pmort_vs_time`` (saving its figure), then for each of the 48
    in-hospital hours ranks that hour's input tokens by the model's
    exponentiated embedding weights, writing indices, token strings and
    weights as CSV tables under ``results/case_study_<idx>/``.
    """
    case_study_dir = os.path.join('results', f'case_study_{idx}')
    # Plot (saved to disk) and recover the input sequence + loaded model.
    x, model = pmort_vs_time(model, idx, show_plot=False)
    # Build the inverse vocabulary; index 0 is padding -> empty string.
    token2index = np.load(os.path.join('data',
                                       'token2index_48_20.npy')).item()
    index2token = {v: k for k, v in token2index.items()}
    index2token[0] = ''
    # Per-token weights from the embedding layer (first column, exp'd).
    weight = np.exp(array(model.embedder.embedW.weights)[:, 0])
    token_inds, tokens, W = [], [], []
    for t in np.arange(0, 48):
        # Tokens recorded strictly within hour (t, t+1); column 0 is time,
        # column 1 the token index.
        hour_mask = (t < x[0, :, 0]) & (x[0, :, 0] < (t + 1))
        x_t = x[0, hour_mask, 1]
        w = weight[x_t.astype(np.int64)]
        order = np.argsort(-w)  # descending by weight
        token_inds.append(order)
        tokens.append(np.array([index2token[i] for i in x_t])[order])
        W.append(w[order])
    if not os.path.exists(case_study_dir):
        os.makedirs(case_study_dir)
    # Write one column per hour.
    pd.DataFrame(token_inds).transpose().to_csv(
        os.path.join(case_study_dir, 'input_inds.csv'), index=None)
    pd.DataFrame(tokens).transpose().to_csv(
        os.path.join(case_study_dir, 'inputs.csv'), index=None)
    pd.DataFrame(W).transpose().to_csv(
        os.path.join(case_study_dir, 'weights.csv'), index=None)
def _valid_epoch(self, data_loader_pos, data_loader_neg, data_loader,
                 storer=None):
    """Run one validation epoch over random plus class-balanced batches.

    Each batch from ``data_loader`` (random) is paired with one positive and
    one negative batch; the class-specific loaders are recycled when
    exhausted. The random batch feeds the auxiliary head (``if_main=False``)
    and the balanced batches the main head (``if_main=True``); the reported
    loss is the mean of the random loss and the equally-weighted balanced
    loss.

    Parameters
    ----------
    data_loader_pos, data_loader_neg : loaders over positive / negative
        examples, cycled independently of the epoch length.
    data_loader : loader over the full validation set; defines epoch length.
    storer : dict of lists, optional
        Accumulator for loss components and metrics. Defaults to a fresh
        ``defaultdict(list)`` per call — the previous mutable default
        argument was a single dict shared across all calls (a bug).

    Returns
    -------
    float
        Epoch loss averaged over the number of random batches.
    """
    if storer is None:
        storer = defaultdict(list)
    epoch_loss = 0.
    # Positive and negative batches contribute equally to the balanced loss.
    coef_pos = 0.5
    coef_neg = 0.5
    y_preds_bal, y_trues_bal = [], []
    y_preds_rand, y_trues_rand = [], []
    loader_pos = iter(data_loader_pos)
    loader_neg = iter(data_loader_neg)

    def _next_cycled(iterator, loader):
        # Draw the next batch, restarting the loader when exhausted.
        try:
            return iterator, next(iterator)
        except StopIteration:
            iterator = iter(loader)
            return iterator, next(iterator)

    with trange(len(data_loader)) as t:
        for data, y_true in data_loader:
            loader_pos, (X_pos, y_pos) = _next_cycled(loader_pos,
                                                      data_loader_pos)
            loader_neg, (X_neg, y_neg) = _next_cycled(loader_neg,
                                                      data_loader_neg)
            data = data.to(self.device)
            y_true = y_true.to(self.device)
            X_pos = X_pos.to(self.device)
            y_pos = y_pos.to(self.device)
            X_neg = X_neg.to(self.device)
            y_neg = y_neg.to(self.device)
            # Random batch -> auxiliary head.
            if_main = False
            y_pred = self.model(data, if_main)
            iter_loss_rand = self.loss_f(
                y_pred, y_true, self.model.training, if_main, storer)
            # Balanced batches -> main head.
            if_main = True
            y_pred_pos = self.model(X_pos, if_main)
            iter_loss_pos = self.loss_f(y_pred_pos, y_pos,
                                        self.model.training, if_main, storer)
            y_pred_neg = self.model(X_neg, if_main)
            iter_loss_neg = self.loss_f(y_pred_neg, y_neg,
                                        self.model.training, if_main, storer)
            iter_loss_balance = (iter_loss_pos * coef_pos
                                 + iter_loss_neg * coef_neg)
            iter_loss = (iter_loss_rand + iter_loss_balance) * 0.5
            epoch_loss += iter_loss.item()
            # Collect predictions for both the balanced and random streams.
            y_trues_bal += [array(y_pos), array(y_neg)]
            y_preds_bal += [array(y_pred_pos), array(y_pred_neg)]
            y_trues_rand += [array(y_true)]
            y_preds_rand += [array(y_pred)]
            if self.p_bar:
                t.set_postfix(loss=iter_loss.item())
            t.update()
    y_preds_bal = np.concatenate(y_preds_bal)
    y_trues_bal = np.concatenate(y_trues_bal)
    y_trues_rand = np.concatenate(y_trues_rand)
    y_preds_rand = np.concatenate(y_preds_rand)
    metrics = self.compute_metrics(y_preds_bal, y_trues_bal, if_bal=True)
    storer.update(metrics)
    metrics = self.compute_metrics(y_preds_rand, y_trues_rand, if_bal=False)
    storer.update(metrics)
    return epoch_loss / len(data_loader)