def allied_train(model, optimizer, loss_fn, data_iterator, metrics, params, num_steps,
                 train_target=False):
    """Train the allied (multi-task) model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network; returns (target, ICD) outputs
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
        train_target: (bool) if True, train on the target task in addition to the ICD task
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    running_auc = utils.OutputAUC()
    running_auc_icds = utils.MetricsICD()
    for i in t:
        # fetch the next training batch
        train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids = next(data_iterator)
        output_batch, icd_batch = model(train_batch_w2v)

        if train_target:
            loss = loss_fn(output_batch, labels_batch) + loss_fn(icd_batch, icd_labels)
        else:
            loss = loss_fn(icd_batch, icd_labels)
        loss = loss / params.grad_acc  # Normalize our loss (if averaged)

        running_auc.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())
        running_auc_icds.update(icd_labels.data.cpu().numpy(), icd_batch.data.cpu().numpy())

        # compute gradients of all variables wrt loss
        loss.backward()
        if i % params.grad_acc == 0:
            # performs updates using calculated gradients
            optimizer.step()
            # clear previous gradients
            optimizer.zero_grad()

        # Evaluate summaries only once in a while
        if i % params.save_summary_steps == 0:
            # extract data from torch Variable, move to cpu, convert to numpy arrays
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                             for metric in metrics}
            summary_batch['loss'] = float(loss.data.item())
            summ.append(summary_batch)

        # update the average loss
        loss_avg.update(float(loss.data.item()))
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))

    # compute mean of all metrics in summary
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
    logging.info('Train AUC: ' + str(running_auc()))
    logging.info('Train ICD AUC: ' + str(running_auc_icds()))
    return loss_avg()
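
# NOTE: the loops above and below rely on `utils.RunningAverage` (update() accumulates a
# scalar, calling the object returns the current mean). The following is a minimal sketch
# of that interface for illustration only; the real implementation lives in utils.py and
# may differ.
class RunningAverage:
    """Maintains the running average of a scalar quantity (illustrative sketch)."""

    def __init__(self):
        self.steps = 0
        self.total = 0.0

    def update(self, val):
        # accumulate one observation
        self.total += val
        self.steps += 1

    def __call__(self):
        # return the mean of all observations so far
        return self.total / float(self.steps)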
def train(model, optimizer, loss_fn, data_iterator, params, num_steps):
    """Train the model on `num_steps` batches

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    running_auc = utils.OutputAUC()
    for i in t:
        # fetch the next training batch
        train_batch_w2v, train_batch_sp, labels_batch, ids = next(data_iterator)

        if 'w2v' in params.emb:
            output_batch = model(train_batch_w2v)
        elif 'sp' in params.emb:
            output_batch = model(train_batch_sp)
        else:
            output_batch = model(train_batch_w2v, train_batch_sp)

        loss = loss_fn(output_batch, labels_batch)
        loss = loss / params.grad_acc  # Normalize our loss (if averaged)

        running_auc.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())

        loss.backward()
        if i % params.grad_acc == 0:
            # performs updates using calculated gradients
            optimizer.step()
            # clear previous gradients
            optimizer.zero_grad()

        # update the average loss
        loss_avg.update(float(loss.data.item()))
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))

    logging.info('Train AUC: ' + str(running_auc()))
    return loss_avg()
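
# NOTE: hypothetical usage sketch showing how `train` and the single-task `evaluate`
# defined below might be wired together for one experiment. `fetch_iterator`, the
# `data['size']` key, and `params.num_epochs` are placeholders/assumptions, not names
# defined in this module.
def train_and_evaluate_sketch(model, optimizer, loss_fn, params, train_data, val_data):
    for epoch in range(params.num_epochs):                                   # assumed hyperparameter
        # one pass over the training set
        num_train_steps = (train_data['size'] + 1) // params.batch_size      # assumed data layout
        train_iterator = fetch_iterator(train_data, params, shuffle=True)    # placeholder generator
        train(model, optimizer, loss_fn, train_iterator, params, num_train_steps)

        # one pass over the validation set
        num_val_steps = (val_data['size'] + 1) // params.batch_size
        val_iterator = fetch_iterator(val_data, params, shuffle=False)       # placeholder generator
        val_metrics = evaluate(model, loss_fn, val_iterator, params, num_val_steps)
        logging.info("Epoch {}: val AUCROC {:05.3f}".format(epoch + 1, val_metrics['AUCROC']))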
def evaluate(model, loss_fn, data_iterator, params, num_steps):
    """Evaluate the model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
    """
    # set model to evaluation mode
    model.eval()

    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()

    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for _ in t:
            # fetch the next evaluation batch
            train_batch_w2v, train_batch_sp, labels_batch, ids = next(data_iterator)

            if 'w2v' in params.emb:
                output_batch = model(train_batch_w2v)
            elif 'sp' in params.emb:
                output_batch = model(train_batch_sp)
            else:
                output_batch = model(train_batch_w2v, train_batch_sp)

            # loss is computed but not tracked during evaluation
            loss_fn(output_batch, labels_batch)

            running_auc.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())
            running_metrics.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())

    logging.info('AUCROC: ' + str(running_auc()))
    logging.info('METRICS: ' + str(running_metrics()))
    metrics = running_metrics()
    return {'AUCROC': metrics[0], "AUCPR": metrics[1]}
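
# NOTE: a minimal sketch of what `utils.OutputAUC` is assumed to do, based on how it is
# used in these loops: accumulate labels and model outputs across batches and report ROC
# AUC when called. The real helper in utils.py may differ.
from sklearn.metrics import roc_auc_score


class OutputAUC:
    """Accumulates (labels, outputs) pairs and reports ROC AUC (illustrative sketch)."""

    def __init__(self):
        self.labels = []
        self.outputs = []

    def update(self, labels, outputs):
        # store flattened batch labels and predictions
        self.labels.append(labels.ravel())
        self.outputs.append(outputs.ravel())

    def __call__(self):
        # ROC AUC over everything seen so far
        return roc_auc_score(np.concatenate(self.labels), np.concatenate(self.outputs))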
def evaluate(model, loss_fn, data_iterator, metrics, params, num_steps):
    """Evaluate the model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
    """
    # set model to evaluation mode
    model.eval()

    # summary for current eval loop
    summ = []

    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()
    running_icd = utils.MetricsICD()

    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for i in t:
            # fetch the next evaluation batch
            if 'phen' in params.model:
                if params.task == 'icd':
                    train_batch_w2v, train_batch_sp, _, labels_batch = next(data_iterator)
                else:
                    train_batch_w2v, train_batch_sp, labels_batch, _ = next(data_iterator)
                output_batch = model(train_batch_w2v)
                loss = loss_fn(output_batch, labels_batch)
                loss = loss / params.grad_acc  # Normalize our loss (if averaged)
            elif params.model == 'lr':
                train_batch, labels_batch = next(data_iterator)
                output_batch = model(train_batch)
                loss = loss_fn(output_batch, labels_batch)
                loss = loss / params.grad_acc  # Normalize our loss (if averaged)
            else:
                train_batch_w2v, train_batch_sp, labels_batch, _, ids = next(data_iterator)
                output_batch = model(train_batch_w2v)
                loss = loss_fn(output_batch, labels_batch)

            running_auc.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())
            running_metrics.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())
            if params.task == 'icd_only':
                running_icd.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())

            # extract data from torch Variable, move to cpu, convert to numpy arrays
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                             for metric in metrics}
            summary_batch['loss'] = loss.data.item()
            summ.append(summary_batch)

    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Eval metrics: " + metrics_string)
    logging.info('AUCROC: ' + str(running_auc()))

    if params.task == 'icd_only':
        return {'AUCROC': running_icd(), 'MACRO_AUCROC_ICD': running_icd.macro_auc()}
    else:
        logging.info('METRICS: ' + str(running_metrics()))
        test_metrics = running_metrics()
        return {'AUCROC': test_metrics[0], "AUCPR": test_metrics[1]}
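
# NOTE: hedged sketch of the multi-label ICD helper assumed above. From its call sites,
# `utils.MetricsICD` appears to expose a micro-averaged ROC AUC via __call__ and a
# macro-averaged one via macro_auc(); the actual utils.py implementation may differ.
class MetricsICD:
    """Accumulates multi-label ICD predictions and reports micro/macro AUC (illustrative sketch)."""

    def __init__(self):
        self.labels = []
        self.outputs = []

    def update(self, labels, outputs):
        self.labels.append(labels)
        self.outputs.append(outputs)

    def __call__(self):
        # micro average: flatten all label/prediction pairs into one ROC curve
        y, p = np.concatenate(self.labels), np.concatenate(self.outputs)
        return roc_auc_score(y.ravel(), p.ravel())

    def macro_auc(self):
        # macro average: per-code AUC, skipping codes with a single class present
        y, p = np.concatenate(self.labels), np.concatenate(self.outputs)
        aucs = [roc_auc_score(y[:, c], p[:, c])
                for c in range(y.shape[1]) if len(np.unique(y[:, c])) > 1]
        return float(np.mean(aucs))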
def allied_final_evaluate(model, loss_fn, data_iterator, metrics, params, num_steps, allied=False):
    """Evaluate the allied (multi-task) model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network; returns (target, ICD) outputs
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
    """
    # set model to evaluation mode
    model.eval()

    # summary for current eval loop
    summ = []

    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()
    running_icd = utils.MetricsICD()

    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for i in t:
            # fetch the next evaluation batch
            train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids = next(data_iterator)

            if 'w2v' in params.emb:
                output_batch, icd_batch = model(train_batch_w2v)
            elif 'sp' in params.emb:
                output_batch, icd_batch = model(train_batch_sp)
            else:
                output_batch, icd_batch = model(train_batch_w2v, train_batch_sp)

            loss = loss_fn(output_batch, labels_batch)

            running_icd.update(icd_labels.data.cpu().numpy(), icd_batch.data.cpu().numpy())
            running_auc.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())
            running_metrics.update(labels_batch.data.cpu().numpy(), output_batch.data.cpu().numpy())

            # extract data from torch Variable, move to cpu, convert to numpy arrays
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                             for metric in metrics}
            summary_batch['loss'] = loss.data.item()
            summ.append(summary_batch)

    test_metrics = running_metrics()
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Eval metrics: " + metrics_string)
    logging.info('AUCROC: ' + str(running_auc()))
    logging.info('AUCROC: ' + str(test_metrics[0]))
    logging.info('AUCPR: ' + str(test_metrics[1]))
    logging.info('MICRO AUCROC_ICD: ' + str(running_icd()))
    macro_auc = running_icd.macro_auc()
    logging.info('MACRO AUCROC_ICD: ' + str(macro_auc))
    return {
        'AUCROC': test_metrics[0],
        "AUCPR": test_metrics[1],
        "MICRO_AUCROC_ICD": running_icd(),
        "MACRO_AUCROC_ICD": macro_auc
    }