import logging

import numpy as np
import torch
from tqdm import trange

import utils


def allied_train(model, optimizer, loss_fn, data_iterator, metrics, params, num_steps, train_target=False):
    """Train the allied (multi-task) model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network; returns (target, ICD) output pairs
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
        train_target: (bool) if True, add the target-task loss to the ICD loss; otherwise train on the ICD loss only
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    running_auc = utils.OutputAUC()
    running_auc_icds = utils.MetricsICD()
    for i in t:
        # fetch the next training batch
        train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids = next(data_iterator)

        output_batch, icd_batch = model(train_batch_w2v)
        if train_target:
            loss = loss_fn(output_batch, labels_batch) + loss_fn(icd_batch, icd_labels)
        else:
            loss = loss_fn(icd_batch, icd_labels)
        loss = loss / params.grad_acc  # Normalize our loss (if averaged)

        running_auc.update(labels_batch.detach().cpu().numpy(), output_batch.detach().cpu().numpy())
        running_auc_icds.update(icd_labels.detach().cpu().numpy(), icd_batch.detach().cpu().numpy())

        # compute gradients of all variables wrt loss
        loss.backward()

        if (i + 1) % params.grad_acc == 0:  # step once every grad_acc batches
            # performs updates using calculated gradients
            optimizer.step()
            # clear previous gradients
            optimizer.zero_grad()

        # Evaluate summaries only once in a while
        if i % params.save_summary_steps == 0:
            # detach from the graph, move to cpu, convert to numpy arrays
            output_batch = output_batch.detach().cpu().numpy()
            labels_batch = labels_batch.detach().cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                             for metric in metrics}

            summary_batch['loss'] = loss.item()
            summ.append(summary_batch)

        # update the average loss
        loss_avg.update(loss.item())
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))

    # compute mean of all metrics in summary
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
    logging.info('Train AUC: ' + str(running_auc()))
    logging.info('Train ICD AUC: ' + str(running_auc_icds()))
    return loss_avg()
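
utils.RunningAverage is used throughout but never defined in these snippets. A minimal sketch consistent with the calls above (update(value), then a no-argument call returning the mean) could look like the following; the field names are assumptions, not the original implementation:

class RunningAverage:
    """Running average of a stream of floats (assumed interface: update(), __call__())."""

    def __init__(self):
        self.steps = 0
        self.total = 0.0

    def update(self, val):
        self.total += val
        self.steps += 1

    def __call__(self):
        # mean of everything seen so far
        return self.total / float(self.steps)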
Example 2
def train(model, optimizer, loss_fn, data_iterator, params, num_steps):
    """Train the model on `num_steps` batches
    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        params: (Params) hyperparameters
        num_steps: (int) number of batches to train on, each of size params.batch_size
    """

    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    summ = []
    loss_avg = utils.RunningAverage()

    # Use tqdm for progress bar
    t = trange(num_steps)
    running_auc = utils.OutputAUC()
    for i in t:
        # fetch the next training batch
        train_batch_w2v, train_batch_sp, labels_batch, ids = next(
            data_iterator)
        if 'w2v' in params.emb:
            output_batch = model(train_batch_w2v)
        elif 'sp' in params.emb:
            output_batch = model(train_batch_sp)
        else:
            output_batch = model(train_batch_w2v, train_batch_sp)
        loss = loss_fn(output_batch, labels_batch)
        loss = loss / params.grad_acc  # Normalize our loss (if averaged)

        running_auc.update(labels_batch.detach().cpu().numpy(),
                           output_batch.detach().cpu().numpy())
        loss.backward()

        if (i + 1) % params.grad_acc == 0:  # step once every grad_acc batches
            # performs updates using calculated gradients
            optimizer.step()
            # clear previous gradients
            optimizer.zero_grad()

        # update the average loss
        loss_avg.update(loss.item())
        t.set_postfix(loss='{:05.3f}'.format(loss_avg()))

    logging.info('Train AUC: ' + str(running_auc()))
    return loss_avg()
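
utils.OutputAUC is likewise only called, never shown. Given that update receives per-batch numpy labels and outputs and the object is later called for a single score, a plausible sketch (assuming scikit-learn's roc_auc_score) is:

import numpy as np
from sklearn.metrics import roc_auc_score

class OutputAUC:
    """Accumulates per-batch labels/outputs and computes ROC AUC over the whole run."""

    def __init__(self):
        self.labels = []
        self.outputs = []

    def update(self, labels, outputs):
        self.labels.append(labels)
        self.outputs.append(outputs)

    def __call__(self):
        y_true = np.concatenate(self.labels)
        y_score = np.concatenate(self.outputs)
        return roc_auc_score(y_true, y_score)

Accumulating and scoring once per epoch matters here: AUC is not decomposable across batches, so averaging per-batch AUCs would give a different and noisier number.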
Example 3
def evaluate(model, loss_fn, data_iterator, params, num_steps):
    """Evaluate the model on `num_steps` batches.
    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
    """

    # set model to evaluation mode
    model.eval()
    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()
    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for _ in t:
            # fetch the next evaluation batch
            train_batch_w2v, train_batch_sp, labels_batch, ids = next(
                data_iterator)
            if 'w2v' in params.emb:
                output_batch = model(train_batch_w2v)
            elif 'sp' in params.emb:
                output_batch = model(train_batch_sp)
            else:
                output_batch = model(train_batch_w2v, train_batch_sp)
            # the loss is not tracked in this evaluation loop
            running_auc.update(labels_batch.detach().cpu().numpy(),
                               output_batch.detach().cpu().numpy())
            running_metrics.update(labels_batch.detach().cpu().numpy(),
                                   output_batch.detach().cpu().numpy())

    logging.info('AUCROC: ' + str(running_auc()))
    logging.info('METRICS: ' + str(running_metrics()))
    metrics = running_metrics()
    return {'AUCROC': metrics[0], "AUCPR": metrics[1]}
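
The return statement implies that utils.TestMetrics yields an (AUCROC, AUCPR) pair when called. A sketch under that assumption, using scikit-learn's average_precision_score as the AUCPR stand-in:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

class TestMetrics:
    """Accumulates labels/outputs; returns (AUCROC, AUCPR) when called."""

    def __init__(self):
        self.labels = []
        self.outputs = []

    def update(self, labels, outputs):
        self.labels.append(labels)
        self.outputs.append(outputs)

    def __call__(self):
        y_true = np.concatenate(self.labels)
        y_score = np.concatenate(self.outputs)
        return (roc_auc_score(y_true, y_score),
                average_precision_score(y_true, y_score))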
Example 4
def evaluate(model, loss_fn, data_iterator, metrics, params, num_steps):
    """Evaluate the model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
    """
    # set model to evaluation mode
    model.eval()

    # summary for current eval loop
    summ = []

    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()
    running_icd = utils.MetricsICD()
    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for i in t:
            # fetch the next evaluation batch
            if 'phen' in params.model:
                if params.task == 'icd':
                    train_batch_w2v, train_batch_sp, _, labels_batch = next(
                        data_iterator)
                else:
                    train_batch_w2v, train_batch_sp, labels_batch, _ = next(
                        data_iterator)
                output_batch = model(train_batch_w2v)
                loss = loss_fn(output_batch, labels_batch)
                loss = loss / params.grad_acc  # Normalize our loss (if averaged)
            elif params.model == 'lr':
                train_batch, labels_batch = next(data_iterator)
                output_batch = model(train_batch)
                loss = loss_fn(output_batch, labels_batch)

                loss = loss / params.grad_acc  # Normalize our loss (if averaged)
            else:
                train_batch_w2v, train_batch_sp, labels_batch, _, ids = next(
                    data_iterator)
                output_batch = model(train_batch_w2v)
                loss = loss_fn(output_batch, labels_batch)

            running_auc.update(labels_batch.detach().cpu().numpy(),
                               output_batch.detach().cpu().numpy())
            running_metrics.update(labels_batch.detach().cpu().numpy(),
                                   output_batch.detach().cpu().numpy())
            if params.task == 'icd_only':
                running_icd.update(labels_batch.detach().cpu().numpy(),
                                   output_batch.detach().cpu().numpy())
            # detach from the graph, move to cpu, convert to numpy arrays
            output_batch = output_batch.detach().cpu().numpy()
            labels_batch = labels_batch.detach().cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {
                metric: metrics[metric](output_batch, labels_batch)
                for metric in metrics
            }
            summary_batch['loss'] = loss.item()
            summ.append(summary_batch)

    metrics_mean = {
        metric: np.mean([x[metric] for x in summ])
        for metric in summ[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)
    logging.info('AUCROC' + str(running_auc()))

    if params.task == 'icd_only':
        return {
            'AUCROC': running_icd(),
            'MACRO_AUCROC_ICD': running_icd.macro_auc()
        }
    else:
        logging.info('METRICS: ' + str(running_metrics()))
        test_metrics = running_metrics()
        return {'AUCROC': test_metrics[0], 'AUCPR': test_metrics[1]}
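
utils.MetricsICD exposes both a call (logged as micro AUC in Example 5) and macro_auc(). A sketch for multi-label ICD targets, again assuming scikit-learn; the column filter is a guard I added because AUC is undefined for a code whose labels are all one class:

import numpy as np
from sklearn.metrics import roc_auc_score

class MetricsICD:
    """Accumulates multi-label ICD targets/outputs across batches."""

    def __init__(self):
        self.labels = []
        self.outputs = []

    def update(self, labels, outputs):
        self.labels.append(labels)
        self.outputs.append(outputs)

    def _stack(self):
        return np.concatenate(self.labels), np.concatenate(self.outputs)

    def __call__(self):
        # micro AUC: pool every (sample, code) decision into one ranking
        y_true, y_score = self._stack()
        return roc_auc_score(y_true, y_score, average='micro')

    def macro_auc(self):
        # macro AUC: average per-code AUCs, skipping single-class columns
        y_true, y_score = self._stack()
        keep = (y_true.sum(axis=0) > 0) & (y_true.sum(axis=0) < y_true.shape[0])
        return roc_auc_score(y_true[:, keep], y_score[:, keep], average='macro')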
Example 5
def allied_final_evaluate(model,
                          loss_fn,
                          data_iterator,
                          metrics,
                          params,
                          num_steps,
                          allied=False):
    """Evaluate the allied (multi-task) model on `num_steps` batches.

    Args:
        model: (torch.nn.Module) the neural network; returns (target, ICD) output pairs
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        data_iterator: (generator) a generator that generates batches of data and labels
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        num_steps: (int) number of batches to evaluate on, each of size params.batch_size
        allied: (bool) unused in this function
    """
    # set model to evaluation mode
    model.eval()

    # summary for current eval loop
    summ = []

    # compute metrics over the dataset
    running_auc = utils.OutputAUC()
    running_metrics = utils.TestMetrics()
    running_icd = utils.MetricsICD()
    # Use tqdm for progress bar
    with torch.no_grad():
        t = trange(num_steps)
        for i in t:
            # fetch the next evaluation batch
            train_batch_w2v, train_batch_sp, labels_batch, icd_labels, ids = next(
                data_iterator)
            if 'w2v' in params.emb:
                output_batch, icd_batch = model(train_batch_w2v)
            elif 'sp' in params.emb:
                output_batch, icd_batch = model(train_batch_sp)
            else:
                output_batch, icd_batch = model(train_batch_w2v,
                                                train_batch_sp)
            loss = loss_fn(output_batch, labels_batch)
            running_icd.update(icd_labels.detach().cpu().numpy(),
                               icd_batch.detach().cpu().numpy())
            running_auc.update(labels_batch.detach().cpu().numpy(),
                               output_batch.detach().cpu().numpy())
            running_metrics.update(labels_batch.detach().cpu().numpy(),
                                   output_batch.detach().cpu().numpy())
            # detach from the graph, move to cpu, convert to numpy arrays
            output_batch = output_batch.detach().cpu().numpy()
            labels_batch = labels_batch.detach().cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {
                metric: metrics[metric](output_batch, labels_batch)
                for metric in metrics
            }
            summary_batch['loss'] = loss.item()
            summ.append(summary_batch)

    test_metrics = running_metrics()
    metrics_mean = {
        metric: np.mean([x[metric] for x in summ])
        for metric in summ[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)
    logging.info('AUCROC' + str(running_auc()))
    logging.info('AUCROC' + str(metrics[0]))
    logging.info('AUCPR' + str(metrics[1]))
    logging.info('MICRO AUCROC_ICD' + str(running_icd()))
    macro_auc = running_icd.macro_auc()
    logging.info('MACRO AUCROC_ICD' + str(macro_auc))

    return {
        'AUCROC': test_metrics[0],
        'AUCPR': test_metrics[1],
        'MICRO_AUCROC_ICD': running_icd(),
        'MACRO_AUCROC_ICD': macro_auc
    }
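
To show how these pieces fit together, here is one hypothetical way to wire the allied functions into an epoch loop, reusing the imports at the top of this listing. Everything named only here (build_model, train_iter, val_iter, params.lr, params.num_epochs, and the ICD-first schedule) is an assumption for illustration, not code from the source:

# hypothetical setup; none of these objects are defined in the snippets above
net = build_model(params)                                     # placeholder model factory
optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)  # params.lr is assumed
loss_fn = torch.nn.BCEWithLogitsLoss()                        # a multi-label-friendly loss

for epoch in range(params.num_epochs):                        # params.num_epochs is assumed
    # one possible schedule: ICD-only warm-up on the first epoch, joint training after
    allied_train(net, optimizer, loss_fn, train_iter, metrics, params,
                 num_steps, train_target=(epoch > 0))
    val_metrics = allied_final_evaluate(net, loss_fn, val_iter, metrics,
                                        params, num_steps)
    logging.info('epoch %d AUCROC %.3f', epoch, val_metrics['AUCROC'])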