Exemple #1
0
def _evaluate(data_loader,
              model,
              criterion,
              observer,
              observer_criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine for a model that is monitored by an observer.

    On top of evaluating `model` on the whole split, for every mini-batch the
    features exposed by the model (`model.module.features`) are handed to
    `evaluate_one_mini_batch` together with the observer, and the observer
    loss/accuracy are logged next to those of the model.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used. After a forward pass it must expose the
        features to feed to the observer as `model.module.features`.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    observer : object
        The observer; forwarded untouched to `evaluate_one_mini_batch`.
    observer_criterion : object
        The loss function of the observer; forwarded untouched to
        `evaluate_one_mini_batch`.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        When it contains the key 'run' (with a value other than None), that
        value is appended to every tensorboard tag.

    Returns
    -------
    top1.avg : float
        Accuracy of the model on the evaluated split
    """
    # 'run' is only provided for multi-run events; it becomes a tag suffix
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    observer_loss_meter = AverageMeter()
    top1 = AverageMeter()
    observer_acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader),
                total=num_batches,
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (inputs, target) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU ('async' is a reserved word since Python 3.7,
        # the argument is called 'non_blocking' in PyTorch >= 0.4)
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # No gradients are needed for evaluation (replaces volatile Variables)
        with torch.no_grad():
            # Compute output
            output = model(inputs)

            # Get the features stored by the model during the forward pass
            features = model.module.features.data

            # Use observer on the features
            observer_acc, observer_loss = evaluate_one_mini_batch(
                observer, observer_criterion, features, target,
                observer_loss_meter, observer_acc_meter)

            # Compute the loss
            loss = criterion(output, target)

        # Record the loss
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Compute and record the accuracy
        acc1 = accuracy(output.data, target, topk=(1, ))[0]
        top1.update(acc1[0], inputs.size(0))

        # Get the predictions
        preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
        targets.extend(target.cpu().numpy())

        # Add loss and accuracy to Tensorboard
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix,
                          loss_value, global_step)
        writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix,
                          acc1.cpu().numpy(), global_step)
        writer.add_scalar(logging_label + '/obs_mb_loss' + tag_suffix,
                          observer_loss.item(), global_step)
        writer.add_scalar(logging_label + '/obs_mb_accuracy' + tag_suffix,
                          observer_acc.cpu().numpy(), global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(
                                     epoch, batch_idx, num_batches))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm,
                                                data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Placeholder image so that the logging below still has something to write
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg,
                      epoch)
    writer.add_scalar(logging_label + '/obs_accuracy' + tag_suffix,
                      observer_acc_meter.avg, epoch)
    save_image_and_log_to_tensorboard(
        writer,
        tag=logging_label + '/confusion_matrix' + tag_suffix,
        image_tensor=confusion_matrix_heatmap,
        global_step=epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
Exemple #2
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine (with support for multi-crop batches)

    A 5-dimensional input batch is interpreted as (batch, ncrops, c, h, w):
    the crops are fused into the batch dimension for the forward pass and the
    outputs of the crops belonging to the same sample are averaged afterwards.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        When it contains the key 'run' (with a value other than None), that
        value is appended to every tensorboard tag.

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # 'run' is only provided for multi-run events; it becomes a tag suffix.
    # It is kept separate from the per-batch multi-crop flag below, which
    # previously overwrote it and corrupted the tensorboard tags.
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader),
                total=num_batches,
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (inputs, target) in pbar:
        # TODO: how to implement a sliding window across batches

        # Decided per batch, so a stale crop layout is never reused
        multi_crop = inputs.dim() == 5
        if multi_crop:
            # e.g. [64, 5, 3, 299, 299] -> [320, 3, 299, 299]
            bs, ncrops, c, h, w = inputs.size()
            inputs = inputs.view(-1, c, h, w)

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU ('async' is a reserved word since Python 3.7,
        # the argument is called 'non_blocking' in PyTorch >= 0.4)
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # No gradients are needed for evaluation (replaces volatile Variables)
        with torch.no_grad():
            # Compute output
            output = model(inputs)

            # Average the predictions of the crops of each sample
            if multi_crop:
                output = output.view(bs, ncrops, -1).mean(1)

            # Compute the loss
            loss = criterion(output, target)

        # Weight the meters by the number of predictions, not by the number
        # of crops fed to the network
        num_samples = output.size(0)

        # Record the loss
        loss_value = loss.item()
        losses.update(loss_value, num_samples)

        # Compute and record the accuracy
        acc1 = accuracy(output.data, target, topk=(1, ))[0]
        top1.update(acc1[0], num_samples)

        # Get the predictions
        preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
        targets.extend(target.cpu().numpy())

        # Add loss and accuracy to Tensorboard
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix,
                          loss_value, global_step)
        writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix,
                          acc1.cpu().numpy(), global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(
                                     epoch, batch_idx, num_batches))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm,
                                                data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Placeholder image so that the logging below still has something to write
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg,
                      epoch)
    save_image_and_log_to_tensorboard(
        writer,
        tag=logging_label + '/confusion_matrix' + tag_suffix,
        image=confusion_matrix_heatmap,
        global_step=epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
Exemple #3
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine for image-to-image models

    Every sample of the loader holds the network input and its ground truth
    concatenated along dimension 3; the batch is split into halves, the first
    half is fed to the model and the second half is used as target.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        When it contains the key 'run' (with a value other than None), that
        value is appended to every tensorboard tag.

    Returns
    -------
    None
        No accuracy is computed yet (see the TODO below), only the loss is
        logged.
    """
    # TODO: No accuracy is computed or logged at the moment. It is necessary to
    # TODO: implement a 2D softmax and instead of regressing the output class have it
    # TODO: work with class labels. Notice that, however, it would be
    # TODO: of interest leaving open the possibility to work with soft labels
    # TODO: (e.g. the ground truth for pixel X,Y is an array of probabilities instead
    # TODO: of an integer). Once done, track top-1 accuracy with an AverageMeter,
    # TODO: log it per mini-batch/epoch and return its average.

    # 'run' is only provided for multi-run events; it becomes a tag suffix
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    num_batches = len(data_loader)

    # Stay None if the loader yields no batch at all
    output = satel_image = map_image = None

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader),
                total=num_batches,
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (inputs, _) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU ('async' is a reserved word since Python 3.7,
        # the argument is called 'non_blocking' in PyTorch >= 0.4)
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)

        # Split the data into halves to separate the input from the GT
        satel_image, map_image = torch.chunk(inputs, chunks=2, dim=3)

        # No gradients are needed for evaluation
        with torch.no_grad():
            # Compute output
            output = model(satel_image)

            # Compute the loss
            loss = criterion(output, map_image)

        # Record the loss
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Add loss to Tensorboard
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss_value,
                          epoch * num_batches + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(
                                     epoch, batch_idx, num_batches))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Log the first sample of the last mini-batch as a visual sanity check
    if output is None:
        logging.warning('The data loader is empty, no images were logged')
    else:
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/output' +
                                          tag_suffix,
                                          image=output[:1],
                                          global_step=epoch)
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/input' +
                                          tag_suffix,
                                          image=satel_image[:1])
        # The run suffix is applied here as well, so that the targets of
        # different runs do not end up under the same tag
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/target' +
                                          tag_suffix,
                                          image=map_image[:1])

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses))
def validate(val_loader, model, criterion, writer, epoch, class_encodings, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine on the validation split

    Parameters
    ----------
    val_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the classes (range of ints). Only its length (the number of
        classes) is used here.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        When it contains the key 'run' (with a value other than None), that
        value is appended to every tensorboard tag.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split
    """
    # 'Run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    num_classes = len(class_encodings)
    num_batches = len(val_loader)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    pbar = tqdm(enumerate(val_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    for batch_idx, (inputs, target) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time
        with torch.no_grad():
            # Compute output
            output = model(inputs)

            # Compute the loss
            loss = criterion(output, target)

        # Record the loss
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Compute and record the accuracy
        _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)
        meanIU.update(mean_iu_batch, inputs.size(0))

        # Add loss and meanIU to Tensorboard
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar('val/mb_loss' + tag_suffix, loss_value, global_step)
        writer.add_scalar('val/mb_meanIU' + tag_suffix, mean_iu_batch, global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description('val epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))

            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Logging the epoch-wise meanIU
    writer.add_scalar('val/meanIU' + tag_suffix, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("val") +
                 ' epoch[{}]: '
                 'MeanIU={meanIU.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, meanIU=meanIU))

    return meanIU.avg
def test(test_loader, model, criterion, writer, epoch, class_encodings, img_names_sizes_dict, dataset_folder,
         post_process, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine on the test split

    The loader yields patches of the test images. The network output of every
    patch is merged into a per-image canvas; as soon as a canvas contains no
    NaN anymore the full image is handed to `process_full_image` and its
    mean IU is recorded.

    Parameters
    ----------
    test_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set. Each batch is expected to be
        `((patches, top_left_coordinates, img_names), target)`.
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the range of encoded classes
    img_names_sizes_dict: dictionary {str: (int, int)}
        Key: gt image name (with extension), Value: image size. Used to
        allocate the canvas of each image.
    dataset_folder : str
        Forwarded untouched to `process_full_image`.
    post_process : Boolean
        apply post-processing to the output of the network
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        When it contains the key 'run' (with a value other than None), that
        value is appended to every tensorboard tag.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split (averaged over full images)
    """
    # 'Run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    num_classes = len(class_encodings)
    num_batches = len(test_loader)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Need to store the images currently being processed
    canvas = {}

    pbar = tqdm(enumerate(test_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    for batch_idx, (packed_input, target) in pbar:
        # Unpack input
        inputs, top_left_coordinates, test_img_names = packed_input

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time
        with torch.no_grad():
            # Compute output
            output = model(inputs)

            # Compute the loss
            loss = criterion(output, target)

        # Record the loss
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Compute the batch meanIU (only logged, the meter tracks full images)
        _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)

        # Add loss and meanIU to Tensorboard
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar('test/mb_loss' + tag_suffix, loss_value, global_step)
        writer.add_scalar('test/mb_meanIU' + tag_suffix, mean_iu_batch, global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description('test epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

        # Output needs to be patched together to form the complete output of the full image
        # patches are returned as a sliding window over the full image, overlapping sections are averaged
        patches = output.data.cpu().numpy()
        xs = top_left_coordinates[0].numpy()
        ys = top_left_coordinates[1].numpy()
        for patch, x, y, img_name in zip(patches, xs, ys, test_img_names):

            # Is a new image?
            if img_name not in canvas:
                # Create a new image of the right size filled with NaNs
                canvas[img_name] = np.full((num_classes, *img_names_sizes_dict[img_name]), np.nan)

            # Add the patch to the image
            canvas[img_name] = merge_patches(patch, (x, y), canvas[img_name])

            # The image is complete once no NaN is left in its canvas
            if not np.isnan(np.sum(canvas[img_name])):
                # Save the final image
                mean_iu = process_full_image(img_name, canvas[img_name], multi_run, dataset_folder, class_encodings, post_process)
                # Update the meanIU
                meanIU.update(mean_iu, 1)
                # Remove the entry
                canvas.pop(img_name)
                logging.info("\nProcessed image {} with mean IU={}".format(img_name, mean_iu))

    # Canvas MUST be empty or something was wrong with coverage of all images
    assert len(canvas) == 0, 'Some images were not fully covered by patches: {}'.format(list(canvas))

    # Logging the epoch-wise meanIU under its own tag: the 'mb_' tags are
    # indexed by mini-batch and must not be mixed with per-epoch values
    writer.add_scalar('test/meanIU' + tag_suffix, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("test") +
                 ' epoch[{}]: '
                 'MeanIU={meanIU.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, meanIU=meanIU))

    return meanIU.avg
Exemple #6
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine

    Runs the model over the whole evaluation set without tracking gradients,
    logs the mini-batch and epoch losses to Tensorboard, and computes the
    Jaccard similarity score over all predictions of the epoch.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. It is prepended to the logging output path and
        messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    **kwargs
        Only the optional key 'run' is read: when present, its value is appended to every Tensorboard tag
        so that multiple runs do not overwrite each other.

    Returns
    -------
    jss_epoch
        Value returned by `compute_jss(targets, preds)` over the whole evaluated split
        (logged under the 'jaccard_similarity' tag).
    """
    multi_run = kwargs.get('run')
    # Suffix appended to every Tensorboard tag; empty for single-run experiments
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)
    pbar = tqdm(enumerate(data_loader),
                total=num_batches,
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (inputs, target) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. `non_blocking` replaces the former `async`
        # keyword argument, which is a reserved word since Python 3.7.
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Compute output and loss without building the autograd graph
        # (replaces the deprecated `Variable(..., volatile=True)`).
        with torch.no_grad():
            output = model(inputs)
            loss = criterion(output, target)

        # Record the loss as a plain Python number
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Squash the raw outputs with a sigmoid; the conversion to actual
        # predictions is delegated to get_preds_from_minibatch
        # (presumably a 0.5 threshold - TODO confirm against its definition).
        squashed_output = torch.sigmoid(output).cpu().numpy()

        # Store results of each minibatch
        preds.extend(get_preds_from_minibatch(squashed_output))
        targets.extend(target.cpu().numpy())

        # Add loss to Tensorboard
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix,
                          loss_value,
                          epoch * num_batches + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(
                                     epoch, batch_idx, num_batches))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Generate a classification report for each epoch.
    # The builtin `int` is used because the `np.int` alias was removed from NumPy.
    targets = np.array(targets).astype(int)
    preds = np.array(preds).astype(int)
    _log_classification_report(data_loader, epoch, preds, targets, writer)
    jss_epoch = compute_jss(targets, preds)

    # Logging the epoch-wise loss and JSS
    writer.add_scalar(logging_label + '/loss' + tag_suffix, losses.avg, epoch)
    writer.add_scalar(logging_label + '/jaccard_similarity' + tag_suffix,
                      jss_epoch, epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'JSS={jss_epoch:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               jss_epoch=jss_epoch))

    return jss_epoch
Exemple #7
0
        save_image_and_log_to_tensorboard(
            writer,
            tag=logging_label + '/confusion_matrix',
            image_tensor=confusion_matrix_heatmap,
            global_step=epoch)
    else:
        writer.add_scalar(logging_label + '/accuracy_{}'.format(multi_run),
                          top1.avg, epoch)
        save_image_and_log_to_tensorboard(
            writer,
            tag=logging_label + '/confusion_matrix_{}'.format(multi_run),
            image_tensor=confusion_matrix_heatmap,
            global_step=epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg