def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine

    Runs the model over the whole evaluation set with gradient tracking
    disabled, logs mini-batch and epoch-wise loss/accuracy to Tensorboard,
    and logs a confusion matrix and a classification report for the epoch.

    Mini-batches with 5 dimensions are treated as multi-crop input
    ``[batch, ncrops, c, h, w]``: the crops are fused into the batch dimension
    for the forward pass and the outputs are averaged over the crops.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        Optional 'run' entry: when present, its value is appended to the
        Tensorboard tags (e.g. '/accuracy_3').

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # Identifier of the current run; None means a single run (plain tags).
    # It must not be mixed up with the per-batch multi-crop flag below.
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    # No gradients are needed for evaluation
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # TODO: support sliding-window evaluation across batches
            multi_crop = inputs.dim() == 5
            if multi_crop:
                # e.g. [64, 5, 3, 299, 299] -> [320, 3, 299, 299]:
                # fuse batch size and ncrops, keep c, h, w untouched
                bs, ncrops, c, h, w = inputs.size()
                inputs = inputs.view(-1, c, h, w)

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)
            if multi_crop:
                # Average the outputs of all crops belonging to one sample
                output = output.view(bs, ncrops, -1).mean(1)

            # Number of samples scored in this mini-batch (crops are not samples)
            batch_size = target.size(0)

            # Compute and record the loss
            loss = criterion(output, target)
            loss_value = loss.item()
            losses.update(loss_value, batch_size)

            # Compute and record the accuracy
            acc1 = accuracy(output.data, target, topk=(1,))[0]
            top1.update(acc1[0], batch_size)

            # Get the predictions
            preds.extend(np.argmax(row) for row in output.data.cpu().numpy())
            targets.extend(target.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss_value, global_step)
            writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix, acc1.cpu().numpy(), global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))

                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Blank placeholder image so the logging below still has something to save
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg, epoch)
    # The heatmap is passed through the `image` keyword; `image_tensor` was
    # rejected by the helper with an unexpected-keyword-argument error.
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/confusion_matrix' + tag_suffix,
                                      image=confusion_matrix_heatmap,
                                      global_step=epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 ' epoch[{}]: '
                 'Acc@1={top1.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def _evaluate(data_loader, model, criterion, observer, observer_criterion, writer, epoch, logging_label,
              no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine

    Runs the model over the whole evaluation set with gradient tracking
    disabled. For every mini-batch the features exposed by
    ``model.module.features`` are additionally passed to the observer via
    ``evaluate_one_mini_batch``. Mini-batch and epoch-wise values, a confusion
    matrix and a classification report are logged.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used. It is accessed through ``model.module``,
        so it is presumably wrapped (e.g. in DataParallel) -- confirm with caller.
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    observer : torch.nn.module
        The observer that is evaluated on the features of the model
    observer_criterion : torch.nn.loss
        The loss function handed to ``evaluate_one_mini_batch`` together with the observer
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    kwargs : dict
        Optional 'run' entry: when present, its value is appended to the
        Tensorboard tags (e.g. '/accuracy_3').

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # Identifier of the current run; None means a single run (plain tags)
    multi_run = kwargs.get('run')
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    observer_loss_meter = AverageMeter()
    top1 = AverageMeter()
    observer_acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    # No gradients are needed for evaluation (model and observer alike)
    with torch.no_grad():
        for batch_idx, (inputs, target) in pbar:
            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU
            if not no_cuda:
                inputs = inputs.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inputs)

            # Get the features from second last layer; they are only
            # available after the forward pass above
            input_features = model.module.features.data

            # Use observer on the features
            observer_acc, observer_loss = evaluate_one_mini_batch(observer, observer_criterion,
                                                                  input_features, target,
                                                                  observer_loss_meter, observer_acc_meter)

            # Compute and record the loss
            loss = criterion(output, target)
            loss_value = loss.item()
            losses.update(loss_value, inputs.size(0))

            # Compute and record the accuracy
            acc1 = accuracy(output.data, target, topk=(1,))[0]
            top1.update(acc1[0], inputs.size(0))

            # Get the predictions
            preds.extend(np.argmax(row) for row in output.data.cpu().numpy())
            targets.extend(target.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss_value, global_step)
            writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix, acc1.cpu().numpy(), global_step)
            writer.add_scalar(logging_label + '/obs_mb_loss' + tag_suffix, observer_loss.item(), global_step)
            writer.add_scalar(logging_label + '/obs_mb_accuracy' + tag_suffix,
                              observer_acc.cpu().numpy(), global_step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))

                pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                                 Loss='{loss.avg:.4f}\t'.format(loss=losses),
                                 Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                                 Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Blank placeholder image so the logging below still has something to save
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg, epoch)
    writer.add_scalar(logging_label + '/obs_accuracy' + tag_suffix, observer_acc_meter.avg, epoch)
    # NOTE(review): the other callers of this helper in the file pass the
    # heatmap as `image=`, so the same keyword is used here -- confirm against
    # the helper's signature.
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/confusion_matrix' + tag_suffix,
                                      image=confusion_matrix_heatmap,
                                      global_step=epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 ' epoch[{}]: '
                 'Acc@1={top1.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def feature_extract(data_loader, model, writer, epoch, no_cuda, log_interval, classify, **kwargs):
    """
    The feature extraction routine

    Feeds every mini-batch of the data loader through the model with gradient
    tracking disabled and collects the outputs together with the labels and
    filenames delivered by the loader. Mini-batches with 5 dimensions are
    treated as multi-crop input ``[batch, ncrops, c, h, w]``: the crops are
    fused into the batch dimension for the forward pass and the outputs are
    averaged over the crops.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set. Must yield (data, label, filename) triples.
    model : torch.nn.module
        The network model being used
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches.
    classify : boolean
        Specifies whether to generate a classification report for the data or not.
    kwargs : dict
        Not used by this routine.

    Returns
    -------
    features : numpy.ndarray
        The model outputs of all mini-batches, concatenated along the first axis
    preds : numpy.ndarray or None
        The argmax of every model output if `classify` is set, None otherwise
    labels : numpy.ndarray
        The labels of all mini-batches, reshaped to (len(data_loader.dataset),)
    filenames : numpy.ndarray
        The filenames of all mini-batches, concatenated
    """
    logging_label = 'apply'

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    labels, features, preds, filenames = [], [], [], []

    # Iterate over whole evaluation set
    pbar = tqdm(enumerate(data_loader), total=len(data_loader), unit='batch', ncols=200)
    with torch.no_grad():
        for batch_idx, (data, label, filename) in pbar:
            # Decided per batch so that `bs`/`ncrops` always belong to the current input
            multi_crop = data.dim() == 5
            if multi_crop:
                # Fuse batch size and ncrops, keep c, h, w untouched
                bs, ncrops, c, h, w = data.size()
                data = data.view(-1, c, h, w)
            if not no_cuda:
                data = data.cuda()

            # Compute output
            out = model(data)
            if multi_crop:
                # Average the outputs of all crops belonging to one sample
                out = out.view(bs, ncrops, -1).mean(1)

            # Single device-to-host transfer per mini-batch
            out_np = out.data.cpu().numpy()
            features.append(out_np)
            if classify:
                # Predictions are only needed for the classification report
                preds.append([np.argmax(row) for row in out_np])
            labels.append(label)
            filenames.append(filename)

            # Log progress to console
            if batch_idx % log_interval == 0:
                # len(label) is the number of samples; len(data) would count crops
                pbar.set_description(logging_label + ' Epoch: {} [{}/{} ({:.0f}%)]'.format(
                    epoch, batch_idx * len(label), len(data_loader.dataset),
                    100. * batch_idx / len(data_loader)))

    # Merge the per-batch results into flat arrays
    num_tests = len(data_loader.dataset)
    labels = np.concatenate(labels, 0).reshape(num_tests)
    features = np.concatenate(features, 0)
    filenames = np.concatenate(filenames, 0)

    if not classify:
        return features, None, labels, filenames

    preds = np.concatenate(preds, 0)

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=labels, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/confusion_matrix',
                                          image=confusion_matrix_heatmap,
                                          global_step=epoch)
    except ValueError:
        # Best effort: the classification report below is still produced
        logging.warning('Confusion matrix received weird values')

    # Generate a classification report for each epoch
    logging.info('Classification Report for epoch {}\n'.format(epoch))
    logging.info('\n' + classification_report(y_true=labels,
                                              y_pred=preds,
                                              target_names=[str(item) for item in data_loader.dataset.classes]))

    return features, preds, labels, filenames
if batch_idx % log_interval == 0: pbar.set_description(logging_label + ' epoch [{0}][{1}/{2}]\t'.format( epoch, batch_idx, len(data_loader))) pbar.set_postfix( Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time), Loss='{loss.avg:.4f}\t'.format(loss=losses), Acc1='{top1.avg:.3f}\t'.format(top1=top1), Data='{data_time.avg:.3f}\t'.format(data_time=data_time)) # Make a confusion matrix try: cm = confusion_matrix(y_true=targets, y_pred=preds) confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes) except ValueError: logging.warning('Confusion Matrix did not work as expected') confusion_matrix_heatmap = np.zeros((10, 10, 3)) # Logging the epoch-wise accuracy and confusion matrix if multi_run is None: writer.add_scalar(logging_label + '/accuracy', top1.avg, epoch) save_image_and_log_to_tensorboard( writer, tag=logging_label + '/confusion_matrix', image_tensor=confusion_matrix_heatmap, global_step=epoch) else: writer.add_scalar(logging_label + '/accuracy_{}'.format(multi_run),