def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10, **kwargs):
    """
    Evaluate a classification model on one split and log loss, accuracy and a confusion matrix.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. It is prepended to the logging output path
        and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    **kwargs
        Only the optional key 'run' is read: when present, its value is appended to every Tensorboard tag.

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    multi_run = kwargs.get('run')
    # In a multi-run setting every tag gets the run id appended; otherwise the tags stay untouched
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    # Predictions and target values of the whole split (used for the confusion matrix and the report)
    preds = []
    targets = []

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    for batch_idx, (inputs, target) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. `non_blocking` replaces the former `async` argument, which is a reserved
        # word (and hence a SyntaxError) on Python >= 3.7.
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Forward pass without building the autograd graph (replaces Variable(..., volatile=True))
        with torch.no_grad():
            output = model(inputs)
            loss = criterion(output, target)

        # Compute and record the loss (`.item()` replaces indexing the scalar loss with `.data[0]`)
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Compute and record the accuracy
        acc1 = accuracy(output.data, target, topk=(1,))[0]
        top1.update(acc1[0], inputs.size(0))

        # Get the predictions
        preds.extend(np.argmax(item) for item in output.cpu().numpy())
        targets.extend(target.cpu().numpy())

        # Add loss and accuracy to Tensorboard
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss_value, global_step)
        writer.add_scalar(logging_label + '/mb_accuracy' + tag_suffix, acc1.cpu().numpy(), global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix; fall back to a blank image if it cannot be computed
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + tag_suffix, top1.avg, epoch)
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/confusion_matrix' + tag_suffix,
                                      image_tensor=confusion_matrix_heatmap,
                                      global_step=epoch)

    logging.info(_prettyprint_logging_label(logging_label) +
                 ' epoch[{}]: '
                 'Acc@1={top1.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10, **kwargs):
    """
    Evaluate a reconstruction model on one split: the loss is computed between the output and the input.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. It is prepended to the logging output path
        and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    **kwargs
        Only the optional key 'run' is read: when present, its value is appended to the mini-batch loss tag.

    Returns
    -------
    losses.avg : float
        Average loss of the model over the evaluated split
    """
    multi_run = kwargs.get('run')
    # In a multi-run setting the mini-batch loss tag gets the run id appended
    mb_loss_tag = logging_label + '/mb_loss'
    if multi_run is not None:
        mb_loss_tag = logging_label + '/mb_loss_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    for batch_idx, (inputs, _) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. `non_blocking` replaces the former `async` argument, which is a reserved
        # word (and hence a SyntaxError) on Python >= 3.7.
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)

        # Forward pass without building the autograd graph (replaces Variable(..., volatile=True)).
        # The input itself is the regression target.
        with torch.no_grad():
            output = model(inputs)
            loss = criterion(output, inputs)

        # Compute and record the loss (`.item()` replaces indexing the scalar loss with `.data[0]`)
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Add loss to Tensorboard
        writer.add_scalar(mb_loss_tag, loss_value, epoch * num_batches + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Build 5-column image grids from (up to) the first 25 samples of the LAST mini-batch
    input_img = torchvision.utils.make_grid(inputs[:25].cpu(),
                                            nrow=5,
                                            normalize=False,
                                            scale_each=False).permute(1, 2, 0).numpy()
    output_img = torchvision.utils.make_grid(output[:25].cpu(),
                                             nrow=5,
                                             normalize=False,
                                             scale_each=False).permute(1, 2, 0).numpy()

    # Note: the input image is logged without a global step, the output image with the epoch as step
    save_image_and_log_to_tensorboard(writer, tag=logging_label + '/input_image', image=input_img)
    save_image_and_log_to_tensorboard(writer, tag=logging_label + '/output_image', image=output_img,
                                      global_step=epoch)

    return losses.avg
def _multi_run(runner_class, writer, current_log_folder, args):
    """
    Repeat the same experiment several times and collect the scores of every run.

    After each run a mean/std ("variance shaded") plot of the train and validation scores gathered so far
    is produced and handed to Tensorboard. Once all runs are done the train and validation tables are
    stored as .npy files in the log folder.

    Parameters
    ----------
    runner_class : object
        Object exposing `single_run(writer, run=..., current_log_folder=..., **kwargs)`, which must return
        the train scores, the validation scores and the test score of one run.
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : String
        Path to the output folder. Used for saving the raw score tables.
    args : object
        Parsed command line arguments. `args.multi_run` and `args.epochs` are read here and the whole
        `args.__dict__` is forwarded to `single_run`.

    Returns
    -------
    train_scores : ndarray[float] of size (n, `epochs`)
    val_scores : ndarray[float] of size (n, `epochs`+1)
    test_scores : ndarray[float] of size (n)
        Train, Val and Test results for each run (n) and epoch
    """
    num_runs = args.multi_run
    num_epochs = args.epochs

    # Result tables: one row per run
    train_scores = np.zeros((num_runs, num_epochs))
    val_scores = np.zeros((num_runs, num_epochs + 1))
    test_scores = np.zeros(num_runs)

    for run_idx in range(num_runs):
        runs_done = run_idx + 1
        logging.info('Multi-Run: {} of {}'.format(runs_done, num_runs))

        # Execute one complete run and store its scores in the corresponding row
        run_train, run_val, run_test = runner_class.single_run(writer,
                                                               run=run_idx,
                                                               current_log_folder=current_log_folder,
                                                               **args.__dict__)
        train_scores[run_idx, :] = run_train
        val_scores[run_idx, :] = run_val
        test_scores[run_idx] = run_test

        plot_title = 'Runs: {}'.format(runs_done)

        # Shaded plot of the train scores of all runs completed so far
        train_curve = plot_mean_std(arr=train_scores[:runs_done],
                                    suptitle='Multi-Run: Train',
                                    title=plot_title,
                                    xlabel='Epoch',
                                    ylabel='Score',
                                    ylim=[0, 100.0])
        save_image_and_log_to_tensorboard(writer, tag='train_curve', image=train_curve, global_step=run_idx)
        logging.info('Generated mean-variance plot for train')

        # Shaded plot of the validation scores: the columns are rotated by one position and the
        # x axis starts at -1
        val_x = np.arange(num_epochs + 1) - 1
        val_arr = np.roll(val_scores[:runs_done], axis=1, shift=1)
        val_curve = plot_mean_std(x=val_x,
                                  arr=val_arr,
                                  suptitle='Multi-Run: Val',
                                  title=plot_title,
                                  xlabel='Epoch',
                                  ylabel='Score',
                                  ylim=[0, 100.0])
        save_image_and_log_to_tensorboard(writer, tag='val_curve', image=val_curve, global_step=run_idx)
        logging.info('Generated mean-variance plot for val')

    # Log results on disk
    train_path = os.path.join(current_log_folder, 'train_values.npy')
    val_path = os.path.join(current_log_folder, 'val_values.npy')
    np.save(train_path, train_scores)
    np.save(val_path, val_scores)

    test_mean = np.mean(test_scores)
    test_std = np.std(test_scores)
    logging.info('Multi-run values for test-mean:{} test-std: {}'.format(test_mean, test_std))

    return train_scores, val_scores, test_scores
def _evaluate(data_loader, model, criterion, writer, epoch, logging_label, no_cuda=False, log_interval=10, **kwargs):
    """
    Evaluate an image-to-image model on one split and log the loss plus one example triplet.

    Every sample delivered by the dataloader is split in two halves along dimension 3: the first half
    is fed to the model, the second half is used as ground truth.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. It is prepended to the logging output path
        and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.
    **kwargs
        Only the optional key 'run' is read: when present, its value is appended to every Tensorboard tag.

    Returns
    -------
    None
        Nothing is returned as long as the accuracy computation is not implemented (see TODO below).
    """
    # TODO The accuracy is not computed yet (no accuracy meter, no '/mb_accuracy' and '/accuracy'
    # TODO scalars, no 'Acc@1' in the log line). It is necessary to implement a 2D softmax and, instead
    # TODO of regressing the output class, have it work with class labels. Notice that, however, it
    # TODO would be of interest leaving open the possibility to work with soft labels (e.g. the ground
    # TODO truth for pixel X,Y is an array of probabilities instead of an integer).
    multi_run = kwargs.get('run')
    # In a multi-run setting every tag gets the run id appended. This now includes the target image
    # tag, which previously was the only one logged without the suffix.
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    num_batches = len(data_loader)

    # Iterate over whole evaluation set
    end = time.time()
    pbar = tqdm(enumerate(data_loader), total=num_batches, unit='batch', ncols=150, leave=False)
    for batch_idx, (inputs, _) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. `non_blocking` replaces the former `async` argument, which is a reserved
        # word (and hence a SyntaxError) on Python >= 3.7.
        if not no_cuda:
            inputs = inputs.cuda(non_blocking=True)

        # Split the data into halves to separate the input from the GT
        satel_image, map_image = torch.chunk(inputs, chunks=2, dim=3)

        # Forward pass; no gradients are needed during evaluation
        with torch.no_grad():
            output = model(satel_image)
            loss = criterion(output, map_image)

        # Compute and record the loss (`.item()` replaces indexing the scalar loss with `.data[0]`)
        loss_value = loss.item()
        losses.update(loss_value, inputs.size(0))

        # Add loss to Tensorboard
        writer.add_scalar(logging_label + '/mb_loss' + tag_suffix, loss_value, epoch * num_batches + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, num_batches))
            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Log the first sample of the LAST mini-batch: model output, model input and ground truth.
    # Only the output is logged with the epoch as global step.
    save_image_and_log_to_tensorboard(writer, tag=logging_label + '/output' + tag_suffix,
                                      image=output[:1], global_step=epoch)
    save_image_and_log_to_tensorboard(writer, tag=logging_label + '/input' + tag_suffix,
                                      image=satel_image[:1])
    save_image_and_log_to_tensorboard(writer, tag=logging_label + '/target' + tag_suffix,
                                      image=map_image[:1])

    logging.info(_prettyprint_logging_label(logging_label) +
                 ' epoch[{}]: '
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses))
def plot_decision_boundaries(output_winners, output_confidence, grid_x, grid_y, point_x, point_y, point_class,
                             num_classes, step, writer, epochs, **kwargs):
    """
    Plots the decision boundaries as a 2D image onto Tensorboard.

    Parameters
    ----------
    output_winners: numpy.ndarray
        which class is the 'winner' of the network at each location
    output_confidence: numpy.ndarray
        confidence value of the network for the 'winner' class
    grid_x: numpy.ndarray
        X axis locations of the decision grid
    grid_y: numpy.ndarray
        Y axis locations of the decision grid
    point_x: numpy.ndarray
        X axis locations of the real points to be plotted
    point_y: numpy.ndarray
        Y axis locations of the real points to be plotted
    point_class: numpy.ndarray
        class of the real points at each location
    num_classes: int
        number of unique classes. Only five colors are defined, so larger values cannot be drawn.
    step: int
        global training step
    writer: tensorboardX SummaryWriter
        Tensorboard summarywriter object
    epochs: int
        total number of training epochs
    **kwargs
        Only the optional key 'run' is read: when present, its value is inserted in the Tensorboard tags.

    Returns
    -------
    None
    """
    multi_run = kwargs.get('run')

    # Work on a copy so that the caller's array is not modified; classes become 1-based
    point_class = point_class.copy()
    point_class += 1

    # Matplotlib stuff
    fig = plt.figure(1)
    axs = plt.gca()

    colors = ['blue', 'orange', 'green', 'red', 'purple']
    colors_points = {'blue': '#000099', 'orange': '#e68a00', 'red': '#b30000', 'green': '#009900',
                     'purple': '#7300e6'}
    colors_contour = {'blue': plt.get_cmap('Blues'), 'orange': plt.get_cmap('Oranges'),
                      'red': plt.get_cmap('Reds'), 'green': plt.get_cmap('Greens'),
                      'purple': plt.get_cmap('Purples')}

    # One image layer per winning class: the confidence where that class wins, NaN everywhere else
    extent = (np.min(grid_x), np.max(grid_x), np.min(grid_y), np.max(grid_y))
    for winner in np.unique(output_winners):
        locs = np.where(output_winners == winner)
        # `np.nan` instead of the alias `np.NaN`, which no longer exists in NumPy 2.0
        tmp = np.full(output_confidence.shape, np.nan)
        tmp[locs[0]] = output_confidence[locs[0]]
        grid_vals = np.flip(tmp.reshape(grid_x.shape), 1).T
        axs.imshow(grid_vals, extent=extent, cmap=colors_contour[colors[winner]], alpha=0.9)

    # Draw all the points
    for i in range(1, num_classes + 1):
        locs = np.where(point_class == i)
        axs.scatter(point_x[locs], point_y[locs], c=colors_points[colors[i - 1]], edgecolor='w', lw=0.75)

    # Draw image
    fig.canvas.draw()

    # Get image. `np.frombuffer` replaces the deprecated binary mode of `np.fromstring`; the copy keeps
    # the resulting array writable, as it was before.
    # NOTE(review): `tostring_rgb` is deprecated in recent Matplotlib releases - confirm the pinned version.
    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)).copy()

    # Steps that additionally get logged in the overview: -1, 0 and, for long trainings, evenly spaced ones
    overview_epochs = [-1, 0]
    if epochs > 10:
        overview_epochs.extend(np.arange(1, epochs, step=np.ceil((epochs - 2) / 8)))

    # Plot to tensorboard
    if multi_run is None:
        overview_tag = 'decision_boundary_overview'
        step_tag = 'decision_boundary/{}'.format(step)
    else:
        overview_tag = 'decision_boundary_overview_{}'.format(multi_run)
        step_tag = 'decision_boundary_{}/{}'.format(multi_run, step)

    if step in overview_epochs or epochs <= 10:
        save_image_and_log_to_tensorboard(writer, tag=overview_tag, image=data, global_step=step)
    writer.add_image(step_tag, data, global_step=step)

    # Clear the shared figure so that the next call starts from an empty canvas
    plt.clf()

    return None
def feature_extract(data_loader, model, writer, epoch, no_cuda, log_interval, classify, **kwargs):
    """
    Run the model over a whole dataset and collect its outputs as features.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set. It must yield (data, label, filename) triplets.
    model : torch.nn.module
        The network model being used
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches.
    classify : boolean
        Specifies whether to generate a classification report for the data or not.
    **kwargs
        Not used.

    Returns
    -------
    features : numpy.ndarray
        Outputs of the model for all samples, concatenated along the first axis
    preds : numpy.ndarray or None
        Index of the largest output value of each sample if `classify` is True, None otherwise
    labels : numpy.ndarray
        Labels delivered by the dataloader, flattened to one entry per dataset sample
    filenames : numpy.ndarray
        Filenames delivered by the dataloader, concatenated along the first axis
    """
    # Local import: the forward pass below relies on `torch.no_grad`
    import torch

    logging_label = 'apply'

    # Switch to evaluate mode (turn off dropout & such)
    model.eval()

    labels, features, preds, filenames = [], [], [], []

    multi_crop = False

    # Iterate over whole evaluation set
    pbar = tqdm(enumerate(data_loader), total=len(data_loader), unit='batch', ncols=200)
    for batch_idx, (data, label, filename) in pbar:
        # A 5-dimensional batch holds several crops per sample: fold the crops into the batch dimension
        if len(data.size()) == 5:
            multi_crop = True
            bs, ncrops, c, h, w = data.size()
            data = data.view(-1, c, h, w)

        if not no_cuda:
            data = data.cuda()

        # Compute output without building the autograd graph (replaces Variable(..., volatile=True))
        with torch.no_grad():
            out = model(data)
            if multi_crop:
                # Average the outputs of all crops belonging to the same sample
                out = out.view(bs, ncrops, -1).mean(1)

        out_np = out.cpu().numpy()
        preds.append([np.argmax(item) for item in out_np])
        features.append(out_np)
        labels.append(label)
        filenames.append(filename)

        # Log progress to console
        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label + ' Epoch: {} [{}/{} ({:.0f}%)]'.format(
                epoch, batch_idx * len(data), len(data_loader.dataset), 100. * batch_idx / len(data_loader)))

    # Merge the per-batch results into one array each
    num_tests = len(data_loader.dataset)
    labels = np.concatenate(labels, 0).reshape(num_tests)
    features = np.concatenate(features, 0)
    preds = np.concatenate(preds, 0)
    filenames = np.concatenate(filenames, 0)

    if classify:
        # Make a confusion matrix
        try:
            cm = confusion_matrix(y_true=labels, y_pred=preds)
            confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes)
            save_image_and_log_to_tensorboard(writer,
                                              tag=logging_label + '/confusion_matrix',
                                              image_tensor=confusion_matrix_heatmap,
                                              global_step=epoch)
        except ValueError:
            logging.warning('Confusion matrix received weird values')

        # Generate a classification report for each epoch
        logging.info('Classification Report for epoch {}\n'.format(epoch))
        logging.info('\n' + classification_report(y_true=labels,
                                                  y_pred=preds,
                                                  target_names=[str(item) for item in
                                                                data_loader.dataset.classes]))
    else:
        # Without classification the predictions carry no meaning
        preds = None

    return features, preds, labels, filenames
# Make a confusion matrix try: cm = confusion_matrix(y_true=targets, y_pred=preds) confusion_matrix_heatmap = make_heatmap(cm, data_loader.dataset.classes) except ValueError: logging.warning('Confusion Matrix did not work as expected') confusion_matrix_heatmap = np.zeros((10, 10, 3)) # Logging the epoch-wise accuracy and confusion matrix if multi_run is None: writer.add_scalar(logging_label + '/accuracy', top1.avg, epoch) save_image_and_log_to_tensorboard( writer, tag=logging_label + '/confusion_matrix', image_tensor=confusion_matrix_heatmap, global_step=epoch) else: writer.add_scalar(logging_label + '/accuracy_{}'.format(multi_run), top1.avg, epoch) save_image_and_log_to_tensorboard( writer, tag=logging_label + '/confusion_matrix_{}'.format(multi_run), image_tensor=confusion_matrix_heatmap, global_step=epoch) logging.info( _prettyprint_logging_label(logging_label) + ' epoch[{}]: ' 'Acc@1={top1.avg:.3f}\t' 'Loss={loss.avg:.4f}\t'
def _multi_run(runner_class, writer, current_log_folder, args):
    """
    Execute multiple runs with the same parameters and gather the results of all of them.

    After every run "variance shaded plots" of the train and validation scores collected so far are
    generated and passed on to Tensorboard. At the end the train and validation tables are saved as
    .npy files in the log folder.

    Parameters
    ----------
    runner_class : class
        Provides `single_run(writer, run=..., current_log_folder=..., **kwargs)`, which must return the
        train scores, the validation scores and the test score of one run.
    writer : Tensorboard SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : String
        Path to the output folder in which the score tables are saved.
    args : object
        Parsed arguments. `args.multi_run` and `args.epochs` are read here and the whole `args.__dict__`
        is forwarded to `single_run`.

    Returns
    -------
    train_scores : ndarray[float] of size (n, epochs)
    val_scores : ndarray[float] of size (n, epochs + 1)
    test_scores : ndarray[float] of size (n)
        Train, Val and Test results for each run (n) and epoch
    """
    total_runs = args.multi_run
    total_epochs = args.epochs

    # Score tables, one row per run
    train_scores = np.zeros((total_runs, total_epochs))
    val_scores = np.zeros((total_runs, total_epochs + 1))
    test_scores = np.zeros(total_runs)

    for run in range(total_runs):
        completed = run + 1
        logging.info('Multi-Run: {} of {}'.format(completed, total_runs))

        # Perform one full run and file its results in the row of this run
        single_train, single_val, single_test = runner_class.single_run(writer,
                                                                        run=run,
                                                                        current_log_folder=current_log_folder,
                                                                        **args.__dict__)
        train_scores[run, :] = single_train
        val_scores[run, :] = single_val
        test_scores[run] = single_test

        runs_title = 'Runs: {}'.format(completed)

        # Generate and add to tensorboard the shaded plot for train
        train_curve = plot_mean_std(arr=train_scores[:completed],
                                    suptitle='Multi-Run: Train',
                                    title=runs_title,
                                    xlabel='Epoch',
                                    ylabel='Score',
                                    ylim=[0, 100.0])
        save_image_and_log_to_tensorboard(writer, tag='train_curve', image_tensor=train_curve, global_step=run)
        logging.info('Generated mean-variance plot for train')

        # Generate and add to tensorboard the shaded plot for val: the columns are rotated by one
        # position and the x axis starts at -1
        shifted_val = np.roll(val_scores[:completed], axis=1, shift=1)
        val_axis = np.arange(total_epochs + 1) - 1
        val_curve = plot_mean_std(x=val_axis,
                                  arr=shifted_val,
                                  suptitle='Multi-Run: Val',
                                  title=runs_title,
                                  xlabel='Epoch',
                                  ylabel='Score',
                                  ylim=[0, 100.0])
        save_image_and_log_to_tensorboard(writer, tag='val_curve', image_tensor=val_curve, global_step=run)
        logging.info('Generated mean-variance plot for val')

    # Log results on disk
    for file_name, table in (('train_values.npy', train_scores), ('val_values.npy', val_scores)):
        np.save(os.path.join(current_log_folder, file_name), table)

    summary = 'Multi-run values for test-mean:{} test-std: {}'.format(np.mean(test_scores), np.std(test_scores))
    logging.info(summary)

    return train_scores, val_scores, test_scores