import time

import numpy as np
import torch
from tqdm import tqdm

# `aux`, `eva` and `data` below are project-local helper modules (metrics,
# checkpointing, dataloader construction); their exact import lines depend on
# the repository layout and are therefore not reproduced here.


def evaluate_one_dataset(LOG, dataloader, model, opt, save=True, give_return=False, epoch=0):
    """
    Compute evaluation metrics, update LOGGER and print results.

    Args:
        LOG:         aux.LOGGER instance. Main logging functionality.
        dataloader:  PyTorch Dataloader, test data to be evaluated.
        model:       PyTorch network to evaluate.
        opt:         argparse.Namespace, contains all training-specific parameters.
        save:        bool, if True, a checkpoint is saved whenever the test
                     metric (specifically Recall @ 1) improves.
        give_return: bool, if True, return the computed metrics.
        epoch:       int, current epoch, required for the logger.
    Returns:
        (optional) Computed metrics. They are normally written directly to LOG and printed.
    """
    start = time.time()
    image_paths = np.array(dataloader.dataset.image_list)

    with torch.no_grad():
        # Compute metrics.
        F1, NMI, recall_at_ks, feature_matrix_all = aux.eval_metrics_one_dataset(
            model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt)

        # Make a printable summary string.
        result_str = ', '.join('@{0}: {1:.4f}'.format(k, rec)
                               for k, rec in zip(opt.k_vals, recall_at_ks))
        result_str = 'Epoch (Test) {0}: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]'.format(
            epoch, NMI, F1, result_str)

        if LOG is not None:
            if save:
                # Save a checkpoint whenever Recall @ 1 reaches a new best value.
                if not len(LOG.progress_saver['val']['Recall @ 1']) or \
                        recall_at_ks[0] > np.max(LOG.progress_saver['val']['Recall @ 1']):
                    aux.set_checkpoint(model, opt, LOG.progress_saver,
                                       LOG.prop.save_path + '/checkpoint.pth.tar')
                    aux.recover_closest_one_dataset(feature_matrix_all, image_paths,
                                                    LOG.prop.save_path + '/sample_recoveries.png')
            # Update logs.
            LOG.log('val', LOG.metrics_to_log['val'],
                    [epoch, np.round(time.time() - start), NMI, F1] + recall_at_ks)

    print(result_str)

    if give_return:
        return recall_at_ks, NMI, F1
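# The NMI score reported above is produced inside aux.eval_metrics_one_dataset.
# For reference, a minimal self-contained sketch of the usual recipe: cluster
# the embeddings with k-means, then score the cluster assignment against the
# ground-truth labels. Illustration only - the repository's own implementation
# may use a different clustering backend (e.g. faiss).
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score


def nmi_sketch(features, labels, seed=0):
    """Normalized Mutual Information between k-means clusters and true labels."""
    n_classes = len(np.unique(labels))
    cluster_ids = KMeans(n_clusters=n_classes, random_state=seed).fit_predict(features)
    return normalized_mutual_info_score(labels, cluster_ids)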
def evaluate_multiple_datasets(LOG, dataloaders, model, opt, save=True, give_return=False, epoch=0):
    """
    Compute evaluation metrics, update LOGGER and print results, specifically
    for multi-test datasets such as PKU Vehicle-ID.

    Args:
        LOG:         aux.LOGGER instance. Main logging functionality.
        dataloaders: list of PyTorch Dataloaders, test dataloaders to evaluate.
        model:       PyTorch network to evaluate.
        opt:         argparse.Namespace, contains all training-specific parameters.
        save:        bool, if True, a checkpoint is saved whenever the test
                     metric (specifically Recall @ 1) improves.
        give_return: bool, if True, return the computed metrics.
        epoch:       int, current epoch, required for the logger.
    Returns:
        (optional) Computed metrics. They are normally written directly to LOG and printed.
    """
    start = time.time()
    csv_data = [epoch]

    with torch.no_grad():
        for i, dataloader in enumerate(dataloaders):
            print('Working on Set {}/{}'.format(i + 1, len(dataloaders)))
            image_paths = np.array(dataloader.dataset.image_list)

            # Compute metrics for this specific test set.
            F1, NMI, recall_at_ks, feature_matrix_all = aux.eval_metrics_one_dataset(
                model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt)

            # Generate a printable summary string.
            result_str = ', '.join('@{0}: {1:.4f}'.format(k, rec)
                                   for k, rec in zip(opt.k_vals, recall_at_ks))
            result_str = 'SET {0}: Epoch (Test) {1}: NMI [{2:.4f}] | F1 [{3:.4f}] | Recall [{4}]'.format(
                i + 1, epoch, NMI, F1, result_str)

            if LOG is not None:
                if save:
                    # Save a per-test-set checkpoint whenever its Recall @ 1 improves.
                    if not len(LOG.progress_saver['val']['Set {} Recall @ 1'.format(i)]) or \
                            recall_at_ks[0] > np.max(LOG.progress_saver['val']['Set {} Recall @ 1'.format(i)]):
                        aux.set_checkpoint(model, opt, LOG.progress_saver,
                                           LOG.prop.save_path + '/checkpoint_set{}.pth.tar'.format(i + 1))
                        aux.recover_closest_one_dataset(feature_matrix_all, image_paths,
                                                        LOG.prop.save_path + '/sample_recoveries_set{}.png'.format(i + 1))
                csv_data += [NMI, F1] + recall_at_ks

            print(result_str)

    csv_data.insert(0, np.round(time.time() - start))

    # Update logs (guarded, since LOG may be None).
    if LOG is not None:
        LOG.log('val', LOG.metrics_to_log['val'], csv_data)

    if give_return:
        return csv_data[2:]
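# Usage sketch for the multi-set case. PKU Vehicle-ID is typically evaluated on
# three test splits (small/medium/large); the dict keys below are hypothetical
# and depend on how the repository's dataloader setup names them:
#
#     test_loaders = [dataloaders['testing_set1'],
#                     dataloaders['testing_set2'],
#                     dataloaders['testing_set3']]
#     evaluate_multiple_datasets(LOG, test_loaders, model, opt,
#                                save=True, epoch=epoch)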
def validate(test_loader, model, args):
    # Switch to evaluation mode.
    model.eval()

    testdata = torch.Tensor()
    testlabel = torch.LongTensor()

    if args.dataset == "online_products":
        nmi, recall = aux.eval_metrics_one_dataset(model, test_loader, device=args.device,
                                                   k_vals=args.k_vals, opt=args)
    else:
        with torch.no_grad():
            testloader = tqdm(test_loader, desc='Epoch {} Testing...'.format(args.cur_epoch))
            for i, (input, target) in enumerate(testloader):
                if args.gpu is not None:
                    input = input.to(args.device)
                # Compute embeddings and collect them on the CPU.
                output = model(input)
                testdata = torch.cat((testdata, output.cpu()), 0)
                testlabel = torch.cat((testlabel, target))

        nmi, recall = eva.evaluation(testdata.numpy(), testlabel.numpy(), [1, 2, 4, 8])

    return nmi, recall
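# `eva.evaluation` above is a project-local helper. For reference, a
# self-contained sketch of Recall@k on L2-normalized embeddings; this reflects
# the standard metric definition, not necessarily the repository's exact
# implementation.
def recall_at_k_sketch(features, labels, k_vals=(1, 2, 4, 8)):
    """Fraction of queries whose k nearest neighbours contain a same-class sample."""
    features = features / np.linalg.norm(features, axis=1, keepdims=True)
    sims = features @ features.T
    np.fill_diagonal(sims, -np.inf)       # exclude each query from its own ranking
    ranked = np.argsort(-sims, axis=1)    # nearest neighbours first
    return [float((labels[ranked[:, :k]] == labels[:, None]).any(axis=1).mean())
            for k in k_vals]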
# Get dataloaders, primarily the one for the test set. For that, point to the
# folder that contains the datasets:
opt.source_path = '<path_to_dataset>/' + opt.dataset
dataloaders = data.give_dataloaders(opt.dataset, opt)
opt.num_classes = len(dataloaders['training'].dataset.avail_classes)
opt.device = torch.device('cuda')

"""================================"""
# Compute test metrics - note that the weights were stored at optimal R@1 performance.
_ = model.to(opt.device)
_ = model.eval()

start = time.time()
image_paths = np.array(dataloaders['testing'].dataset.image_list)

with torch.no_grad():
    evaltypes = ['Class']
    metrics, labels = aux.eval_metrics_one_dataset(model, dataloaders['testing'],
                                                   device=opt.device, k_vals=opt.k_vals,
                                                   opt=opt, evaltypes=evaltypes)

    ###
    full_result_str = ''
    for evaltype in evaltypes:
        result_str = ', '.join('@{0}: {1:.4f}'.format(k, rec)
                               for k, rec in zip(opt.k_vals, metrics[evaltype]['Recall@k']))
        result_str = '{0}-embed: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]'.format(
            evaltype, metrics[evaltype]['NMI'], metrics[evaltype]['F1'], result_str)
        full_result_str += result_str + '\n'

print(full_result_str)
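# The snippet above assumes the stored weights were already loaded into `model`.
# A hedged sketch of that restore step; the checkpoint keys are an assumption
# about what aux.set_checkpoint() writes and may differ in the repository:
#
#     checkpoint = torch.load('<path_to_checkpoint>/checkpoint.pth.tar',
#                             map_location='cpu')
#     model.load_state_dict(checkpoint['state_dict'])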
def evaluate_one_dataset(LOG, dataloader, model, opt, spliteval=True, evaltypes=['Class'],
                         save=True, give_return=False, aux_store=None, epoch=0,
                         monitor_distances=True, log_key='Test'):
    """
    Variant of evaluate_one_dataset that evaluates one or more embedding types
    (`evaltypes`), logs per-type metrics under `log_key`, and optionally
    monitors intra-/inter-class distances.
    """
    start = time.time()
    image_paths = np.array(dataloader.dataset.image_list)

    with torch.no_grad():
        metrics, labels = aux.eval_metrics_one_dataset(model, dataloader, device=opt.device,
                                                       spliteval=spliteval, k_vals=opt.k_vals,
                                                       opt=opt, evaltypes=evaltypes)

        ###
        full_result_str = ''
        for evaltype in evaltypes:
            result_str = ', '.join('@{0}: {1:.4f}'.format(k, rec)
                                   for k, rec in zip(opt.k_vals, metrics[evaltype]['Recall@k']))
            result_str = '{0}-embed: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]'.format(
                evaltype, metrics[evaltype]['NMI'], metrics[evaltype]['F1'], result_str)
            full_result_str += result_str + '\n'

        ###
        if LOG is not None:
            for evaltype in evaltypes:
                if save:
                    # Save a per-embedding-type checkpoint whenever its Recall @ 1 improves.
                    if (evaltype + '_Recall' not in LOG.progress_saver[log_key].groups.keys()) or \
                            metrics[evaltype]['Recall@k'][0] > np.max(
                                LOG.progress_saver[log_key].groups[evaltype + '_Recall']['Recall @ 1']['content']):
                        aux.set_checkpoint(model, opt, LOG.progress_saver,
                                           LOG.prop.save_path + '/checkpoint_{}.pth.tar'.format(evaltype),
                                           aux=aux_store)
                        aux.recover_closest_one_dataset(metrics[evaltype]['Features'], image_paths,
                                                        LOG.prop.save_path + '/sample_recoveries.png')

                LOG.progress_saver[log_key].log('NMI', metrics[evaltype]['NMI'],
                                                group=evaltype + '_NMI')
                LOG.progress_saver[log_key].log('F1', metrics[evaltype]['F1'],
                                                group=evaltype + '_F1')
                for k_val, recall_val in zip(opt.k_vals, metrics[evaltype]['Recall@k']):
                    LOG.progress_saver[log_key].log('Recall @ {}'.format(k_val), recall_val,
                                                    group=evaltype + '_Recall')

                if monitor_distances:
                    # `distance_measure` is a project-local helper (see sketch below).
                    intra_dist, inter_dist = distance_measure(metrics[evaltype]['Features'], labels)
                    LOG.progress_saver[log_key].log('Intraclass', intra_dist,
                                                    group=evaltype + '_Distances')
                    LOG.progress_saver[log_key].log('Interclass', inter_dist,
                                                    group=evaltype + '_Distances')

            LOG.progress_saver[log_key].log('Epochs', epoch, group='Epochs')
            LOG.progress_saver[log_key].log('Time', np.round(time.time() - start, 4), group='Time')

    print(full_result_str)

    if give_return:
        return metrics
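# `distance_measure` is referenced above but not defined in this snippet. A
# minimal sketch of what such a helper could compute, assuming `features` is an
# (N, D) array and `labels` an (N,) array; the repository's actual
# implementation may differ.
def distance_measure_sketch(features, labels):
    """Mean intra-class distance (samples to their class mean) and mean
    inter-class distance (between class means)."""
    features, labels = np.asarray(features), np.asarray(labels).reshape(-1)
    classes = np.unique(labels)
    means = np.stack([features[labels == c].mean(axis=0) for c in classes])
    # Intra-class: average distance of each sample to its own class mean.
    intra_dist = np.mean([np.linalg.norm(features[labels == c] - means[i], axis=1).mean()
                          for i, c in enumerate(classes)])
    # Inter-class: average pairwise distance between class means.
    pair_dists = np.linalg.norm(means[:, None, :] - means[None, :, :], axis=-1)
    inter_dist = pair_dists[np.triu_indices(len(classes), k=1)].mean()
    return intra_dist, inter_dist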