def test(model, fetcher):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = 0
    # true positive / intersection
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    pbar = tqdm(fetcher)
    with torch.no_grad():  # no gradients needed during evaluation
        for idx, (inputs, targets) in enumerate(pbar):
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets, model)
            val_loss += loss.item()
            predicted = outputs.max(1)[1]
            if idx == 0:
                show_batch(inputs, predicted)
            predicted = predicted.view(-1)
            targets = targets.view(-1)
            eq = predicted.eq(targets)
            total_size += predicted.size(0)
            # accumulate per-class true positives, false negatives, false positives
            for c_i, c in enumerate(classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
            pbar.set_description(
                'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' %
                (val_loss / batch_idx, P.mean(), F1.mean(), miou.mean()))
    if dist.is_initialized():
        # sum the counts across all workers before recomputing global metrics
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g' %
                  (c, T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    else:
        # report the five classes with the lowest IoU
        print('top 5 errors')
        copy_miou = miou.clone()
        for i in range(5):
            c_i = copy_miou.min(0)[1]
            copy_miou[c_i] = 1
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g' %
                  (classes[c_i], T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    return miou.mean().item()
def test(model, fetcher, distributed=False):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = 0
    # true positive / intersection
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    with torch.no_grad():
        pbar = tqdm(enumerate(fetcher), total=len(fetcher))
        for idx, (inputs, targets) in pbar:
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets)
            val_loss += loss.item()
            predicted = outputs
            if idx == 0:
                show_batch('test_batch.png', inputs.cpu(), predicted.cpu())
            # targets are one-hot along dim 1, so take the argmax of both
            predicted = predicted.max(1)[1].view(-1)
            targets = targets.max(1)[1].view(-1)
            eq = predicted.eq(targets)
            total_size += predicted.size(0)
            for c_i, c in enumerate(classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            T, P, R, miou, F1 = compute_metrics(tp, fn, fp)
            pbar.set_description(
                'loss: %8g, mAP: %8g, F1: %8g, miou: %8g' %
                (val_loss / batch_idx, P.mean(), F1.mean(), miou.mean()))
    if distributed:
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        T, P, R, miou, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    for c_i, c in enumerate(classes):
        print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, iou: %8g, F1: %8g' %
              (c, T[c_i], P[c_i], R[c_i], miou[c_i], F1[c_i]))
    return miou.mean().item()
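# The `test` loops above unpack compute_metrics(tp, fn, fp) into
# (T, P, R, miou, F1): per-class target counts, precision, recall, IoU, and
# F1. The helper itself is not shown in this section, so the following is
# only a minimal sketch consistent with those printed fields; the epsilon
# guard against division by zero is an assumption.
import torch

def compute_metrics(tp, fn, fp, eps=1e-9):
    T = tp + fn                       # ground-truth instances per class
    P = tp / (tp + fp + eps)          # precision
    R = tp / (tp + fn + eps)          # recall
    miou = tp / (tp + fn + fp + eps)  # per-class intersection over union
    F1 = 2 * P * R / (P + R + eps)
    return T, P, R, miou, F1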
def get_accuracy_f1_scores_from_damic_model(damic_model, labeled_train_and_valid, device):
    """
    Predict labels and compare them to the true labels to compute the accuracy and F1 score.

    :param damic_model: the trained DAMIC model to evaluate
    :param labeled_train_and_valid: dataset composed of the labeled training and validation sets
    :param device: cuda (training is done on gpu) or cpu
    :return: predictions made by damic, accuracy and f1 score
    """
    print("Evaluating DAMIC model ...")
    start_time = time()
    # a single batch containing the whole labeled set
    valid_and_train_real_label_loader = DataLoader(
        labeled_train_and_valid, batch_size=len(labeled_train_and_valid))
    with torch.no_grad():
        for inputs, labels in valid_and_train_real_label_loader:
            inputs = inputs.to(device)
            labels = labels.long()
            labels = labels.squeeze()
            print("Accuracy predictions")
            damic_predictions = damic_model(inputs)
            _, damic_predictions = damic_predictions.max(1)
            print("DAMIC predictions results")
            print(damic_predictions)
            print("Expected results")
            print(labels)
            accuracy, f1 = compute_metrics(labels, damic_predictions.cpu())
    print("Done in {:.2f} sec | Accuracy: {:.2f} - F1: {:.2f}".format(
        time() - start_time, accuracy * 100, f1 * 100))
    return damic_predictions, accuracy, f1
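# Here compute_metrics(labels, predictions) returns an (accuracy, f1) pair.
# A minimal hypothetical sketch assuming scikit-learn is available; the real
# helper is not part of this section.
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(true_labels, predicted_labels):
    accuracy = accuracy_score(true_labels, predicted_labels)
    # a weighted average accounts for class imbalance across the labels
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    return accuracy, f1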
def test(model, fetcher):
    model.eval()
    val_loss = 0
    classes = fetcher.loader.dataset.classes
    num_classes = len(classes)
    total_size = torch.Tensor([0])
    true_size = torch.Tensor([0])
    tp = torch.zeros(num_classes)
    fp = torch.zeros(num_classes)
    fn = torch.zeros(num_classes)
    pbar = tqdm(enumerate(fetcher), total=len(fetcher))
    with torch.no_grad():  # no gradients needed during evaluation
        for idx, (inputs, targets) in pbar:
            batch_idx = idx + 1
            outputs = model(inputs)
            loss = compute_loss(outputs, targets, model)
            val_loss += loss.item()
            predicted = outputs.max(1)[1]
            if idx == 0:
                show_batch(inputs.cpu(), predicted.cpu(), classes)
            eq = predicted.eq(targets)
            total_size += predicted.size(0)
            true_size += eq.sum()
            for c_i, c in enumerate(classes):
                indices = targets.eq(c_i)
                positive = indices.sum().item()
                tpi = eq[indices].sum().item()
                fni = positive - tpi
                fpi = predicted.eq(c_i).sum().item() - tpi
                tp[c_i] += tpi
                fn[c_i] += fni
                fp[c_i] += fpi
            pbar.set_description('loss: %8g, acc: %8g' %
                                 (val_loss / batch_idx, true_size / total_size))
    if dist.is_initialized():
        # sum counts across workers so the reported metrics are global
        tp = tp.to(device)
        fn = fn.to(device)
        fp = fp.to(device)
        total_size = total_size.to(device)
        true_size = true_size.to(device)
        dist.all_reduce(tp, op=dist.ReduceOp.SUM)
        dist.all_reduce(fn, op=dist.ReduceOp.SUM)
        dist.all_reduce(fp, op=dist.ReduceOp.SUM)
        dist.all_reduce(total_size, op=dist.ReduceOp.SUM)
        dist.all_reduce(true_size, op=dist.ReduceOp.SUM)
    # computed outside the distributed branch so T, P, R, F1 are always defined
    T, P, R, F1 = compute_metrics(tp.cpu(), fn.cpu(), fp.cpu())
    if len(classes) < 10:
        for c_i, c in enumerate(classes):
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (c, T[c_i], P[c_i], R[c_i], F1[c_i]))
    else:
        # report the five classes with the lowest precision
        print('top 5 errors')
        copy_P = P.clone()
        for i in range(5):
            c_i = copy_P.min(0)[1]
            copy_P[c_i] = 1
            print('cls: %8s, targets: %8d, pre: %8g, rec: %8g, F1: %8g' %
                  (classes[c_i], T[c_i], P[c_i], R[c_i], F1[c_i]))
    return true_size.item() / total_size.item()
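# This variant unpacks only (T, P, R, F1), i.e. the same counts-based helper
# without the IoU term. A sketch consistent with that signature, mirroring
# the earlier hypothetical helper:
def compute_metrics(tp, fn, fp, eps=1e-9):
    T = tp + fn
    P = tp / (tp + fp + eps)
    R = tp / (tp + fn + eps)
    F1 = 2 * P * R / (P + R + eps)
    return T, P, R, F1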
def train_(train_iter, net, opt, loss_function, loss_type, ind_ignore, n_classes):
    net.train()
    train_loss = 0
    total = 0
    # Create the confusion matrix
    cm = np.zeros((n_classes, n_classes))
    nTrain = train_iter.nbatches
    for batch_idx in range(nTrain):
        all_data = train_iter.next()
        data = all_data[0]
        target = all_data[1]
        # NHWC numpy arrays -> NCHW tensors
        data, target = data.transpose((0, 3, 1, 2)), target.transpose((0, 3, 1, 2))
        data, target = torch.from_numpy(data), torch.from_numpy(target)
        data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        opt.zero_grad()
        output = net(data)
        target = target.type(torch.FloatTensor).cuda()
        _, target_indices = torch.max(target, 1)
        _, output_indices = torch.max(output, 1)
        flattened_output = output_indices.view(-1)
        flattened_target = target_indices.view(-1)
        if loss_type == 'cce_soft':
            loss = cce_soft(output, target, ignore_label=ind_ignore)
        else:
            loss = loss_function(output, target_indices)
        cm = confusion_matrix(cm, flattened_output.data.cpu().numpy(),
                              flattened_target.data.cpu().numpy(), n_classes)
        loss.backward()
        # clip_grad_norm was renamed clip_grad_norm_ in PyTorch 0.4
        nn.utils.clip_grad_norm_(net.parameters(), max_norm=4)
        opt.step()
        train_loss += loss.item()  # loss.data[0] is deprecated
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        progress_bar(batch_idx, nTrain,
                     'Loss: %.3f' % (train_loss / (batch_idx + 1)))
        # drop references so the large activations can be freed
        del output
        del loss
        del flattened_output
        del output_indices
    jaccard_per_class, jaccard, accuracy = compute_metrics(cm)
    metrics_string = print_metrics(train_loss, nTrain, n_classes,
                                   jaccard_per_class, jaccard, accuracy)
    print(metrics_string)
    return jaccard, jaccard_per_class, accuracy, train_loss / nTrain
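# The train_/val_/test_ loops in this section share two helpers that are not
# shown: confusion_matrix accumulates counts into cm[pred, target], and
# compute_metrics(cm) derives per-class Jaccard, mean Jaccard, and pixel
# accuracy from it. A minimal hypothetical sketch consistent with those
# call sites:
import numpy as np

def confusion_matrix(cm, output_flat, target_flat, n_classes):
    # bincount over pred * n_classes + target fills the matrix in one pass
    cm += np.bincount(output_flat * n_classes + target_flat,
                      minlength=n_classes ** 2).reshape(n_classes, n_classes)
    return cm

def compute_metrics(cm, eps=1e-9):
    tp = np.diag(cm)
    fp = cm.sum(axis=1) - tp  # predicted as class c but wrong
    fn = cm.sum(axis=0) - tp  # class c pixels missed
    jaccard_per_class = tp / (tp + fp + fn + eps)
    accuracy = tp.sum() / (cm.sum() + eps)
    return jaccard_per_class, jaccard_per_class.mean(), accuracy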
def eval(self):
    # compute the metrics on the provided dataset with the provided networks
    measures = []
    metrics = {}
    metrics_list = ['Abs rel', 'Sq rel', 'RMSE', 'log RMSE', 's1', 's2', 's3']
    MSELoss = nn.MSELoss()
    # reducing with s1['path'] + ', ' + s2['path'] breaks after the first step
    # (the accumulator is already a string), so join the paths instead
    print('Starting evaluation on datasets: ',
          ', '.join(d['path'] for d in self.dataset_paths))
    with torch.no_grad():  # no gradients needed during evaluation
        for idx, (tensorImage, disparities, masks, imageNetTensor,
                  dataset_ids) in enumerate(tqdm(self.data_loader)):
            tensorImage = tensorImage.to(device, non_blocking=True)
            disparities = disparities.to(device, non_blocking=True)
            masks = masks.to(device, non_blocking=True)
            N = tensorImage.size()[2] * tensorImage.size()[3]
            # pretrained networks from 3D KBE were trained with images normalized between 0 and 1
            if self.eval_pretrained:
                tensorImage = (tensorImage + 1) / 2
            tensorResized = resize_image(tensorImage)
            tensorDisparity = self.moduleDisparity(
                tensorResized, self.moduleSemantics(tensorResized))  # depth estimation
            tensorDisparity = self.moduleRefine(
                tensorImage, tensorDisparity)  # increase resolution
            tensorDisparity = F.threshold(tensorDisparity, threshold=0.0, value=0.0)
            masks = masks.clamp(0, 1)
            measures.append(
                np.array(compute_metrics(tensorDisparity, disparities, masks)))
    measures = np.array(measures).mean(axis=0)
    for i, name in enumerate(metrics_list):
        metrics[name] = measures[i]
    return metrics
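# metrics_list implies the standard Eigen-style depth measures (absolute and
# squared relative error, RMSE, log RMSE, and the delta < 1.25^k threshold
# accuracies). The real compute_metrics is not shown; this is a hypothetical
# masked sketch, and the clamp is an assumption to guard log/division on the
# zero-thresholded disparities.
def compute_metrics(pred, gt, mask):
    pred, gt = pred[mask > 0], gt[mask > 0]
    pred, gt = pred.clamp(min=1e-6), gt.clamp(min=1e-6)
    abs_rel = ((pred - gt).abs() / gt).mean()
    sq_rel = ((pred - gt) ** 2 / gt).mean()
    rmse = ((pred - gt) ** 2).mean().sqrt()
    log_rmse = ((pred.log() - gt.log()) ** 2).mean().sqrt()
    ratio = torch.max(pred / gt, gt / pred)
    s1 = (ratio < 1.25).float().mean()       # delta < 1.25
    s2 = (ratio < 1.25 ** 2).float().mean()  # delta < 1.25^2
    s3 = (ratio < 1.25 ** 3).float().mean()  # delta < 1.25^3
    return [m.item() for m in (abs_rel, sq_rel, rmse, log_rmse, s1, s2, s3)]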
# Save output images, one at a time, to results
img_tensor = inputs.detach().cpu()
output_tensor = outputs.detach().cpu()
label_tensor = labels.detach().cpu()

# Extract each tensor within batch and save results
for iii, sample_batched in enumerate(
        zip(img_tensor, output_tensor, label_tensor)):
    img, output, label = sample_batched
    pred = torch.max(output, 0)[1].float()
    # print('pred:', pred.shape, pred.dtype, pred.min(), pred.max())
    # print('label:', label.shape, label.dtype, label.min(), label.max())
    iou, tp, tn, fp, fn = utils.compute_metrics(pred, label.squeeze(0))
    running_iou.append(iou)
    running_tp.append(tp)
    running_tn.append(tn)
    running_fp.append(fp)
    running_fn.append(fn)

    # Write the data into a csv file
    with open(os.path.join(results_dir, csv_filename), 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names, delimiter=',')
        row_data = [((ii * config.eval.batchSize) + iii), iou, tp, tn, fp, fn]
        writer.writerow(dict(zip(field_names, row_data)))
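# utils.compute_metrics here returns per-image IoU plus raw pixel counts for
# a binary mask, matching the (iou, tp, tn, fp, fn) unpacking above. A
# minimal hypothetical sketch, not the actual utils implementation:
def compute_metrics(pred, label, eps=1e-9):
    pred, label = pred.bool(), label.bool()
    tp = (pred & label).sum().item()
    tn = (~pred & ~label).sum().item()
    fp = (pred & ~label).sum().item()
    fn = (~pred & label).sum().item()
    iou = tp / (tp + fp + fn + eps)
    return iou, tp, tn, fp, fn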
def test_(test_iter, net, experiment_dir_final, loss_function, loss_type,
          void_labels, save_test_images, n_classes):
    ckt_names = ['best_jaccard.t7']
    for ckt_name in ckt_names:
        print('Testing checkpoint ' + ckt_name)
        checkpoint = torch.load(
            os.path.join(experiment_dir_final, 'checkpoint', ckt_name))
        print('Checkpoint loaded for testing...')
        net.load_state_dict(checkpoint['net'])
        net.eval()
        test_loss = 0
        total = 0
        # Create the confusion matrix
        cm = np.zeros((n_classes, n_classes))
        nTest = test_iter.nbatches
        for batch_idx in range(nTest):
            all_data = test_iter.next()
            data_ = all_data[0]
            target_ = all_data[1]
            # NHWC numpy arrays -> NCHW tensors
            data, target = data_.transpose((0, 3, 1, 2)), target_.transpose((0, 3, 1, 2))
            data, target = torch.from_numpy(data), torch.from_numpy(target)
            data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            output = net(data)
            target = target.type(torch.LongTensor).cuda()
            _, target_indices = torch.max(target, 1)
            _, output_indices = torch.max(output, 1)
            flattened_output = output_indices.view(-1)
            flattened_target = target_indices.view(-1)
            loss = loss_function(output, target_indices)
            cm = confusion_matrix(cm, flattened_output.data.cpu().numpy(),
                                  flattened_target.data.cpu().numpy(), n_classes)
            test_loss += loss.item()  # loss.data[0] is deprecated
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            progress_bar(batch_idx, test_iter.nbatches,
                         'Test loss: %.3f' % (test_loss / (batch_idx + 1)))
            if save_test_images:
                save_images(data_, target_, output, experiment_dir_final,
                            batch_idx, void_labels)
            del output
            del loss
            del flattened_output
            del output_indices
        jaccard_per_class, jaccard, accuracy = compute_metrics(cm)
        metrics_string = print_metrics(test_loss, nTest, n_classes,
                                       jaccard_per_class, jaccard, accuracy)
        print(metrics_string)
def val_(val_iter, net, opt, loss_function, loss_type, epoch, es_step,
         ind_ignore, experiment_dir, max_patience, best_jacc, n_classes):
    code = 0
    net.eval()
    test_loss = 0
    total = 0
    # Create the confusion matrix
    cm = np.zeros((n_classes, n_classes))
    nVal = val_iter.nbatches
    for batch_idx in range(nVal):
        all_data = val_iter.next()
        data = all_data[0]
        target = all_data[1]
        # NHWC numpy arrays -> NCHW tensors
        data, target = data.transpose((0, 3, 1, 2)), target.transpose((0, 3, 1, 2))
        data, target = torch.from_numpy(data), torch.from_numpy(target)
        data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = net(data)
        target = target.type(torch.FloatTensor).cuda()
        _, target_indices = torch.max(target, 1)
        _, output_indices = torch.max(output, 1)
        flattened_output = output_indices.view(-1)
        flattened_target = target_indices.view(-1)
        if loss_type == 'cce_soft':
            loss = cce_soft(output, target, ignore_label=ind_ignore)
        else:
            loss = loss_function(output, target_indices)
        cm = confusion_matrix(cm, flattened_output.data.cpu().numpy(),
                              flattened_target.data.cpu().numpy(), n_classes)
        test_loss += loss.item()  # loss.data[0] is deprecated
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        progress_bar(batch_idx, val_iter.nbatches,
                     'Val loss: %.3f' % (test_loss / (batch_idx + 1)))
        del output
        del loss
        del flattened_output
        del output_indices
    jaccard_per_class, jaccard, accuracy = compute_metrics(cm)
    metrics_string = print_metrics(test_loss, nVal, n_classes,
                                   jaccard_per_class, jaccard, accuracy)
    print(metrics_string)
    es_step, best_jacc = save_checkpoints(jaccard, net, epoch, opt,
                                          experiment_dir, best_jacc, es_step)
    # Early stopping
    if es_step >= max_patience:
        print('Early stopping! Max mean jaccard: ' + str(best_jacc))
        code = 1
    return es_step, best_jacc, code, jaccard, jaccard_per_class, accuracy, \
        test_loss / nVal
def evaluate(self, mode):
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler,
                                 batch_size=self.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            inputs['token_type_ids'] = batch[2]
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(
                out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    preds_reshape = preds.tolist()
    out_label_ids_reshape = out_label_ids.tolist()
    class_labels = list(
        self.model.config.label2id.keys())  # argument used only for classification tasks
    result = compute_metrics(preds_reshape, out_label_ids_reshape, class_labels)
    print(result)
    # results.update(result)

    # # Slot result
    # preds2 = np.argmax(preds, axis=2)
    # slot_label_map = {i: label for i, label in enumerate(self.label_lst)}
    # out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    # preds_list = [[] for _ in range(out_label_ids.shape[0])]
    #
    # for i in range(out_label_ids.shape[0]):
    #     for j in range(out_label_ids.shape[1]):
    #         if out_label_ids[i, j] != self.pad_token_label_id:
    #             out_label_list[i].append(slot_label_map[out_label_ids[i][j]])
    #             preds_list[i].append(slot_label_map[preds[i][j]])
    #
    # result = compute_metrics(out_label_list, preds_list)
    # results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results
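# The compute_metrics call above takes (predictions, gold labels, class label
# names), with the names taken from model.config.label2id. A minimal
# hypothetical sketch assuming scikit-learn; the actual helper is not
# included in this section.
from sklearn.metrics import accuracy_score, classification_report, f1_score

def compute_metrics(preds, labels, class_labels):
    return {
        'acc': accuracy_score(labels, preds),
        'macro_f1': f1_score(labels, preds, average='macro'),
        # per-class precision/recall/F1 keyed by the label names
        'report': classification_report(labels, preds, target_names=class_labels),
    }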