def _test(y_pred, y, batch_size):
    re.reset()
    assert re._updated is False

    if batch_size > 1:
        n_iters = y.shape[0] // batch_size + 1
        for i in range(n_iters):
            idx = i * batch_size
            re.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))
    else:
        re.update((y_pred, y))

    np_y_pred = to_numpy_multilabel(y_pred)
    np_y = to_numpy_multilabel(y)

    assert re._type == "multilabel"
    assert re._updated is True
    re_compute = re.compute() if average else re.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)


re1 = Recall(is_multilabel=True, average=True)
re2 = Recall(is_multilabel=True, average=False)
assert re1._updated is False
assert re2._updated is False

re1.update((y_pred, y))
re2.update((y_pred, y))
assert re1._updated is True
assert re2._updated is True
assert re1.compute() == pytest.approx(re2.compute().mean().item())
assert re1._updated is True
assert re2._updated is True
def test_compute():
    recall = Recall()

    y_pred = torch.eye(4)
    y = torch.ones(4).type(torch.LongTensor)
    recall.update((y_pred, y))

    result = list(recall.compute())

    assert result[0] == 0.0
    assert result[1] == 0.25
    assert result[2] == 0.0
    assert result[3] == 0.0

    recall.reset()
    y_pred = torch.eye(2)
    y = torch.ones(2).type(torch.LongTensor)
    recall.update((y_pred, y))

    y = torch.zeros(2).type(torch.LongTensor)
    recall.update((y_pred, y))

    result = list(recall.compute())

    assert result[0] == 0.5
    assert result[1] == 0.5
def test_incorrect_type():
    # Tests changing of type during training

    def _test(average):
        re = Recall(average=average)

        y_pred = torch.softmax(torch.rand(4, 4), dim=1)
        y = torch.ones(4).long()
        re.update((y_pred, y))

        y_pred = torch.zeros(4,)
        y = torch.ones(4).long()

        with pytest.raises(RuntimeError):
            re.update((y_pred, y))

    _test(average=True)
    _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
def test_multilabel_input_NCHW():
    def _test(average):
        re = Recall(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(10, 5, 18, 16))
        y = torch.randint(0, 2, size=(10, 5, 18, 16)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == 'multilabel'
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average='samples') == pytest.approx(re_compute)

        re.reset()
        y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
        y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
        re.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert re._type == 'multilabel'
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average='samples') == pytest.approx(re_compute)

        # Batched Updates
        re.reset()
        y_pred = torch.randint(0, 2, size=(100, 5, 12, 14))
        y = torch.randint(0, 2, size=(100, 5, 12, 14)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            re.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

        np_y = to_numpy_multilabel(y)
        np_y_pred = to_numpy_multilabel(y_pred)
        assert re._type == 'multilabel'
        re_compute = re.compute() if average else re.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y, np_y_pred, average='samples') == pytest.approx(re_compute)

    for _ in range(5):
        _test(average=True)
        _test(average=False)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
def test_no_update():
    recall = Recall()
    with pytest.raises(NotComputableError):
        recall.compute()

    recall = Recall(is_multilabel=True, average=True)
    with pytest.raises(NotComputableError):
        recall.compute()
def test_compute_average():
    recall = Recall(average=True)

    y_pred = torch.eye(4)
    y = torch.ones(4).type(torch.LongTensor)
    recall.update((y_pred, y))

    assert isinstance(recall.compute(), float)
    assert recall.compute() == 0.0625
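# A worked check of the 0.0625 value asserted above; this is a minimal sketch
# of the arithmetic only, and the hand-derived tensor below is not part of the
# original tests. y_pred = torch.eye(4) predicts classes [0, 1, 2, 3] via
# argmax while y = [1, 1, 1, 1], so class 1 has recall 1/4 = 0.25; classes 0,
# 2 and 3 have no positive samples in y, so their recall is taken as 0, and
# averaging over the 4 classes gives 0.25 / 4 = 0.0625.
def test_compute_average_arithmetic_sketch():
    per_class_recall = torch.tensor([0.0, 0.25, 0.0, 0.0])
    assert per_class_recall.mean().item() == 0.0625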
def test_no_update():
    recall = Recall()
    with pytest.raises(NotComputableError, match=r"Recall must have at least one example before it can be computed"):
        recall.compute()

    recall = Recall(is_multilabel=True, average=True)
    with pytest.raises(NotComputableError):
        recall.compute()
def _test(average):
    re = Recall(average=average)

    y_pred = torch.rand(10, 5, 18, 16)
    y = torch.randint(0, 5, size=(10, 18, 16)).long()
    re.update((y_pred, y))
    num_classes = y_pred.shape[1]
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
    np_y = y.numpy().ravel()
    assert re._type == "multiclass"
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = recall_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(re_compute)

    re.reset()
    y_pred = torch.rand(10, 7, 20, 12)
    y = torch.randint(0, 7, size=(10, 20, 12)).long()
    re.update((y_pred, y))
    num_classes = y_pred.shape[1]
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
    np_y = y.numpy().ravel()
    assert re._type == "multiclass"
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = recall_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(re_compute)

    # Batched Updates
    re.reset()
    y_pred = torch.rand(100, 10, 12, 14)
    y = torch.randint(0, 10, size=(100, 12, 14)).long()

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1

    for i in range(n_iters):
        idx = i * batch_size
        re.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    num_classes = y_pred.shape[1]
    np_y = y.numpy().ravel()
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
    assert re._type == "multiclass"
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = recall_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(re_compute)
def _test(average):
    re = Recall(average=average)

    y_pred = torch.rand(20, 6)
    y = torch.randint(0, 5, size=(20,)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
    np_y = y.numpy().ravel()
    assert re._type == 'multiclass'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sklearn_average_parameter = 'macro' if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average=sklearn_average_parameter) == pytest.approx(re_compute)

    re.reset()
    y_pred = torch.rand(10, 4)
    y = torch.randint(0, 3, size=(10, 1)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
    np_y = y.numpy().ravel()
    assert re._type == 'multiclass'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sklearn_average_parameter = 'macro' if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average=sklearn_average_parameter) == pytest.approx(re_compute)

    # 2-classes
    re.reset()
    y_pred = torch.rand(10, 2)
    y = torch.randint(0, 2, size=(10, 1)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
    np_y = y.numpy().ravel()
    assert re._type == 'multiclass'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    sklearn_average_parameter = 'macro' if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average=sklearn_average_parameter) == pytest.approx(re_compute)
def test_binary_shapes():
    recall = Recall(average=True)

    y = torch.LongTensor([1, 0])
    y_pred = torch.FloatTensor([0.9, 0.2])
    y_pred = y_pred.unsqueeze(1)
    indices = torch.max(torch.cat([1.0 - y_pred, y_pred], dim=1), dim=1)[1]
    recall.update((y_pred, y))
    assert recall.compute() == pytest.approx(recall_score(y.data.numpy(), indices.data.numpy(), average='macro'))
    assert recall.compute() == 1.0

    y = torch.LongTensor([[1], [0]])
    y_pred = torch.FloatTensor([[0.9], [0.2]])
    indices = torch.max(torch.cat([1.0 - y_pred, y_pred], dim=1), dim=1)[1]
    recall.reset()
    recall.update((y_pred, y))
    assert recall.compute() == pytest.approx(recall_score(y.data.numpy(), indices.data.numpy(), average='macro'))
    assert recall.compute() == 1.0
def test_compute_all_wrong():
    recall = Recall()

    y_pred = torch.FloatTensor([[1.0, 0.0], [1.0, 0.0]])
    y = torch.ones(2).type(torch.LongTensor)
    recall.update((y_pred, y))

    result = list(recall.compute())

    assert result[0] == 0.0
    assert result[1] == 0.0
def _test(average):
    re = Recall(average=average)

    y_pred = torch.randint(0, 2, size=(10, 12, 10))
    y = torch.randint(0, 2, size=(10, 12, 10)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert re._type == 'binary'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)

    re.reset()
    y_pred = torch.randint(0, 2, size=(10, 1, 12, 10))
    y = torch.randint(0, 2, size=(10, 1, 12, 10)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert re._type == 'binary'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)

    re = Recall(average=average)

    # Batched Updates
    re.reset()
    y_pred = torch.randint(0, 2, size=(100, 12, 10))
    y = torch.randint(0, 2, size=(100, 1, 12, 10)).type(torch.LongTensor)

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1

    for i in range(n_iters):
        idx = i * batch_size
        re.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert re._type == 'binary'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)
def _test(average):
    re = Recall(average=average)

    # TODO: y_pred should be binary after 0.1.2 release
    # y_pred = torch.randint(0, 2, size=(10, 12, 10)).type(torch.LongTensor)
    y_pred = torch.rand(10, 12, 10)
    y = torch.randint(0, 2, size=(10, 12, 10)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y = y.numpy().ravel()
    # np_y_pred = y_pred.numpy().ravel()
    np_y_pred = (y_pred.numpy().ravel() > 0.5).astype('int')
    assert re._type == 'binary'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)

    re.reset()
    # TODO: y_pred should be binary after 0.1.2 release
    # y_pred = torch.randint(0, 2, size=(10, 1, 12, 10)).type(torch.LongTensor)
    y_pred = torch.rand(10, 1, 12, 10)
    y = torch.randint(0, 2, size=(10, 1, 12, 10)).type(torch.LongTensor)
    re.update((y_pred, y))
    np_y = y.numpy().ravel()
    # np_y_pred = y_pred.numpy().ravel()
    np_y_pred = (y_pred.numpy().ravel() > 0.5).astype('int')
    assert re._type == 'binary'
    assert isinstance(re.compute(), float if average else torch.Tensor)
    re_compute = re.compute() if average else re.compute().numpy()
    assert recall_score(np_y, np_y_pred, average='binary') == pytest.approx(re_compute)
def test_predict(model, dataloader_test, use_cuda):
    if use_cuda:
        model = model.cuda()

    precision = Precision()
    recall = Recall()
    f1 = Fbeta(beta=1.0, average=True, precision=precision, recall=recall)

    for i, (img, label) in enumerate(dataloader_test):
        img, label = Variable(img), Variable(label)
        if use_cuda:
            img = img.cuda()
            label = label.cuda()

        pred = model(img)
        # Labels are one-hot encoded; recover the class indices.
        _, my_label = torch.max(label, dim=1)

        precision.update((pred, my_label))
        recall.update((pred, my_label))
        f1.update((pred, my_label))

    precision.compute()
    recall.compute()
    print("\tF1 Score: {:0.2f}".format(f1.compute() * 100))
class FbetaScore(Metric):
    def __init__(
        self,
        beta: int = 1,
        output_transform: Callable = lambda x: x,
        average: str = "macro",
        is_multilabel: bool = False,
        device: Optional[Union[str, torch.device]] = None,
    ):
        self._beta = beta
        self._average = average
        _average_flag = self._average != "macro"

        self._precision = Precision(
            output_transform=output_transform,
            average=_average_flag,
            is_multilabel=is_multilabel,
            device=device,
        )
        self._recall = Recall(
            output_transform=output_transform,
            average=_average_flag,
            is_multilabel=is_multilabel,
            device=device,
        )

        super(FbetaScore, self).__init__(output_transform=output_transform, device=device)

    @reinit__is_reduced
    def reset(self) -> None:
        self._precision.reset()
        self._recall.reset()

    def compute(self) -> torch.Tensor:
        precision_val = self._precision.compute()
        recall_val = self._recall.compute()
        fbeta_val = (
            (1.0 + self._beta ** 2) * precision_val * recall_val
            / (self._beta ** 2 * precision_val + recall_val + 1e-15)
        )
        if self._average == "macro":
            fbeta_val = torch.mean(fbeta_val).item()
        return fbeta_val

    @reinit__is_reduced
    def update(self, output: Sequence[torch.Tensor]) -> None:
        self._precision.update(output)
        self._recall.update(output)
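# A minimal usage sketch for the FbetaScore wrapper above, attached to an
# ignite Engine the same way the Recall tests in this file do. The toy update
# function, the random data, and the "f2" metric name are illustrative
# assumptions, not part of the original code.
def _fbeta_usage_sketch():
    import torch
    from ignite.engine import Engine

    def update(engine, batch):
        # Each batch is already a (y_pred, y) pair in this toy example.
        y_pred, y = batch
        return y_pred, y

    engine = Engine(update)

    f2 = FbetaScore(beta=2, average="macro")
    f2.attach(engine, "f2")

    # Random multiclass data: 5 batches of 8 samples over 4 classes.
    data = [
        (torch.softmax(torch.rand(8, 4), dim=1), torch.randint(0, 4, size=(8,)))
        for _ in range(5)
    ]
    state = engine.run(data, max_epochs=1)
    # average="macro" collapses the per-class F-beta values into a single float.
    print(state.metrics["f2"])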
def _test(average, n_epochs, metric_device):
    n_iters = 60
    s = 16
    n_classes = 7

    offset = n_iters * s
    y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)
    y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
        )

    engine = Engine(update)

    re = Recall(average=average, is_multilabel=True, device=metric_device)
    re.attach(engine, "re")
    assert re._updated is False

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=n_epochs)

    assert "re" in engine.state.metrics
    assert re._updated is True
    res = engine.state.metrics["re"]
    res2 = re.compute()
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()
        res2 = res2.cpu().numpy()
        assert (res == res2).all()
    else:
        assert res == res2

    np_y_preds = to_numpy_multilabel(y_preds)
    np_y_true = to_numpy_multilabel(y_true)
    assert re._type == "multilabel"
    res = res if average else res.mean().item()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y_true, np_y_preds, average="samples") == pytest.approx(res)
def evaluate_epoch(eval_dl, model, criterion, epoch, writer):
    """Run evaluation for one epoch.

    Args:
        eval_dl (DataLoader): DataLoader of the validation set
        model (nn.Module): model in PyTorch
        criterion (loss): PyTorch loss
        epoch (int): epoch number
        writer (SummaryWriter): instance of SummaryWriter for TensorBoard

    Returns:

    """
    print('\neval epoch {}'.format(epoch))
    device = next(model.parameters()).device
    model.eval()
    recall = Recall(lambda x: (x[0], x[1]))
    precision = Precision(lambda x: (x[0], x[1]))
    mean_recall = []
    mean_precision = []
    mean_loss = []
    with torch.no_grad():
        for idx, (inputs, targets) in enumerate(eval_dl):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            preds = outputs.argmax(1)
            precision.update((preds, targets))
            recall.update((preds, targets))
            mean_loss.append(loss.item())
            mean_recall.append(recall.compute().item())
            mean_precision.append(precision.compute().item())

            # print('val-epoch:{} [{}/{}], loss: {:5.3}'.format(epoch, idx + 1, len(dataloader), loss.item()))
            writer.add_scalar('test/loss', loss.item(), len(eval_dl) * epoch + idx)

    mean_precision, mean_recall = np.array(mean_precision).mean(), np.array(mean_recall).mean()
    f1 = mean_precision * mean_recall * 2 / (mean_precision + mean_recall + 1e-20)

    print('precision: {:07.5}, recall: {:07.5}, f1: {:07.5}\n'.format(mean_precision, mean_recall, f1))
    writer.add_scalar('test/epoch-loss', np.array(mean_loss).mean(), epoch)
    writer.add_scalar('test/f1', f1, epoch)
    writer.add_scalar('test/precision', mean_precision, epoch)
    writer.add_scalar('test/recall', mean_recall, epoch)
def test_ner_example():
    recall = Recall()

    y = torch.Tensor([[0, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2]]).type(torch.LongTensor)
    y_pred = torch.softmax(torch.rand(2, 3, 8), dim=1)
    indices = torch.max(y_pred, dim=1)[1]
    y_pred_labels = list(set(indices.view(-1).tolist()))

    recall_sk = recall_score(y.view(-1).data.numpy(), indices.view(-1).data.numpy(),
                             labels=y_pred_labels, average=None)

    recall.update((y_pred, y))
    recall_ig = recall.compute().tolist()
    recall_ig = [recall_ig[i] for i in y_pred_labels]

    assert all([a == pytest.approx(b) for a, b in zip(recall_sk, recall_ig)])
def evalidation(epoch, dataloader, model, criterion, device, writer, tb_test_imgs):
    print('\neval epoch {}'.format(epoch))
    model.eval()
    recall = Recall(lambda x: (x[0], x[1]))
    precision = Precision(lambda x: (x[0], x[1]))
    mean_recall = []
    mean_precision = []
    mean_loss = []
    with torch.no_grad():
        for idx, (pre_img, post_img, targets) in enumerate(dataloader):
            pre_img, post_img, targets = pre_img.to(device), post_img.to(device), targets.to(device)
            outputs = model(pre_img, post_img)
            loss = criterion(outputs, targets)

            preds = outputs.argmax(1)
            precision.update((preds, targets))
            recall.update((preds, targets))
            mean_loss.append(loss.item())
            mean_recall.append(recall.compute().item())
            mean_precision.append(precision.compute().item())

            # print('val-epoch:{} [{}/{}], loss: {:5.3}'.format(epoch, idx + 1, len(dataloader), loss.item()))
            writer.add_scalar('test/loss', loss.item(), len(dataloader) * epoch + idx)
            if idx < tb_test_imgs:
                writer.add_image('test/pre', pre_img[0], idx)
                writer.add_image('test/post', post_img[0], idx)
                writer.add_image('test/label', targets[0], idx)
                writer.add_image('test/pred', preds[0], idx)

    mean_precision, mean_recall = np.array(mean_precision).mean(), np.array(mean_recall).mean()
    f1 = mean_precision * mean_recall * 2 / (mean_precision + mean_recall + 1e-20)

    print('precision: {:07.5}, recall: {:07.5}, f1: {:07.5}\n'.format(mean_precision, mean_recall, f1))
    writer.add_scalar('test/epoch-loss', np.array(mean_loss).mean(), epoch)
    writer.add_scalar('test/f1', f1, epoch)
    writer.add_scalar('test/precision', mean_precision, epoch)
    writer.add_scalar('test/recall', mean_recall, epoch)
def test_sklearn_compute():
    recall = Recall(average=False)

    y = torch.Tensor(range(5)).type(torch.LongTensor)
    y_pred = torch.softmax(torch.rand(5, 5), dim=1)
    indices = torch.max(y_pred, dim=1)[1]
    recall.update((y_pred, y))

    y_pred_labels = list(set(indices.tolist()))

    recall_sk = recall_score(y.data.numpy(), indices.data.numpy(),
                             labels=y_pred_labels, average=None)

    recall_ig = recall.compute().tolist()
    recall_ig = [recall_ig[i] for i in y_pred_labels]

    assert all([a == pytest.approx(b) for a, b in zip(recall_sk, recall_ig)])
def _test(average):
    re = Recall(average=average, is_multilabel=True)

    y_pred = torch.randint(0, 2, size=(10, 5, 10))
    y = torch.randint(0, 2, size=(10, 5, 10)).long()
    re.update((y_pred, y))
    np_y_pred = to_numpy_multilabel(y_pred)
    np_y = to_numpy_multilabel(y)
    assert re._type == "multilabel"
    re_compute = re.compute() if average else re.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

    re.reset()
    y_pred = torch.randint(0, 2, size=(15, 4, 10))
    y = torch.randint(0, 2, size=(15, 4, 10)).long()
    re.update((y_pred, y))
    np_y_pred = to_numpy_multilabel(y_pred)
    np_y = to_numpy_multilabel(y)
    assert re._type == "multilabel"
    re_compute = re.compute() if average else re.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)

    # Batched Updates
    re.reset()
    y_pred = torch.randint(0, 2, size=(100, 4, 12))
    y = torch.randint(0, 2, size=(100, 4, 12)).long()

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1

    for i in range(n_iters):
        idx = i * batch_size
        re.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    np_y = to_numpy_multilabel(y)
    np_y_pred = to_numpy_multilabel(y_pred)
    assert re._type == "multilabel"
    re_compute = re.compute() if average else re.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert recall_score(np_y, np_y_pred, average="samples") == pytest.approx(re_compute)
def _test(average, n_epochs):
    n_iters = 60
    s = 16
    n_classes = 7

    offset = n_iters * s
    y_true = torch.randint(0, 2, size=(offset * dist.get_world_size(), n_classes, 6, 8)).to(device)
    y_preds = torch.randint(0, 2, size=(offset * dist.get_world_size(), n_classes, 6, 8)).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
        )

    engine = Engine(update)

    re = Recall(average=average, is_multilabel=True, device=device)
    re.attach(engine, "re")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=n_epochs)

    assert "re" in engine.state.metrics
    res = engine.state.metrics['re']
    res2 = re.compute()
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()
        res2 = res2.cpu().numpy()
        assert (res == res2).all()
    else:
        assert res == res2

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        true_res = recall_score(to_numpy_multilabel(y_true), to_numpy_multilabel(y_preds),
                                average='samples' if average else None)

    assert pytest.approx(res) == true_res
def _test_distrib_integration_multilabel(device):
    from ignite.engine import Engine

    rank = idist.get_rank()
    torch.manual_seed(12)

    def _test(average, n_epochs, metric_device):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)
        y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        re = Recall(average=average, is_multilabel=True, device=metric_device)
        re.attach(engine, "re")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "re" in engine.state.metrics
        res = engine.state.metrics["re"]
        res2 = re.compute()
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()
            res2 = res2.cpu().numpy()
            assert (res == res2).all()
        else:
            assert res == res2

        np_y_preds = to_numpy_multilabel(y_preds)
        np_y_true = to_numpy_multilabel(y_true)
        assert re._type == "multilabel"
        res = res if average else res.mean().item()

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert recall_score(np_y_true, np_y_preds, average="samples") == pytest.approx(res)

    metric_devices = ["cpu"]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for _ in range(2):
        for metric_device in metric_devices:
            _test(average=True, n_epochs=1, metric_device=metric_device)
            _test(average=True, n_epochs=2, metric_device=metric_device)
            _test(average=False, n_epochs=1, metric_device=metric_device)
            _test(average=False, n_epochs=2, metric_device=metric_device)

    re1 = Recall(is_multilabel=True, average=True)
    re2 = Recall(is_multilabel=True, average=False)
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    re1.update((y_pred, y))
    re2.update((y_pred, y))
    assert re1.compute() == pytest.approx(re2.compute().mean().item())
def train_predict(dataloader_train, dataloader_val, model, epochs, learning_rate, use_cuda):
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    if use_cuda:
        model = model.cuda()
    model = model.train()
    start.record()

    train_loss_list = []
    val_loss_list = []
    train_f1 = []
    val_f1 = []

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    precision = Precision()
    recall = Recall()
    f1 = Fbeta(beta=1.0, average=True, precision=precision, recall=recall)

    for epoch in range(epochs):
        print("Epoch: {}".format(epoch + 1))

        # Training loop
        for i, (img, label) in enumerate(dataloader_train):
            img, label = Variable(img), Variable(label)
            if use_cuda:
                img = img.cuda()
                label = label.cuda()

            optimizer.zero_grad()
            pred = model.forward(img)
            # Labels are one-hot encoded; recover the class indices.
            _, my_label = torch.max(label, dim=1)
            loss = loss_fn(pred, my_label)
            if i == len(dataloader_train) - 1:
                train_loss_list.append(loss.item())
            loss.backward()
            optimizer.step()

            precision.update((pred, my_label))
            recall.update((pred, my_label))
            f1.update((pred, my_label))

        print("\tTrain loss: {:0.2f}".format(train_loss_list[-1]))
        precision.compute()
        recall.compute()
        train_f1.append(f1.compute() * 100)
        print("\tTrain F1 Score: {:0.2f}%".format(train_f1[-1]))

        # Fresh metrics for the validation pass
        precision = Precision()
        recall = Recall()
        f1 = Fbeta(beta=1.0, average=True, precision=precision, recall=recall)

        # Validation loop
        with torch.no_grad():
            for i, (img, label) in enumerate(dataloader_val):
                img, label = Variable(img), Variable(label)
                if use_cuda:
                    img = img.cuda()
                    label = label.cuda()

                pred = model(img)
                _, my_label = torch.max(label, dim=1)
                loss = loss_fn(pred, my_label)
                if i == len(dataloader_val) - 1:
                    val_loss_list.append(loss.item())

                precision.update((pred, my_label))
                recall.update((pred, my_label))
                f1.update((pred, my_label))

        print("\n\tVal loss: {:0.2f}".format(val_loss_list[-1]))
        precision.compute()
        recall.compute()
        val_f1.append(f1.compute() * 100)
        print("\tVal F1 Score: {:0.2f}%".format(val_f1[-1]))

    end.record()
    torch.cuda.synchronize()
    time = start.elapsed_time(end)

    return (train_loss_list, val_loss_list, train_f1, val_f1, time, model)
def _test_distrib_itegration_multilabel(device):
    import torch.distributed as dist

    from ignite.engine import Engine

    rank = dist.get_rank()
    torch.manual_seed(12)

    def _test(average, n_epochs):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0, 2, size=(offset * dist.get_world_size(), n_classes, 6, 8)).to(device)
        y_preds = torch.randint(0, 2, size=(offset * dist.get_world_size(), n_classes, 6, 8)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        re = Recall(average=average, is_multilabel=True, device=device)
        re.attach(engine, "re")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "re" in engine.state.metrics
        res = engine.state.metrics["re"]
        res2 = re.compute()
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()
            res2 = res2.cpu().numpy()
            assert (res == res2).all()
        else:
            assert res == res2

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            true_res = recall_score(to_numpy_multilabel(y_true), to_numpy_multilabel(y_preds),
                                    average="samples" if average else None)

        assert pytest.approx(res) == true_res

    for _ in range(2):
        _test(average=True, n_epochs=1)
        _test(average=True, n_epochs=2)

    with pytest.warns(
        RuntimeWarning,
        match="Precision/Recall metrics do not work in distributed setting when "
        "average=False and is_multilabel=True",
    ):
        re = Recall(average=False, is_multilabel=True, device=device)

    y_pred = torch.randint(0, 2, size=(4, 3, 6, 8))
    y = torch.randint(0, 2, size=(4, 3, 6, 8)).long()
    re.update((y_pred, y))
    re_compute1 = re.compute()
    re_compute2 = re.compute()
    assert len(re_compute1) == 4 * 6 * 8
    assert (re_compute1 == re_compute2).all()
def test_no_update():
    recall = Recall()
    with pytest.raises(NotComputableError):
        recall.compute()