def test_cohen_kappa_all_weights_with_output_transform(weights):
    """Check CohenKappa attached to an Engine through ``output_transform``.

    The engine's update function returns ``(idx, y_pred, y)`` and the metric's
    ``output_transform`` selects elements 1 and 2. After one epoch the metric
    value is compared with scikit-learn's ``cohen_kappa_score`` computed with
    the same ``weights``.

    Args:
        weights: weighting scheme forwarded both to ``CohenKappa`` and to the
            scikit-learn reference (presumably supplied by a fixture or
            parametrization defined elsewhere -- confirm against the caller).
    """
    np.random.seed(1)
    size = 100
    # Explicit fixed-width dtype: the ``np.long`` alias is not available in
    # every NumPy release, whereas ``np.int64`` always is.
    np_y_pred = np.random.randint(0, 2, size=(size, 1), dtype=np.int64)
    np_y = np.zeros((size,), dtype=np.int64)
    np_y[size // 2 :] = 1
    np.random.shuffle(np_y)

    # The reference must use the same weighting as the metric under test;
    # otherwise the parametrized ``weights`` is never compared like-for-like.
    ck_value_sk = cohen_kappa_score(np_y, np_y_pred, weights=weights)

    batch_size = 10

    def update_fn(engine, batch):
        # ``engine.state.iteration`` starts at 1, hence the ``- 1``.
        idx = (engine.state.iteration - 1) * batch_size
        y_true_batch = np_y[idx : idx + batch_size]
        y_pred_batch = np_y_pred[idx : idx + batch_size]
        return idx, torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    engine = Engine(update_fn)

    ck_metric = CohenKappa(output_transform=lambda x: (x[1], x[2]), weights=weights)
    ck_metric.attach(engine, "cohen_kappa")

    data = list(range(size // batch_size))
    ck_value = engine.run(data, max_epochs=1).metrics["cohen_kappa"]

    assert ck_value == pytest.approx(ck_value_sk)
def _test(n_epochs, metric_device):
    """Run CohenKappa inside an Engine and compare with scikit-learn.

    Random binary targets and predictions are generated for
    ``idist.get_world_size()`` ranks; each engine iteration returns the slice
    of ``s`` samples selected by the iteration index and by ``rank``. ``rank``,
    ``device`` and ``idist`` come from the enclosing scope.

    Args:
        n_epochs: number of epochs passed to ``engine.run``.
        metric_device: device spec handed to ``torch.device`` and then to
            ``CohenKappa``.
    """
    metric_device = torch.device(metric_device)

    n_classes = 2
    s = 16
    n_iters = 80
    offset = n_iters * s
    total = offset * idist.get_world_size()

    # Targets are drawn before predictions to keep the random stream order.
    y_true = torch.randint(0, n_classes, size=(total,)).to(device)
    y_preds = torch.randint(0, n_classes, size=(total,)).to(device)

    def update(engine, i):
        start = i * s + rank * offset
        stop = (i + 1) * s + rank * offset
        return y_preds[start:stop], y_true[start:stop]

    engine = Engine(update)
    CohenKappa(device=metric_device).attach(engine, "ck")

    engine.run(data=list(range(n_iters)), max_epochs=n_epochs)

    metrics = engine.state.metrics
    assert "ck" in metrics

    res = metrics["ck"]
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()

    true_res = cohen_kappa_score(y_true.cpu().numpy(), y_preds.cpu().numpy())

    assert pytest.approx(res) == true_res
def test_no_update():
    """``compute()`` on a CohenKappa that received no update must raise."""
    expected_message = r"EpochMetric must have at least one example before it can be computed"
    metric = CohenKappa()

    with pytest.raises(NotComputableError, match=expected_message):
        metric.compute()
def _test(y_pred, y, n_iters, metric_device):
    """Feed ``(y_pred, y)`` to CohenKappa and compare with scikit-learn.

    When ``n_iters > 1`` the data is fed in consecutive batches only, so the
    batched accumulation is checked on its own instead of on top of a prior
    full-data update. Otherwise the data is fed in a single ``update`` call.
    The reference value is computed on the tensors gathered with
    ``idist.all_gather``. ``rank`` and ``idist`` come from the enclosing scope.

    Args:
        y_pred: predictions tensor.
        y: targets tensor.
        n_iters: number of batches the data is split into when greater than 1.
        metric_device: device spec handed to ``torch.device`` and then to
            ``CohenKappa``.
    """
    metric_device = torch.device(metric_device)
    ck = CohenKappa(device=metric_device)

    torch.manual_seed(10 + rank)

    ck.reset()
    if n_iters > 1:
        n_samples = y.shape[0]
        batch_size = n_samples // n_iters + 1
        # ``batch_size * n_iters > n_samples``, so stepping through the data
        # visits every sample once and never issues an empty trailing batch.
        for idx in range(0, n_samples, batch_size):
            ck.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
    else:
        ck.update((y_pred, y))

    # gather y_pred, y
    y_pred = idist.all_gather(y_pred)
    y = idist.all_gather(y)

    np_y = y.cpu().numpy()
    np_y_pred = y_pred.cpu().numpy()

    res = ck.compute()
    assert isinstance(res, float)
    assert cohen_kappa_score(np_y, np_y_pred) == pytest.approx(res)
def test_check_shape():
    """``_check_shape`` must reject 0-dim and 3-dim predictions/targets."""
    ck = CohenKappa()

    # (expected message pattern, y_pred, y); built in the original call order.
    invalid_outputs = [
        (r"Predictions should be of shape", torch.tensor(0), torch.tensor(0)),
        (r"Predictions should be of shape", torch.rand(4, 3, 1), torch.rand(4, 3)),
        (r"Targets should be of shape", torch.rand(4, 3), torch.rand(4, 3, 1)),
    ]

    for pattern, bad_pred, bad_target in invalid_outputs:
        with pytest.raises(ValueError, match=pattern):
            ck._check_shape((bad_pred, bad_target))
def test_check_shape():
    """``_check_shape`` must reject integer inputs with more than two dims."""
    ck = CohenKappa()

    def binary(*shape):
        # Random 0/1 tensor of the requested shape, cast to long.
        return torch.randint(0, 2, size=shape).long()

    # (expected message pattern, y_pred, y); built in the original call order.
    invalid_outputs = [
        (r"Predictions should be of shape", binary(10, 1, 5, 12), binary(10, 5, 6)),
        (r"Predictions should be of shape", binary(10, 1, 6), binary(10, 5, 6)),
        (r"Targets should be of shape", binary(10, 1), binary(10, 5, 2)),
    ]

    for pattern, bad_pred, bad_target in invalid_outputs:
        with pytest.raises(ValueError, match=pattern):
            ck._check_shape((bad_pred, bad_target))
def _test(metric_device):
    """Update CohenKappa once with random binary data and check the result.

    The reference value is scikit-learn's ``cohen_kappa_score`` computed on
    the tensors returned by ``idist.all_gather``. ``rank``, ``device`` and
    ``idist`` come from the enclosing scope.

    Args:
        metric_device: device spec handed to ``torch.device`` and then to
            ``CohenKappa``.
    """
    ck_metric = CohenKappa(device=torch.device(metric_device))

    torch.manual_seed(10 + rank)
    shape = (100, 1)
    local_pred = torch.randint(0, 2, size=shape, device=device)
    local_target = torch.randint(0, 2, size=shape, device=device)

    ck_metric.update((local_pred, local_target))

    # gather y_pred, y (predictions first, then targets)
    gathered_pred = idist.all_gather(local_pred)
    gathered_target = idist.all_gather(local_target)

    expected = cohen_kappa_score(
        gathered_target.cpu().numpy(),
        gathered_pred.cpu().numpy(),
    )

    res = ck_metric.compute()
    assert res == pytest.approx(expected)
def _test(y_pred, y, batch_size):
    """Run CohenKappa through an Engine in batches and compare with sklearn.

    The engine's update function returns ``(idx, y_pred_batch, y_batch)`` and
    the metric's ``output_transform`` keeps elements 1 and 2. ``weights`` comes
    from the enclosing scope and is used for both the metric and the reference.

    Args:
        y_pred: predictions tensor.
        y: targets tensor.
        batch_size: number of samples returned per engine iteration.
    """
    np_y_pred = y_pred.numpy()
    np_y = y.numpy()
    expected = cohen_kappa_score(np_y, np_y_pred, weights=weights)

    def update_fn(engine, batch):
        # ``engine.state.iteration`` starts at 1, hence the ``- 1``.
        start = (engine.state.iteration - 1) * batch_size
        stop = start + batch_size
        pred_batch = torch.from_numpy(np_y_pred[start:stop])
        true_batch = torch.from_numpy(np_y[start:stop])
        return start, pred_batch, true_batch

    engine = Engine(update_fn)

    ck_metric = CohenKappa(output_transform=lambda x: (x[1], x[2]), weights=weights)
    ck_metric.attach(engine, "ck")

    n_batches = y_pred.shape[0] // batch_size
    state = engine.run(list(range(n_batches)), max_epochs=1)
    ck = state.metrics["ck"]

    assert isinstance(ck, float)
    assert expected == pytest.approx(ck)
def test_binary_input_N(weights):
    """Compare CohenKappa with scikit-learn on binary ``(N,)`` / ``(N, 1)`` data.

    Each case is fed either in one ``update`` call (``n_iters == 1``) or in
    consecutive batches only (``n_iters > 1``), so the batched accumulation is
    verified on its own rather than on top of a prior full-data update.

    Args:
        weights: weighting scheme forwarded both to ``CohenKappa`` and to the
            scikit-learn reference.
    """
    ck = CohenKappa(weights)

    def _test(y_pred, y, n_iters):
        np_y = y.numpy()
        np_y_pred = y_pred.numpy()

        ck.reset()
        if n_iters > 1:
            n_samples = y.shape[0]
            batch_size = n_samples // n_iters + 1
            # ``batch_size * n_iters > n_samples``, so stepping through the
            # data visits every sample once without empty trailing batches.
            for idx in range(0, n_samples, batch_size):
                ck.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            ck.update((y_pred, y))

        res = ck.compute()
        assert isinstance(res, float)
        assert cohen_kappa_score(np_y, np_y_pred, weights=weights) == pytest.approx(res)

    def get_test_cases():
        shapes = [(10,), (100,), (10, 1), (100, 1)]
        # n_iters == 1: single update; n_iters == 16: updated in batches.
        # Within each tuple the predictions are drawn before the targets.
        return [
            (
                torch.randint(0, 2, size=shape).long(),
                torch.randint(0, 2, size=shape).long(),
                n_iters,
            )
            for n_iters in (1, 16)
            for shape in shapes
        ]

    for _ in range(10):
        # check multiple random inputs as random exact occurrences are rare
        for y_pred, y, n_iters in get_test_cases():
            _test(y_pred, y, n_iters)
def test_input_types():
    """A later update whose dtypes differ from the first one must raise.

    The first update stores float predictions and long targets; the two
    following updates change the prediction dtype and the target dtype in turn.
    """
    ck = CohenKappa()
    ck.reset()

    first_output = (
        torch.rand(4, 3),
        torch.randint(0, 2, size=(4, 3), dtype=torch.long),
    )
    ck.update(first_output)

    # Integer predictions where float predictions were stored.
    int_pred_output = (
        torch.randint(0, 5, size=(4, 3)),
        torch.randint(0, 2, size=(4, 3)),
    )
    with pytest.raises(
        ValueError,
        match=r"Incoherent types between input y_pred and stored predictions",
    ):
        ck.update(int_pred_output)

    # int32 targets where long targets were stored.
    int32_target_output = (
        torch.rand(4, 3),
        torch.randint(0, 2, size=(4, 3)).to(torch.int32),
    )
    with pytest.raises(
        ValueError,
        match=r"Incoherent types between input y and stored targets",
    ):
        ck.update(int32_target_output)
def test_cohen_kappa_all_weights(weights):
    """Compare CohenKappa with scikit-learn for the given ``weights``.

    Random binary predictions and targets of shape ``(size, 1)`` are fed to
    the metric in a single update; the result must match
    ``cohen_kappa_score`` computed with the same weighting.

    Args:
        weights: weighting scheme forwarded both to ``CohenKappa`` and to the
            scikit-learn reference.
    """
    size = 100
    # Explicit fixed-width dtype: the ``np.long`` alias is not available in
    # every NumPy release, whereas ``np.int64`` always is.
    np_y_pred = np.random.randint(0, 2, size=(size, 1), dtype=np.int64)
    np_y = np.random.randint(0, 2, size=(size, 1), dtype=np.int64)

    # The reference must use the same weighting as the metric under test.
    np_ck = cohen_kappa_score(np_y, np_y_pred, weights=weights)

    ck_metric = CohenKappa(weights=weights)
    y_pred = torch.from_numpy(np_y_pred)
    y = torch.from_numpy(np_y)

    ck_metric.reset()
    ck_metric.update((y_pred, y))
    ck = ck_metric.compute()

    assert ck == pytest.approx(np_ck)
def test_cohen_kappa_wrong_weights_type():
    """Constructing CohenKappa with an unsupported ``weights`` must raise."""
    for invalid_weights in (7, "dd"):
        with pytest.raises(ValueError, match=r"Kappa Weighting type must be"):
            CohenKappa(weights=invalid_weights)
def test_no_sklearn(mock_no_sklearn):
    """With the ``mock_no_sklearn`` fixture active, construction must raise."""
    expected_message = r"This contrib module requires sklearn to be installed."

    with pytest.raises(RuntimeError, match=expected_message):
        CohenKappa()
def test_multilabel_inputs():
    """Binary inputs with several columns must be rejected as multilabel.

    For each column count the metric is reset, updated once and computed; a
    ``ValueError`` is expected somewhere in that sequence.
    """
    ck = CohenKappa()
    n_rows = 10

    for n_columns in (4, 6, 8):
        with pytest.raises(ValueError, match=r"multilabel-indicator is not supported"):
            ck.reset()
            y_pred = torch.randint(0, 2, size=(n_rows, n_columns)).long()
            y = torch.randint(0, 2, size=(n_rows, n_columns)).long()
            ck.update((y_pred, y))
            ck.compute()