def test_check_compute_fn():
    y_pred = torch.zeros((8, 13))
    y_pred[:, 1] = 1
    y_true = torch.zeros_like(y_pred)
    output = (y_pred, y_true)

    em = ROC_AUC(check_compute_fn=True)
    em.reset()
    with pytest.warns(EpochMetricWarning, match=r"Probably, there can be a problem with `compute_fn`"):
        em.update(output)

    em = ROC_AUC(check_compute_fn=False)
    em.update(output)
def test_no_update():
    roc_auc = ROC_AUC()
    with pytest.raises(
        NotComputableError, match=r"EpochMetric must have at least one example before it can be computed"
    ):
        roc_auc.compute()
def _test(y_pred, y, batch_size, metric_device):
    metric_device = torch.device(metric_device)
    roc_auc = ROC_AUC(device=metric_device)

    # `rank` comes from the enclosing distributed test setup
    torch.manual_seed(10 + rank)

    roc_auc.reset()
    if batch_size > 1:
        n_iters = y.shape[0] // batch_size + 1
        for i in range(n_iters):
            idx = i * batch_size
            roc_auc.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
    else:
        roc_auc.update((y_pred, y))

    # gather y_pred, y across processes
    y_pred = idist.all_gather(y_pred)
    y = idist.all_gather(y)

    np_y = y.cpu().numpy()
    np_y_pred = y_pred.cpu().numpy()

    res = roc_auc.compute()
    assert isinstance(res, float)
    assert roc_auc_score(np_y, np_y_pred) == pytest.approx(res)
def test_input_types():
    roc_auc = ROC_AUC()
    roc_auc.reset()
    output1 = (torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long))
    roc_auc.update(output1)

    with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"):
        roc_auc.update((torch.randint(0, 5, size=(4, 3)), torch.randint(0, 2, size=(4, 3))))

    with pytest.raises(ValueError, match=r"Incoherent types between input y and stored targets"):
        roc_auc.update((torch.rand(4, 3), torch.randint(0, 2, size=(4, 3)).to(torch.int32)))

    with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"):
        roc_auc.update((torch.randint(0, 2, size=(10,)).long(), torch.randint(0, 2, size=(10, 5)).long()))
def test_roc_auc_score_2():
    np.random.seed(1)
    size = 100
    np_y_pred = np.random.rand(size, 1)
    # explicit int64 dtype (the old `np.long` alias is unreliable across NumPy versions)
    np_y = np.zeros((size,), dtype=np.int64)
    np_y[size // 2 :] = 1
    np.random.shuffle(np_y)

    np_roc_auc = roc_auc_score(np_y, np_y_pred)

    roc_auc_metric = ROC_AUC()
    y_pred = torch.from_numpy(np_y_pred)
    y = torch.from_numpy(np_y)

    roc_auc_metric.reset()
    n_iters = 10
    batch_size = size // n_iters
    for i in range(n_iters):
        idx = i * batch_size
        roc_auc_metric.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

    roc_auc = roc_auc_metric.compute()

    assert roc_auc == np_roc_auc
def test_integration_roc_auc_score_with_activated_output_transform():
    np.random.seed(1)
    size = 100
    np_y_pred = np.random.rand(size, 1)
    np_y_pred_sigmoid = torch.sigmoid(torch.from_numpy(np_y_pred)).numpy()
    # explicit int64 dtype (the old `np.long` alias is unreliable across NumPy versions)
    np_y = np.zeros((size,), dtype=np.int64)
    np_y[size // 2 :] = 1
    np.random.shuffle(np_y)

    np_roc_auc = roc_auc_score(np_y, np_y_pred_sigmoid)

    batch_size = 10

    def update_fn(engine, batch):
        idx = (engine.state.iteration - 1) * batch_size
        y_true_batch = np_y[idx : idx + batch_size]
        y_pred_batch = np_y_pred[idx : idx + batch_size]
        return idx, torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    engine = Engine(update_fn)

    roc_auc_metric = ROC_AUC(output_transform=lambda x: (torch.sigmoid(x[1]), x[2]))
    roc_auc_metric.attach(engine, 'roc_auc')

    data = list(range(size // batch_size))
    roc_auc = engine.run(data, max_epochs=1).metrics['roc_auc']

    assert roc_auc == np_roc_auc
def test_check_shape():
    roc_auc = ROC_AUC()

    with pytest.raises(ValueError, match=r"Predictions should be of shape"):
        roc_auc._check_shape((torch.tensor(0), torch.tensor(0)))

    with pytest.raises(ValueError, match=r"Predictions should be of shape"):
        roc_auc._check_shape((torch.rand(4, 3, 1), torch.rand(4, 3)))

    with pytest.raises(ValueError, match=r"Targets should be of shape"):
        roc_auc._check_shape((torch.rand(4, 3), torch.rand(4, 3, 1)))
def test_binary_and_multilabel_inputs():
    roc_auc = ROC_AUC()

    def _test(y_pred, y, batch_size):
        roc_auc.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                roc_auc.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            roc_auc.update((y_pred, y))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()

        res = roc_auc.compute()
        assert isinstance(res, float)
        assert roc_auc_score(np_y, np_y_pred) == pytest.approx(res)

    def get_test_cases():
        test_cases = [
            # Binary input data of shape (N,) or (N, 1)
            (torch.randint(0, 2, size=(50,)).long(), torch.randint(0, 2, size=(50,)).long(), 1),
            (torch.randint(0, 2, size=(50, 1)).long(), torch.randint(0, 2, size=(50, 1)).long(), 1),
            # updated batches
            (torch.randint(0, 2, size=(50,)).long(), torch.randint(0, 2, size=(50,)).long(), 16),
            (torch.randint(0, 2, size=(50, 1)).long(), torch.randint(0, 2, size=(50, 1)).long(), 16),
            # Binary input data of shape (N, L)
            (torch.randint(0, 2, size=(50, 4)).long(), torch.randint(0, 2, size=(50, 4)).long(), 1),
            (torch.randint(0, 2, size=(50, 7)).long(), torch.randint(0, 2, size=(50, 7)).long(), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 4)).long(), torch.randint(0, 2, size=(50, 4)).long(), 16),
            (torch.randint(0, 2, size=(50, 7)).long(), torch.randint(0, 2, size=(50, 7)).long(), 16),
        ]
        return test_cases

    # check multiple random inputs, since exact random coincidences are rare
    for _ in range(5):
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
def _test(y_preds, y_true, n_epochs, metric_device, update_fn):
    metric_device = torch.device(metric_device)

    engine = Engine(update_fn)

    roc_auc = ROC_AUC(device=metric_device)
    roc_auc.attach(engine, "roc_auc")

    # `n_iters` comes from the enclosing test
    data = list(range(n_iters))
    engine.run(data=data, max_epochs=n_epochs)

    assert "roc_auc" in engine.state.metrics
    res = engine.state.metrics["roc_auc"]

    true_res = roc_auc_score(y_true.cpu().numpy(), y_preds.cpu().numpy())
    assert pytest.approx(res) == true_res
def test_roc_auc_score():
    size = 100
    np_y_pred = np.random.rand(size, 1)
    # explicit int64 dtype (the old `np.long` alias is unreliable across NumPy versions)
    np_y = np.zeros((size,), dtype=np.int64)
    np_y[size // 2 :] = 1
    np_roc_auc = roc_auc_score(np_y, np_y_pred)

    roc_auc_metric = ROC_AUC()
    y_pred = torch.from_numpy(np_y_pred)
    y = torch.from_numpy(np_y)

    roc_auc_metric.reset()
    roc_auc_metric.update((y_pred, y))
    roc_auc = roc_auc_metric.compute()

    assert roc_auc == np_roc_auc
def set_handlers(
    trainer: Engine,
    evaluator: Engine,
    valloader: DataLoader,
    model: nn.Module,
    optimizer: optim.Optimizer,
    args: Namespace,
) -> None:
    ROC_AUC(output_transform=lambda output: (output.logit, output.label)).attach(
        engine=evaluator, name='roc_auc')
    Accuracy(output_transform=lambda output: ((output.logit > 0).long(), output.label)).attach(
        engine=evaluator, name='accuracy')
    Loss(
        loss_fn=nn.BCEWithLogitsLoss(),
        output_transform=lambda output: (output.logit, output.label.float()),
    ).attach(engine=evaluator, name='loss')

    ProgressBar(persist=True, desc='Epoch').attach(
        engine=trainer, output_transform=lambda output: {'loss': output.loss})
    ProgressBar(persist=False, desc='Eval').attach(engine=evaluator)
    ProgressBar(persist=True, desc='Eval').attach(
        engine=evaluator,
        metric_names=['roc_auc', 'accuracy', 'loss'],
        event_name=Events.EPOCH_COMPLETED,
        closing_event_name=Events.COMPLETED)

    @trainer.on(Events.ITERATION_COMPLETED(every=args.evaluation_interval))
    def _evaluate(trainer: Engine):
        evaluator.run(valloader, max_epochs=1)

    evaluator.add_event_handler(
        event_name=Events.EPOCH_COMPLETED,
        handler=Checkpoint(
            to_save={'model': model, 'optimizer': optimizer, 'trainer': trainer},
            save_handler=DiskSaver(
                dirname=args.checkpoint_dir, atomic=True, create_dir=True, require_empty=False),
            filename_prefix='best',
            score_function=lambda engine: engine.state.metrics['roc_auc'],
            score_name='val_roc_auc',
            n_saved=1,
            global_step_transform=global_step_from_engine(trainer)))
def get_evaluators(model, configuration):
    assert (
        configuration.data_type in EVALUATOR_FACTORY_MAP
    ), "Data type not in {}".format(EVALUATOR_FACTORY_MAP.keys())

    metrics = {
        "accuracy": Accuracy(_output_transform),
        "precision": Precision(_output_transform),
        "recall": Recall(_output_transform),
        "loss": Loss(get_criterion(configuration)),
        "auc": ROC_AUC(),
        "tnr": Recall(_negative_output_transform),
        "npv": Precision(_negative_output_transform),
    }

    train_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model,
        metrics=metrics,
        device=configuration.device,
    )
    val_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model,
        metrics=metrics,
        device=configuration.device,
    )
    return train_evaluator, val_evaluator
def _test(y_pred, y, batch_size):
    def update_fn(engine, batch):
        idx = (engine.state.iteration - 1) * batch_size
        y_true_batch = np_y[idx : idx + batch_size]
        y_pred_batch = np_y_pred[idx : idx + batch_size]
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    engine = Engine(update_fn)

    roc_auc_metric = ROC_AUC()
    roc_auc_metric.attach(engine, "roc_auc")

    np_y = y.numpy()
    np_y_pred = y_pred.numpy()

    np_roc_auc = roc_auc_score(np_y, np_y_pred)

    data = list(range(y_pred.shape[0] // batch_size))
    roc_auc = engine.run(data, max_epochs=1).metrics["roc_auc"]

    assert isinstance(roc_auc, float)
    assert np_roc_auc == pytest.approx(roc_auc)
def _test(n_epochs, metric_device):
    metric_device = torch.device(metric_device)
    n_iters = 80
    s = 16
    n_classes = 2

    offset = n_iters * s
    # `rank`, `device` and `idist` come from the enclosing distributed test setup
    y_true = torch.randint(0, n_classes, size=(offset * idist.get_world_size(), 10)).to(device)
    y_preds = torch.rand(offset * idist.get_world_size(), 10).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + rank * offset : (i + 1) * s + rank * offset, :],
            y_true[i * s + rank * offset : (i + 1) * s + rank * offset, :],
        )

    engine = Engine(update)

    roc_auc = ROC_AUC(device=metric_device)
    roc_auc.attach(engine, "roc_auc")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=n_epochs)

    assert "roc_auc" in engine.state.metrics
    res = engine.state.metrics["roc_auc"]
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()

    true_res = roc_auc_score(y_true.cpu().numpy(), y_preds.cpu().numpy())
    assert pytest.approx(res) == true_res
def create_supervised_evaluator(
    model: torch.nn.Module,
    prepare_batch,
    criterion,
    metrics=None,
    device=None,
    non_blocking: bool = False,
    tqdm_log: bool = False,
    checkpoint_dir='output/checkpoints/',
) -> Engine:
    if device:
        model.to(device)

    def _inference(engine, batch):
        model.eval()
        with torch.no_grad():
            actions, target = prepare_batch(batch, device=device, non_blocking=non_blocking)
            scores = model(actions)
        return (scores, target)

    engine = Engine(_inference)

    softmax_transform = lambda x: (F.softmax(x[0], dim=1)[:, 1] > 0.5, x[1])

    Loss(criterion, output_transform=lambda x: x).attach(engine, 'loss')
    ROC_AUC(output_transform=lambda x: (F.softmax(x[0], dim=1)[:, 1], x[1])).attach(engine, 'roc_auc')
    ModdedPrecision(output_transform=softmax_transform).attach(engine, 'precision')
    Recall(output_transform=softmax_transform).attach(engine, 'recall')
    FalsePositiveRate(output_transform=softmax_transform).attach(engine, 'FPR')

    if tqdm_log:
        pbar = ProgressBar(persist=True)
        pbar.attach(engine)

    # save the best model
    # to_save = {'model': model}
    # best_checkpoint_handler = Checkpoint(
    #     to_save,
    #     DiskSaver(checkpoint_dir, create_dir=True),
    #     n_saved=1,
    #     filename_prefix='best',
    #     score_function=lambda x: engine.state.metrics['roc_auc'],
    #     score_name="roc_auc",
    #     global_step_transform=lambda x, y: engine.train_epoch)
    # engine.add_event_handler(Events.COMPLETED, best_checkpoint_handler)

    @engine.on(Events.COMPLETED)
    def log_validation_results(engine):
        metrics = engine.state.metrics
        if len(metrics) == 0:
            print('no metrics in log_validation_results!')
            return
        print(f"{'Validation Results':20} - "
              f"Avg loss: {metrics['loss']:.6f}, "
              f"ROC AUC: {metrics['roc_auc']:.6f}\n\t"
              f"Recall: {metrics['recall']:.6f} "
              f"Precision: {metrics['precision']:.6f} "
              f"FPR: {metrics['FPR']:.6f} ")
        wandb.log({
            "val_loss": metrics['loss'],
            "val_roc_auc": metrics['roc_auc'],
            "val_recall": metrics['recall'],
            "val_precision": metrics['precision'],
            "val_fpr": metrics['FPR'],
        }, commit=True)

    return engine
def create_supervised_trainer(
    model,
    optimizer,
    criterion,
    prepare_batch,
    metrics={},
    device=None,
    tqdm_log=False,
) -> Engine:
    def _update(engine, batch):
        model.train()
        optimizer.zero_grad()
        actions, target = prepare_batch(batch, device=device)
        scores = model(actions)
        loss = criterion(scores, target)
        loss.backward()
        optimizer.step()
        return {'loss': loss.item(), 'y_pred': scores, 'y_true': target}

    model.to(device)
    engine = Engine(_update)

    softmax_transform = lambda x: (F.softmax(x['y_pred'], dim=1)[:, 1] > 0.5, x['y_true'])

    # Metrics
    RunningAverage(output_transform=lambda x: x['loss']).attach(engine, 'running_average_loss')
    Loss(criterion, output_transform=lambda x: (x['y_pred'], x['y_true'])).attach(engine, 'loss')
    ROC_AUC(
        output_transform=lambda x: (F.softmax(x['y_pred'], dim=1)[:, 1], x['y_true'])
    ).attach(engine, 'roc_auc')
    ModdedPrecision(output_transform=softmax_transform).attach(engine, 'precision')
    Recall(output_transform=softmax_transform).attach(engine, 'recall')
    FalsePositiveRate(output_transform=softmax_transform).attach(engine, 'FPR')

    # TQDM
    if tqdm_log:
        pbar = ProgressBar(persist=True)
        # use the name the RunningAverage was attached under
        pbar.attach(engine, ['running_average_loss'])

    @engine.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        metrics = engine.state.metrics
        print(f"Epoch {engine.state.epoch} completed!")
        print(f"{'Train Results':20} - "
              f"Avg loss: {metrics['loss']:.6f}, "
              f"ROC AUC: {metrics['roc_auc']:.6f}\n\t"
              f"Recall: {metrics['recall']:.6f} "
              f"Precision: {metrics['precision']:.6f} "
              f"FPR: {metrics['FPR']:.6f} ")
        wandb.log({
            "train_loss": metrics['loss'],
            "train_roc_auc": metrics['roc_auc'],
            "train_recall": metrics['recall'],
            "train_precision": metrics['precision'],
            "train_fpr": metrics['FPR'],
        }, commit=False)

    return engine
def train(config):
    model_suite.logging.setup_loggers(config)

    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'
    logger.info(f'Device {device} will be used')

    data_df = read_data()
    train_ds, val_ds, test_ds = get_datasets(data_df)
    train_loader, val_loader = get_data_loaders(
        train_ds, val_ds,
        train_batch_size=config.train_batch_size,
        val_batch_size=config.val_batch_size)

    writer = SummaryWriter(log_dir=f'{config.model_dir}/logs')

    n_features = train_loader.dataset[0][0].shape[0]
    model = get_model(model_name=config.model, n_features=n_features)
    loss = torch.nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    # optimizer = SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay, momentum=config.momentum)

    trainer = create_supervised_trainer(model, optimizer, loss, device=device)
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'loss': Loss(loss),
            'roc': ROC_AUC(),
            'accuracy': Accuracy(),
            'precision': AveragePrecision(),
        },
        device=device)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_loss = metrics['loss']
        avg_roc = metrics['roc']
        # avg_accuracy = metrics['accuracy']
        # avg_precision = metrics['precision']
        logger.info(
            f'Training results - Epoch: {engine.state.epoch} Avg loss: {avg_loss} ROC: {avg_roc}')
        writer.add_scalar("training/avg_loss", avg_loss, engine.state.epoch)
        writer.add_scalar("training/avg_roc", avg_roc, engine.state.epoch)
        # writer.add_scalar("training/avg_accuracy", avg_accuracy, engine.state.epoch)
        # writer.add_scalar("training/avg_precision", avg_precision, engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_loss = metrics['loss']
        avg_roc = metrics['roc']
        # avg_accuracy = metrics['accuracy']
        # avg_precision = metrics['precision']
        logger.info(
            f'Validation results - Epoch: {engine.state.epoch} Avg loss: {avg_loss} ROC: {avg_roc}')
        writer.add_scalar("validation/avg_loss", avg_loss, engine.state.epoch)
        writer.add_scalar("validation/avg_roc", avg_roc, engine.state.epoch)
        # writer.add_scalar("validation/avg_accuracy", avg_accuracy, engine.state.epoch)
        # writer.add_scalar("validation/avg_precision", avg_precision, engine.state.epoch)

    trainer.run(train_loader, max_epochs=config.n_epochs)
    writer.close()
def test_no_sklearn(mock_no_sklearn):
    with pytest.raises(RuntimeError, match=r"This contrib module requires sklearn to be installed."):
        ROC_AUC()