def create_evaluators(model, metrics, config):
    """Create twin supervised evaluators for the train and validation splits.

    Both evaluators share the same model, metrics, device and batch
    preparation; an optional ``config.model_output_transform`` is applied to
    the model output before metrics consume it.

    Returns:
        (evaluator, train_evaluator) — the validation evaluator first.
    """
    model_output_transform = getattr(config, "model_output_transform", lambda x: x)

    def _metrics_output(x, y, y_pred):
        # Metrics receive (transformed prediction, target).
        return model_output_transform(y_pred), y

    shared_kwargs = {
        "model": model,
        "metrics": metrics,
        "device": config.device,
        "non_blocking": True,
        "prepare_batch": config.prepare_batch,
        "output_transform": _metrics_output,
    }

    train_evaluator = create_supervised_evaluator(**shared_kwargs)
    evaluator = create_supervised_evaluator(**shared_kwargs)

    # Attach progress bars on the main process only to avoid duplicated
    # console output under distributed launch.
    if idist.get_rank() == 0:
        common.ProgressBar(desc="Evaluation (train)", persist=False).attach(train_evaluator)
        common.ProgressBar(desc="Evaluation (val)", persist=False).attach(evaluator)

    return evaluator, train_evaluator
def create_evaluator(model, metrics, config, tag="val"):
    """Build an inference :class:`Engine` over plain tensor batches.

    Each step moves ``(x, y)`` to the distributed device if needed, runs the
    model under autocast (enabled by ``config["with_amp"]``) and returns
    ``(output, y)`` for the attached metrics.
    """
    with_amp = config["with_amp"]
    device = idist.device()

    @torch.no_grad()
    def evaluate_step(engine: Engine, batch):
        model.eval()
        inputs, targets = batch[0], batch[1]

        # Transfer only when the batch is not already on the target device.
        if inputs.device != device:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

        with autocast(enabled=with_amp):
            predictions = model(inputs)
        return predictions, targets

    evaluator = Engine(evaluate_step)
    for metric_name, metric in metrics.items():
        metric.attach(evaluator, metric_name)

    # Progress bar only on rank 0, and not when ClearML handles logging.
    if idist.get_rank() == 0 and (not config["with_clearml"]):
        common.ProgressBar(desc=f"Evaluation ({tag})", persist=False).attach(evaluator)

    return evaluator
def create_evaluator(model, metrics, config, tag="val"):
    """Build an inference :class:`Engine` for dict-of-tensors inputs.

    Batches are ``(input_dict, labels)``; labels are reshaped to a column
    vector. When the batch is not on the distributed device, inputs are cast
    to ``long`` and labels to ``float`` during the transfer (token ids +
    regression/BCE-style targets — presumably; confirm against the dataset).
    """
    with_amp = config["with_amp"]
    device = idist.device()

    @torch.no_grad()
    def evaluate_step(engine, batch):
        model.eval()
        features = batch[0]
        targets = batch[1].view(-1, 1)

        # Device check is done on the labels; both parts move together.
        if targets.device != device:
            features = {
                key: tensor.to(device, non_blocking=True, dtype=torch.long)
                for key, tensor in batch[0].items()
            }
            targets = targets.to(device, non_blocking=True, dtype=torch.float)

        with autocast(enabled=with_amp):
            predictions = model(features)
        return predictions, targets

    evaluator = Engine(evaluate_step)
    for metric_name, metric in metrics.items():
        metric.attach(evaluator, metric_name)

    # Progress bar only on rank 0, and not when ClearML handles logging.
    if idist.get_rank() == 0 and (not config["with_clearml"]):
        common.ProgressBar(desc=f"Evaluation ({tag})", persist=False).attach(evaluator)

    return evaluator
def create_trainer(model, optimizer, criterion, train_sampler, config, logger):
    """Create the training :class:`Engine` with apex-amp mixed precision.

    Supports gradient accumulation via ``config.accumulation_steps`` and an
    optional ``config.model_output_transform`` applied to the raw model
    output before the loss. Checkpointing covers model, optimizer,
    lr_scheduler, trainer state and the amp state.

    Args:
        model: model to train (wrapped by apex ``amp`` — assumed already
            initialized with ``amp.initialize``; confirm at call site).
        optimizer: optimizer updated every ``accumulation_steps`` iterations.
        criterion: loss callable ``criterion(y_pred, y)``.
        train_sampler: sampler passed to the common training handlers
            (epoch-wise set_epoch under distributed training).
        config: attribute-style configuration object.
        logger: logger attached to the trainer.

    Returns:
        The configured trainer Engine.
    """
    prepare_batch = config.prepare_batch
    device = config.device

    # Setup trainer
    accumulation_steps = getattr(config, "accumulation_steps", 1)
    model_output_transform = getattr(config, "model_output_transform", lambda x: x)

    def train_update_function(engine, batch):
        model.train()
        x, y = prepare_batch(batch, device=device, non_blocking=True)
        y_pred = model(x)
        y_pred = model_output_transform(y_pred)
        # Scale the loss so accumulated gradients average over the window.
        loss = criterion(y_pred, y) / accumulation_steps

        with amp.scale_loss(loss, optimizer, loss_id=0) as scaled_loss:
            scaled_loss.backward()

        # Step/zero only at the end of each accumulation window.
        if engine.state.iteration % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        return {
            "supervised batch loss": loss.item(),
        }

    output_names = getattr(config, "output_names", ["supervised batch loss"])
    lr_scheduler = config.lr_scheduler

    trainer = Engine(train_update_function)
    trainer.logger = logger

    to_save = {
        "model": model,
        "optimizer": optimizer,
        "lr_scheduler": lr_scheduler,
        "trainer": trainer,
        "amp": amp,
    }

    save_every_iters = getattr(config, "save_every_iters", 1000)

    common.setup_common_training_handlers(
        trainer,
        train_sampler,
        to_save=to_save,
        save_every_iters=save_every_iters,
        output_path=config.output_path.as_posix(),
        lr_scheduler=lr_scheduler,
        with_gpu_stats=True,
        output_names=output_names,
        with_pbars=False,
    )

    # FIX: guard the progress bar with the rank check, consistent with the
    # other trainer/evaluator factories in this file — otherwise every
    # distributed worker prints its own interleaved progress bar.
    if idist.get_rank() == 0:
        common.ProgressBar(persist=False).attach(trainer, metric_names="all")

    return trainer
def create_trainer(model, optimizer, criterion, train_sampler, config, logger):
    """Create the training :class:`Engine` (apex amp, Mapping-aware loss).

    The criterion may return either a scalar loss tensor or a Mapping that
    must contain the key ``"supervised batch loss"``; in the Mapping case all
    entries are logged and the named entry drives the backward pass. Gradient
    accumulation is controlled by ``config.accumulation_steps``.
    """
    prepare_batch = config.prepare_batch
    device = config.device

    # Setup trainer
    accumulation_steps = getattr(config, "accumulation_steps", 1)
    model_output_transform = getattr(config, "model_output_transform", lambda x: x)

    def train_update_function(engine, batch):
        model.train()

        x, y = prepare_batch(batch, device=device, non_blocking=True)
        y_pred = model_output_transform(model(x))
        loss = criterion(y_pred, y)

        if isinstance(loss, Mapping):
            # Multi-term loss: log every component, backprop the named one.
            assert "supervised batch loss" in loss
            loss_dict = loss
            output = {k: v.item() for k, v in loss_dict.items()}
            loss = loss_dict["supervised batch loss"] / accumulation_steps
        else:
            output = {"supervised batch loss": loss.item()}

        with amp.scale_loss(loss, optimizer, loss_id=0) as scaled_loss:
            scaled_loss.backward()

        # Only step once per accumulation window.
        if engine.state.iteration % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

        return output

    output_names = getattr(config, "output_names", ["supervised batch loss",])
    lr_scheduler = config.lr_scheduler

    trainer = Engine(train_update_function)
    trainer.logger = logger

    to_save = {
        "model": model,
        "optimizer": optimizer,
        "lr_scheduler": lr_scheduler,
        "trainer": trainer,
        "amp": amp,
    }

    save_every_iters = getattr(config, "save_every_iters", 1000)

    common.setup_common_training_handlers(
        trainer,
        train_sampler,
        to_save=to_save,
        save_every_iters=save_every_iters,
        save_handler=get_save_handler(config),
        lr_scheduler=lr_scheduler,
        with_gpu_stats=exp_tracking.has_mlflow,
        output_names=output_names,
        with_pbars=False,
    )

    # Single progress bar on the main process only.
    if idist.get_rank() == 0:
        common.ProgressBar(persist=False).attach(trainer, metric_names="all")

    return trainer
def create_evaluator(model, metrics, config, with_clearml, tag="val"):
    """Build an inference :class:`Engine` over image/mask batches.

    Batches are prepared with ``data.prepare_image_mask``; the forward pass
    runs under autocast (default enabled, per ``config["with_amp"]``) and an
    optional ``config["model_output_transform"]`` is applied before metrics.
    """
    model_output_transform = config.get("model_output_transform", lambda x: x)
    with_amp = config.get("with_amp", True)
    prepare_batch = data.prepare_image_mask

    @torch.no_grad()
    def evaluate_step(engine, batch):
        model.eval()
        with autocast(enabled=with_amp):
            image, mask = prepare_batch(batch, device=config.device, non_blocking=True)
            prediction = model_output_transform(model(image))
        return prediction, mask

    evaluator = Engine(evaluate_step)
    for metric_name, metric in metrics.items():
        metric.attach(evaluator, metric_name)

    # Progress bar only on rank 0, and not when ClearML handles logging.
    if idist.get_rank() == 0 and (not with_clearml):
        common.ProgressBar(desc=f"Evaluation ({tag})", persist=False).attach(evaluator)

    return evaluator