def test_ignite_evaluator_reporting_metrics():
    try:
        from ignite.metrics import MeanSquaredError
    except ImportError:
        pytest.skip('pytorch-ignite is not installed')
    # This test verifies that both user manually reported metrics
    # and ignite calculated ones are correctly reflected in the
    # reporter observation
    model = IgniteDummyModel()
    n_data = 10
    x = torch.randn((n_data, 2), requires_grad=True)
    y = torch.randn((n_data, 2))
    dataset = torch.utils.data.TensorDataset(x, y)
    loader = torch.utils.data.DataLoader(dataset, batch_size=3)
    evaluator = create_dummy_evaluator(model)
    # Attach metrics to the evaluator
    metric = MeanSquaredError()
    metric.attach(evaluator, 'mse')
    evaluator_ignite_ext = ppe.training.extensions.IgniteEvaluator(
        evaluator, loader, model, progress_bar=False)
    reporter = ppe.reporting.Reporter()
    with reporter:
        result = evaluator_ignite_ext()
    # Internally reported metrics
    assert result['main/x'] == 1.5
    # Ignite calculated metric
    assert result['val/mse'] == 0.0

def _test_distrib_accumulator_device(device):
    metric_devices = [torch.device("cpu")]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for metric_device in metric_devices:
        device = torch.device(device)
        mse = MeanSquaredError(device=metric_device)
        assert mse._device == metric_device
        assert mse._sum_of_squared_errors.device == metric_device, "{}:{} vs {}:{}".format(
            type(mse._sum_of_squared_errors.device), mse._sum_of_squared_errors.device,
            type(metric_device), metric_device,
        )

        y_pred = torch.tensor([[2.0], [-2.0]])
        y = torch.zeros(2)
        mse.update((y_pred, y))

        assert mse._sum_of_squared_errors.device == metric_device, "{}:{} vs {}:{}".format(
            type(mse._sum_of_squared_errors.device), mse._sum_of_squared_errors.device,
            type(metric_device), metric_device,
        )

def _test_distrib_integration(device, tol=1e-6):
    import numpy as np
    from ignite.engine import Engine

    rank = idist.get_rank()
    n_iters = 100
    s = 10
    offset = n_iters * s

    y_true = torch.arange(0, offset * idist.get_world_size(), dtype=torch.float).to(device)
    y_preds = torch.ones(offset * idist.get_world_size(), dtype=torch.float).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + offset * rank:(i + 1) * s + offset * rank],
            y_true[i * s + offset * rank:(i + 1) * s + offset * rank],
        )

    engine = Engine(update)

    m = MeanSquaredError()
    m.attach(engine, "mse")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mse" in engine.state.metrics
    res = engine.state.metrics["mse"]

    true_res = np.mean(np.power((y_true - y_preds).cpu().numpy(), 2.0))
    assert pytest.approx(res, rel=tol) == true_res

def test_zero_div():
    mse = MeanSquaredError()
    with pytest.raises(
            NotComputableError,
            match=r"MeanSquaredError must have at least one example before it can be computed"):
        mse.compute()

def test_accumulator_detached():
    mse = MeanSquaredError()

    y_pred = torch.tensor([[2.0], [-2.0]], requires_grad=True)
    y = torch.zeros(2)
    mse.update((y_pred, y))

    assert not mse._sum_of_squared_errors.requires_grad

def test_compute():
    mse = MeanSquaredError()

    y_pred = torch.Tensor([[2.0], [-2.0]])
    y = torch.zeros(2)
    mse.update((y_pred, y))
    assert mse.compute() == 4.0

    mse.reset()
    y_pred = torch.Tensor([[3.0], [-3.0]])
    y = torch.zeros(2)
    mse.update((y_pred, y))
    assert mse.compute() == 9.0

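# For reference: ignite's MeanSquaredError accumulates the summed squared error and
# divides by the number of examples (the size of the first dimension), which is why
# the assertions above expect 4.0 and 9.0. A minimal standalone check of that
# definition, using only torch:
import torch

y_pred = torch.tensor([[2.0], [-2.0]])
y = torch.zeros(2)
manual_mse = torch.sum((y_pred - y.view_as(y_pred)) ** 2) / y_pred.shape[0]
assert manual_mse.item() == 4.0  # (2**2 + (-2)**2) / 2
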
def _test(metric_device):
    engine = Engine(update)

    m = MeanSquaredError(device=metric_device)
    m.attach(engine, "mse")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mse" in engine.state.metrics
    res = engine.state.metrics["mse"]

    true_res = np.mean(np.power((y_true - y_preds).cpu().numpy(), 2.0))
    assert pytest.approx(res, rel=tol) == true_res

def _create_cvae_evaluator(network, criterion, device, metrics=None, non_blocking=False):
    from ignite.metrics import Loss

    if metrics is None:
        metrics = {}

    def loss_output_transform(output):
        return (*output[:2], {"mu": output[3], "log_var": output[4]})

    metrics.setdefault("loss", Loss(criterion, output_transform=loss_output_transform))
    metrics.setdefault("mse", MeanSquaredError(output_transform=lambda x: x[:2]))

    eval_step = create_cvae_eval_step(network, device, non_blocking=non_blocking)
    evaluator = Engine(eval_step)
    for metric_name, metric in metrics.items():
        metric.attach(evaluator, metric_name)
    return evaluator

def metrics_selector(mode, loss):
    mode = mode.lower()
    if mode == "classification":
        metrics = {
            "loss": loss,
            "accuracy": Accuracy(),
            "accuracy_topk": TopKCategoricalAccuracy(),
            "precision": Precision(average=True),
            "recall": Recall(average=True),
        }
    elif mode == "multiclass-multilabel":
        metrics = {
            "loss": loss,
            "accuracy": Accuracy(),
        }
    elif mode == "regression":
        metrics = {
            "loss": loss,
            "mse": MeanSquaredError(),
            "mae": MeanAbsoluteError(),
        }
    else:
        raise RuntimeError(
            "Invalid task mode, select classification or regression")
    return metrics

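# A hedged usage sketch for metrics_selector: the returned dict plugs straight into
# ignite's create_supervised_evaluator. `model` and `criterion` below are hypothetical
# placeholders, not names defined in this snippet.
from ignite.engine import create_supervised_evaluator
from ignite.metrics import Loss

def build_regression_evaluator(model, criterion):
    metrics = metrics_selector("regression", Loss(criterion))
    return create_supervised_evaluator(model, metrics=metrics)
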
def create_sr_evaluator(
    model,
    device=None,
    non_blocking=True,
    denormalize=True,
    mean=None,
):
    # transfer mean to the device and reshape it so
    # that it is broadcastable to the BCHW format
    mean = mean.to(device).reshape(1, -1, 1, 1)

    def denorm_fn(x):
        return torch.clamp(x + mean, min=0., max=1.)

    def _evaluate_model(engine, batch):
        model.eval()
        x, y = _prepare_batch(batch, device=device, non_blocking=non_blocking)
        with torch.no_grad():
            y_pred = model(x)
        if denormalize:
            y_pred, y = map(denorm_fn, [y_pred, y])
        return y_pred, y

    engine = Engine(_evaluate_model)
    MeanAbsoluteError().attach(engine, 'l1')
    MeanSquaredError().attach(engine, 'l2')
    PNSR(max_value=1.0).attach(engine, 'pnsr')
    return engine

def _test_distrib_accumulator_device(device):
    metric_devices = [torch.device("cpu")]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for metric_device in metric_devices:
        device = torch.device(device)
        mse = MeanSquaredError(device=metric_device)

        for dev in [mse._device, mse._sum_of_squared_errors.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"

        y_pred = torch.tensor([[2.0], [-2.0]])
        y = torch.zeros(2)
        mse.update((y_pred, y))

        for dev in [mse._device, mse._sum_of_squared_errors.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"

def test_create_supervised_with_metrics():
    model = Linear(1, 1)
    model.weight.data.zero_()
    model.bias.data.zero_()

    evaluator = create_supervised_evaluator(model, metrics={'mse': MeanSquaredError()})

    x = torch.FloatTensor([[1.0], [2.0]])
    y = torch.FloatTensor([[3.0], [4.0]])
    data = [(x, y)]

    state = evaluator.run(data)
    # the zero-initialized model predicts 0, so mse = (3**2 + 4**2) / 2 = 12.5
    assert state.metrics['mse'] == 12.5

def create_vae_engines(
    model,
    optimizer,
    criterion=None,
    metrics=None,
    device=None,
    non_blocking=False,
    fig_dir=None,
    unflatten=None,
):
    device = model.device
    if criterion is None:
        criterion = get_default_autoencoder_loss()

    train_step = create_vae_train_step(model, optimizer, criterion,
                                       device=device, non_blocking=non_blocking)
    eval_step = create_vae_eval_step(model, device=device, non_blocking=non_blocking)

    if metrics is None:
        metrics = {}
    metrics.setdefault(
        "loss",
        Loss(criterion, output_transform=loss_eval_output_transform),
    )
    metrics.setdefault("mse", MeanSquaredError(output_transform=lambda x: x[:2]))

    trainer = Engine(train_step)
    evaluator = create_autoencoder_evaluator(eval_step, metrics=metrics)

    save_image_callback = create_save_image_callback(fig_dir, unflatten=unflatten)

    def _epoch_getter():
        return trainer.state.__dict__.get("epoch", None)

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED(once=1),
        save_image_callback,
        epoch=_epoch_getter,
    )

    val_log_handler, val_logger = create_log_handler(trainer)

    return trainer, evaluator, val_log_handler, val_logger

def test_compute():
    mse = MeanSquaredError()

    def _test(y_pred, y, batch_size):
        mse.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                mse.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))
        else:
            mse.update((y_pred, y))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()

        np_res = np.power((np_y - np_y_pred), 2.0).sum() / np_y.shape[0]

        assert isinstance(mse.compute(), float)
        assert mse.compute() == np_res

    def get_test_cases():
        test_cases = [
            (torch.randint(0, 10, size=(100, 1)), torch.randint(0, 10, size=(100, 1)), 1),
            (torch.randint(-20, 20, size=(100, 5)), torch.randint(-20, 20, size=(100, 5)), 1),
            # updated batches
            (torch.randint(0, 10, size=(100, 1)), torch.randint(0, 10, size=(100, 1)), 16),
            (torch.randint(-20, 20, size=(100, 5)), torch.randint(-20, 20, size=(100, 5)), 16),
        ]
        return test_cases

    for _ in range(5):
        # check multiple random inputs, as exact random collisions are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)

def do_inference(cfg, model, test_loader, classes_list, loss_fn,
                 target_set_name="test", plotFlag=False):
    num_classes = len(classes_list) if classes_list is not None else 0
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("classification.inference")
    logging._warn_preinit_stderr = 0
    logger.info("Enter inferencing for {} set".format(target_set_name))

    metrics_eval = {
        "mse": MeanSquaredError(
            output_transform=lambda x: (x["rg_logits"], x["rg_labels"])),
    }
    evaluator = create_supervised_evaluator(model, metrics=metrics_eval,
                                            loss_fn=loss_fn, device=device)
    metrics = dict()

    @evaluator.on(Events.EPOCH_COMPLETED)
    def log_inference_results(engine):
        logger.info("Test Results")
        if engine.state.metrics.get("mse") is not None:
            mse = engine.state.metrics["mse"]
            logger.info("MSE: {:.3f}".format(mse))
            metrics["mse"] = mse

    evaluator.run(test_loader)
    return metrics

def fit_naive_model():
    train_ds = PhotocurrentData("Spectral Responsivity Data Summary.csv",
                                params="model_params.json")
    eval_ds = PhotocurrentData("Spectral Responsivity Data Summary.csv",
                               params="model_params.json")
    _, _, y_mean, _ = train_ds.data_std()

    # criterion = MSELoss()
    # criterion = SmoothL1Loss()
    metrics = {
        "MSE": MeanSquaredError(),
    }

    train_loader = DataLoader(train_ds, shuffle=True, batch_size=2)
    val_loader = DataLoader(eval_ds, shuffle=True, batch_size=2)

    model = NaiveSpectralModel(wavelengths=train_ds.wavelengths,
                               params="model_params.json")
    optimizer = Adam(model.parameters(), lr=1e-2)
    # criterion = SmoothWeightsLoss(model, weights=1/y_mean, lambda_l1=1e-2,
    #                               lambda_rows=1e-3, lambda_cols=1e-3, lambda_norm=0)
    criterion = SmoothWeightsLoss(model, lambda_l1=1e-3, lambda_rows=0,
                                  lambda_cols=0, lambda_norm_rows=0,
                                  lambda_norm_cols=0)
    trainer = get_trainer(model, train_loader, val_loader, criterion,
                          optimizer, metrics)
    trainer.run(train_loader, max_epochs=200)

    plot_weights_R_inv(model)
    plot_prediction_R_inv(model, train_ds)
    plot_test_R_inv(model, "Reconstruction Data Summary - BP-5um data.csv",
                    title="BP-5\u03BCm", normlizer=train_ds.normlize)

def _attach_peaks_related(engine: Engine, prefix: str = ""):
    transform = lambda x: (x["refenrichment"].flatten(),
                           x["predenrichment"].flatten())
    MeanSquaredError(transform).attach(engine, prefix + "mse")
    NonZeroMeanSquaredError(transform).attach(engine, prefix + "non_zero_mse")

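# Note on the positional argument above: the first parameter of ignite metrics is
# output_transform, so MeanSquaredError(transform) is equivalent to
# MeanSquaredError(output_transform=transform). A minimal sketch of the same pattern
# with the keyword spelled out (the dict keys here are illustrative, not from this
# codebase):
from ignite.metrics import MeanSquaredError

def _attach_mse(engine, prefix=""):
    transform = lambda out: (out["ref"].flatten(), out["pred"].flatten())
    MeanSquaredError(output_transform=transform).attach(engine, prefix + "mse")
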
def train_network(
        net, train_loader, valid_loader, hparameters, device, dtype,
        loggers=[False, False, False], log_dir=None):
    """
    Network trainer using the ignite framework.

    Args:
        net (`torch.nn.Module`): the model to be trained.
        train_loader (`torch.data.DataLoader`): data loader for the training set.
        valid_loader (`torch.data.DataLoader`): data loader for the validation set.
        hparameters (dict): hyper-parameters for the training process.
        device (`torch.device`): device on which training and evaluation are performed.
        dtype (`torch.dtype`): data type for the tensors under processing.
        loggers (list of len 3): verbosity of the trainer.

    TODO:
        - At the moment, tensorboard logs are commented out.
        - There is a discrepancy between the training loss and the MSE metric,
          as the implementations in the pytorch core and in ignite are not
          consistent.
    """
    # Define loss and optimizer
    criterion = nn.MSELoss(reduction=hparameters.get('mse_reduction', 'mean'))
    metrics = {'mse': MeanSquaredError()}
    optimizer = get_optimiser(hparameters.get('optimiser', 'adam'),
                              net.parameters(), hparameters)

    # define training and evaluation engines
    trainer = create_supervised_trainer(net, optimizer, criterion, device, dtype)
    train_evaluator = create_supervised_evaluator(net, metrics, device, dtype)
    valid_evaluator = create_supervised_evaluator(net, metrics, device, dtype)

    # adding early stopping criterion
    def score_function(engine):
        val_loss = engine.state.metrics['mse']
        return -val_loss

    es_handler = EarlyStopping(
        patience=hparameters.get('patience', 20),
        score_function=score_function,
        trainer=trainer,
        model=net)
    # the handler is attached to an *Evaluator* (runs one epoch on the validation dataset).
    valid_evaluator.add_event_handler(Events.COMPLETED, es_handler, net)

    # keep track of the training and validation loss
    tr_history = {'training_loss': [], 'validation_loss': []}

    # create_dir(log_dir)  # creating the log dir if not None
    # writer = create_summary_writer(net, train_loader, log_dir)

    if loggers[0]:
        @trainer.on(Events.ITERATION_COMPLETED)
        def log_training_loss(trainer):
            print("Epoch {} | Batch {} | Loss: {:.2f}".format(
                trainer.state.epoch,
                (trainer.state.iteration - 1) % len(train_loader),
                trainer.state.output))
            # writer.add_scalar("training/loss", trainer.state.output, trainer.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        train_evaluator.run(train_loader)
        metrics = train_evaluator.state.metrics
        tr_history['training_loss'].append(metrics['mse'])
        if loggers[1]:
            # writer.add_scalars('MSE', {"training": metrics['mse']}, trainer.state.epoch)
            print("Epoch: {} - Training loss: {:.2f} | MSE: {:.2f}"
                  .format(trainer.state.epoch, trainer.state.output, metrics['mse']))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        valid_evaluator.run(valid_loader)
        metrics = valid_evaluator.state.metrics
        tr_history['validation_loss'].append(metrics['mse'])
        if loggers[2]:
            # writer.add_scalars('MSE', {"validation": metrics['mse']}, trainer.state.epoch)
            print("Epoch: {} - Validation MSE: {:.2f}"
                  .format(trainer.state.epoch, metrics['mse']))

    trainer.run(train_loader, max_epochs=hparameters.get("num_epochs", 10000))
    # writer.close()
    return es_handler.get_best_model_after_stop(), tr_history

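# A minimal sketch of the loss/metric discrepancy flagged in the TODO above. My
# reading of ignite's MeanSquaredError is that it divides the accumulated squared
# error by the number of examples (the first dimension), while
# nn.MSELoss(reduction='mean') divides by the total number of elements, so for
# multi-dimensional targets the two differ by a factor of the per-example element
# count:
import torch
import torch.nn as nn
from ignite.metrics import MeanSquaredError

y_pred = torch.randn(8, 4)
y = torch.randn(8, 4)

torch_mse = nn.MSELoss(reduction='mean')(y_pred, y).item()  # sum / (8 * 4)

metric = MeanSquaredError()
metric.update((y_pred, y))
ignite_mse = metric.compute()                               # sum / 8

assert abs(ignite_mse - 4 * torch_mse) < 1e-4  # factor of 4 = elements per example
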
train_dataset, val_dataset = random_split(
    torch_ds, [X_scaled.shape[0] - num_split, num_split])
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128)

# Setup
model = Autoencoder(input_dim=X_scaled.shape[1], hidden_dim=100,
                    latent_dim=3).to(device)
optimizer = torch.optim.Adam(params=model.parameters())
loss = nn.MSELoss()

# Ignite me
trainer = create_supervised_trainer(model, optimizer, loss, device=device)
evaluator = create_supervised_evaluator(
    model, metrics={'mse': MeanSquaredError()}, device=device)

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_loss(trainer):
    print("Training results - EPOCH [{}]: Avg loss: {:.3f}".format(
        trainer.state.epoch, trainer.state.output))

@trainer.on(Events.EPOCH_STARTED)
def validation_loss(trainer):
    if trainer.state.epoch == 1:
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print("Validation Results - Epoch: {} Avg MSE: {:.2f}".format(
            trainer.state.epoch, metrics['mse']))

@trainer.on(Events.EPOCH_COMPLETED)

# Setup logging
log_dir = 'runs/meta_rec_mf_bias_' + str(datetime.now()).replace(' ', '_')
writer = SummaryWriter(log_dir=log_dir)

# Instantiate the model class object
model = MF(n_user, n_item, writer=writer, k=k, c_bias=c_bias, c_vector=c_vector)

# Use Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Create a supervised trainer
trainer = create_supervised_trainer(model, optimizer, model.loss)

# Use Mean Squared Error as evaluation metric
metrics = {'evaluation': MeanSquaredError()}

# Create a supervised evaluator
evaluator = create_supervised_evaluator(model, metrics=metrics)

# Load the train and test data
train_loader = Loader(train_x, train_y, batchsize=1024)
test_loader = Loader(test_x, test_y, batchsize=1024)


def log_training_loss(engine, log_interval=500):
    """
    Function to log the training loss
    """
    model.itr = engine.state.iteration  # Keep track of iterations
    if model.itr % log_interval == 0:

def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        classes_list,
        optimizer,
        scheduler,
        loss_fn,
        start_epoch,
):
    # 1. Load parameters from cfg
    epochs = cfg.SOLVER.MAX_EPOCHS
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.SOLVER.OUTPUT_DIR
    device = cfg.MODEL.DEVICE

    # 2. Recording tools setup
    # (1) Logger
    logger = logging.getLogger("classification.train")  # corresponding to logger("classification")
    logger.info("Start training")

    # (2) TensorBoard SummaryWriter
    # save progress
    writer_train = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/train/")
    writer_val = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/val")
    # save graph
    writer_graph = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/train/graph")
    inputshape = None
    try:
        data = next(iter(train_loader))
        input = data[0]
        # inputshape = (input.shape[1], input.shape[2], input.shape[3]) if len(input.shape) == 4 else (input.shape[1], input.shape[2])
        inputshape = [input.shape[i] for i in range(1, len(input.shape))]
        """
        grid = torchvision.utils.make_grid(input)
        writer_graph.add_image('images', grid, 0)
        writer_graph.add_graph(model, input)
        writer_graph.flush()
        """
    except Exception as e:
        print("Failed to save model graph: {}".format(e))

    # 3. Create engine
    # metrics relevant to training
    metrics_train = {
        "avg_total_loss": RunningAverage(output_transform=lambda x: x["total_loss"]),
        "accuracy": RunningAverage(MeanSquaredError(
            output_transform=lambda x: (x["cf_logits"], x["cf_labels"]))),
        "mse": RunningAverage(MeanSquaredError(
            output_transform=lambda x: (x["rg_logits"], x["rg_labels"]))),
    }

    # add separate metrics
    lossKeys = cfg.LOSS.TYPE.split(" ")
    if "counts_regression_loss" in lossKeys:
        lossKeys.append("counts_classification_loss")
    for lossName in lossKeys:
        if lossName == "contact_prediction_loss":
            metrics_train["AVG-" + "contact_prediction_loss"] = RunningAverage(
                output_transform=lambda x: x["losses"]["contact_prediction_loss"])
        elif lossName == "secondary_structure_prediction_loss":
            metrics_train["AVG-" + "secondary_structure_prediction_loss"] = RunningAverage(
                output_transform=lambda x: x["losses"]["secondary_structure_prediction_loss"])
        else:
            raise Exception('expected METRIC_LOSS_TYPE should not be {}'.format(cfg.LOSS.TYPE))

    # create engine with metrics attached
    trainer = create_supervised_trainer(model, optimizer, metrics_train, loss_fn,
                                        device=device)

    # attach checkpointer & timer to the engine
    checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE_NAME,
                                   checkpoint_period, n_saved=300,
                                   require_empty=False, start_step=start_epoch)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                              {'model': model, 'optimizer': optimizer})
    # checkpointer_save_graph = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE_NAME, checkpoint_period, n_saved=300, require_empty=False, start_step=-1)
    # trainer.add_event_handler(Events.STARTED, checkpointer_save_graph, {'model': model, 'optimizer': optimizers[0]})
    timer = Timer(average=True)
    timer.attach(trainer, start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # 4. Other event handlers
    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch
        engine.state.iteration = engine.state.iteration + start_epoch * len(train_loader)
        logger.info("Model:{}".format(model))
        print("Input Shape: {}".format(inputshape))
        # inputshape = (cfg.DATA.TRANSFORM.CHANNEL, cfg.DATA.TRANSFORM.SIZE[0], cfg.DATA.TRANSFORM.SIZE[1])
        # logger.info("Model:{}".format(model.count_param(input_shape=inputshape)))
        # metrics = do_inference(cfg, model, val_loader, classes_list, loss_fn, plotFlag=False)

    @trainer.on(Events.EPOCH_COMPLETED)
    # Note: in PyTorch 1.2, scheduler.step() should be called after optimizer.step()
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1
        if ITER % (log_period * engine.state.accumulation_steps) == 0:
            step = engine.state.iteration
            # 1. Tensorboard summary
            # loss (vector)
            avg_losses = {}
            for lossName in lossKeys:
                avg_losses[lossName] = float("{:.3f}".format(
                    engine.state.metrics["AVG-" + lossName]))
                writer_train.add_scalar("Loss/" + lossName, avg_losses[lossName], step)
                writer_train.flush()
            # other scalars
            scalar_list = ["mse", "avg_total_loss"]
            for scalar in scalar_list:
                writer_train.add_scalar("Train/" + scalar, engine.state.metrics[scalar], step)
                writer_train.flush()
            # learning rate
            writer_train.add_scalar("Train/" + "LearningRate", scheduler.get_lr()[0], step)
            writer_train.flush()

            # 2. Logger
            logger.info("Epoch[{}] Iteration[{}/{}] ATLoss: {:.3f}, Avg_Loss: {}, Accuracy: {:.3f}, Base Lr: {:.2e}, step: {}"
                        .format(engine.state.epoch, ITER, len(train_loader),
                                engine.state.metrics['avg_total_loss'],
                                avg_losses,
                                engine.state.metrics['accuracy'],
                                # engine.state.metrics['mse'],
                                scheduler.get_lr()[0], step))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info('Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
                    .format(engine.state.epoch,
                            timer.value() * timer.step_count,
                            train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period == 0:
            metrics = do_inference(cfg, model, val_loader, classes_list, loss_fn,
                                   target_set_name="valid", plotFlag=False)  # do not plot
            step = engine.state.iteration
            writer_val.add_scalar("MSE", metrics['mse'], step)
            writer_val.flush()

    # 5. Launch engine
    trainer.run(train_loader, max_epochs=epochs)
    writer_train.close()
    writer_val.close()

def create_default_engines_from_steps(
    train_step,
    eval_step,
    criterion=None,
    metrics=None,
    fig_dir=None,
    unflatten=None,
):
    """
    create_default_engines_from_steps(
        train_step,
        eval_step,
        criterion=None,
        metrics=None,
        fig_dir=None,
        unflatten=None,
    )

    Parameters
    ----------
    train_step : callable
        The update function for the trainer
    eval_step : callable
        The update function for the evaluator
    criterion : nn.Loss (optional)
        Note: if criterion is not passed, then validation loss will not be
        tracked by ignite, unless passed via metrics.
    metrics : dict (optional)
    fig_dir : string (optional)
    unflatten : tuple (optional)

    Returns
    -------
    trainer : ignite Engine
    evaluator : ignite Engine
    val_log_handler : ignite handler
        To be used with add_evaluation and some dataloaders, in order to
        track progress on a validation set during training.
    val_logger : util.Logger
        Object containing the validation metric data from training.
    """
    if metrics is None:
        metrics = {}
    if criterion is not None:
        metrics.setdefault(
            "loss",
            Loss(criterion, output_transform=loss_eval_output_transform),
        )
    metrics.setdefault("mse", MeanSquaredError(output_transform=lambda x: x[:2]))

    trainer = Engine(train_step)
    evaluator = create_autoencoder_evaluator(eval_step, metrics=metrics)

    save_image_callback = create_save_image_callback(fig_dir, unflatten=unflatten)

    def _epoch_getter():
        return trainer.state.__dict__.get("epoch", None)

    evaluator.add_event_handler(
        Events.ITERATION_COMPLETED(once=1),
        save_image_callback,
        epoch=_epoch_getter,
    )

    val_log_handler, val_logger = create_log_handler(trainer)

    return trainer, evaluator, val_log_handler, val_logger

def run(config):
    train_loader = get_instance(utils, 'dataloader', config, 'train')
    val_loader = get_instance(utils, 'dataloader', config, 'val')

    model = get_instance(models, 'arch', config)
    model = init_model(model, train_loader)
    model, device = ModelPrepper(model, config).out

    loss_fn = get_instance(nn, 'loss_fn', config)

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config, trainable_params)

    writer = create_summary_writer(config, model, train_loader)
    batch_size = config['dataloader']['args']['batch_size']

    if config['mode'] == 'eval' or config['resume']:
        model.load_state_dict(torch.load(config['ckpt_path']))

    epoch_length = int(ceil(len(train_loader) / batch_size))
    desc = "ITERATION - loss: {:.2f}"
    pbar = tqdm(initial=0, leave=False, total=epoch_length, desc=desc.format(0))

    def process_batch(engine, batch):
        inputs, outputs = func(batch)
        model.train()
        model.zero_grad()
        optimizer.zero_grad()
        preds = model(inputs)
        loss = loss_fn(preds, outputs.to(device))
        a = list(model.parameters())[0].clone()
        loss.backward()
        optimizer.step()
        # check if training is happening
        b = list(model.parameters())[0].clone()
        try:
            assert not torch.allclose(a.data, b.data), 'Model not updating anymore'
        except AssertionError:
            plot_grad_flow(model.named_parameters())
        return loss.item()

    def predict_on_batch(engine, batch):
        inputs, outputs = func(batch)
        model.eval()
        with torch.no_grad():
            y_pred = model(inputs)
        return inputs, y_pred, outputs.to(device)

    trainer = Engine(process_batch)
    trainer.logger = setup_logger("trainer")
    evaluator = Engine(predict_on_batch)
    evaluator.logger = setup_logger("evaluator")

    if config['task'] == 'actionpred':
        Accuracy(output_transform=lambda x: (x[1], x[2])).attach(evaluator, 'val_acc')
    if config['task'] == 'gazepred':
        MeanSquaredError(output_transform=lambda x: (x[1], x[2])).attach(evaluator, 'val_MSE')

    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

    training_saver = ModelCheckpoint(config['checkpoint_dir'],
                                     filename_prefix='checkpoint_' + config['task'],
                                     n_saved=1, atomic=True,
                                     save_as_state_dict=True, create_dir=True,
                                     require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, training_saver, {'model': model})

    @trainer.on(Events.ITERATION_COMPLETED)
    def tb_log(engine):
        pbar.desc = desc.format(engine.state.output)
        pbar.update(1)
        writer.add_scalar('training/avg_loss', engine.state.metrics['loss'],
                          engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_trainer_logs(engine):
        pbar.refresh()
        avg_loss = engine.state.metrics['loss']
        tqdm.write('Trainer Results - Epoch {} - Avg loss: {:.2f} \n'.format(
            engine.state.epoch, avg_loss))
        viz_param(writer=writer, model=model, global_step=engine.state.epoch)
        pbar.n = pbar.last_print_n = 0

    @evaluator.on(Events.EPOCH_COMPLETED)
    def print_result(engine):
        try:
            print('Evaluator Results - Accuracy {} \n'.format(
                engine.state.metrics['val_acc']))
        except KeyError:
            print('Evaluator Results - MSE {} \n'.format(
                engine.state.metrics['val_MSE']))

    @evaluator.on(Events.ITERATION_COMPLETED)
    def viz_outputs(engine):
        visualize_outputs(writer=writer, state=engine.state, task=config['task'])

    if config['mode'] == 'train':
        trainer.run(train_loader, max_epochs=config['epochs'], epoch_length=epoch_length)
        pbar.close()

    evaluator.run(val_loader, max_epochs=1,
                  epoch_length=int(ceil(len(val_loader) / batch_size)))

    writer.flush()
    writer.close()

def train():
    """Training code.

    Most of the code is initialization and training setup; the actual
    training loop is hidden inside ignite's Engine. Each training iteration
    is implemented by the update function built in `create_supervised_trainer`.
    """
    import argparse

    # By using the `argparse` module, you can specify parameters as command-line arguments.
    parser = argparse.ArgumentParser(description="Example of training")
    parser.add_argument("--gpu", dest="use_gpu", action="store_true",
                        help="Use the GPU if specified, otherwise the CPU.")
    parser.add_argument("--dataset-train", type=str, default="dataset/train.csv",
                        help="Training dataset.")
    parser.add_argument("--dataset-validation", type=str,
                        default="dataset/validation.csv",
                        help="Validation dataset.")
    parser.add_argument("--epochs", type=int, default=100,
                        help="Number of training epochs.")
    parser.add_argument("--batchsize", type=int, default=64,
                        help="Size of a mini-batch.")
    parser.add_argument("--n-units", type=int, default=64,
                        help="Number of hidden units.")
    parser.add_argument("--out", default="result", help="Output directory.")
    args = parser.parse_args()

    # Setup a neural network
    in_dim = 3   # Input dimension
    out_dim = 3  # Output dimension
    model = SimpleMLP(in_dim, out_dim, args.n_units)

    # Enable GPU if specified
    if args.use_gpu:
        device = "cuda"
        model = model.to("cuda")  # Move the model to GPU memory
    else:
        device = "cpu"

    # Setup an optimizer.
    # The optimizer specifies the model to be trained and the parameter update method.
    optimizer = optim.Adam(model.parameters())  # Use Adam, one of the gradient descent methods

    # Load a training dataset and validation dataset.
    train_loader = torch.utils.data.DataLoader(load_dataset(args.dataset_train),
                                               batch_size=args.batchsize,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(load_dataset(args.dataset_validation),
                                             batch_size=1000, shuffle=False)

    # Setup a loss function.
    # In this example, the mean squared error is used.
    loss = nn.MSELoss()

    # Setup a trainer.
    trainer = create_supervised_trainer(model, optimizer, loss, device)

    # Setup an evaluator.
    metrics = {'accuracy': MeanSquaredError(), 'nll': Loss(loss)}
    evaluator = create_supervised_evaluator(model, metrics, device)

    # Setup a log writer
    writer = SummaryWriter(log_dir=args.out)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        print("Epoch[{}] Loss: {:.5f}".format(trainer.state.epoch,
                                              trainer.state.output), end="\r")

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        print("Training Results - Epoch: {:3d} Avg accuracy: {:.5f} Avg loss: {:.5f}"
              .format(trainer.state.epoch, metrics['accuracy'], metrics['nll']))
        writer.add_scalar("training/avg_loss", metrics['nll'], trainer.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print("Validation Results - Epoch: {:3d} Avg accuracy: {:.5f} Avg loss: {:.5f}"
              .format(trainer.state.epoch, metrics['accuracy'], metrics['nll']))
        writer.add_scalar("validation/avg_loss", metrics['accuracy'], trainer.state.epoch)

    # Settings of model saving
    handler = ModelCheckpoint(dirname=args.out, filename_prefix='sample',
                              save_interval=10, n_saved=3, create_dir=True,
                              require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'mymodel': model})

    # Start training
    trainer.run(train_loader, max_epochs=args.epochs)
    writer.close()

def test():
    from tqdm import tqdm
    import torch.utils.data
    from scipy.stats import pearsonr
    from sklearn.metrics import mean_squared_error, r2_score

    device = 'cuda'

    true = torch.rand(100_000)
    pred = true + .3 * torch.randn_like(true)
    ds = torch.utils.data.TensorDataset(true, pred)

    print('Manual:')
    print('  Mean true:', true.mean().item())
    print('  Var true :', true.var().item())
    print('  Var pred :', pred.var().item())
    print('  Cov      :', np.cov(true, pred)[0, 1])
    print('  MSE      :', mean_squared_error(true, pred))
    print('  R        :', pearsonr(true, pred)[0])
    print('  R2       :', r2_score(true, pred))
    print()

    mean_true = Mean()
    var_true = Variance()
    var_pred = Variance()
    cov = Covariance()
    mse = MeanSquaredError()
    r = PearsonR()
    r2 = R2()

    for batch_size in [len(ds), 25_000, 1_000, 5, 1]:
        mean_true.reset()
        var_true.reset()
        var_pred.reset()
        cov.reset()
        mse.reset()
        r.reset()
        r2.reset()

        dl = torch.utils.data.DataLoader(ds, batch_size=batch_size,
                                         shuffle=False, num_workers=0)
        for true_batch, pred_batch in tqdm(dl):
            true_batch = true_batch.to(device)
            pred_batch = pred_batch.to(device)
            # update with the current batch, not the full tensors
            mean_true.update(true_batch)
            var_true.update(true_batch)
            var_pred.update(pred_batch)
            cov.update((pred_batch, true_batch))
            mse.update((pred_batch, true_batch))
            r.update((pred_batch, true_batch))
            r2.update((pred_batch, true_batch))

        print(f'Batch size {batch_size}:')
        print(f'  Mean true: {mean_true.compute()} ({mean_true.compute() - true.mean().item():.0E})')
        print(f'  Var true : {var_true.compute()} ({var_true.compute() - true.var().item():.0E})')
        print(f'  Var pred : {var_pred.compute()} ({var_pred.compute() - pred.var().item():.0E})')
        print(f'  Cov      : {cov.compute()} ({cov.compute() - np.cov(true, pred)[0, 1]:.0E})')
        print(f'  MSE      : {mse.compute()} ({mse.compute() - mean_squared_error(true, pred):.0E})')
        print(f'  R        : {r.compute()} ({r.compute() - pearsonr(true, pred)[0]:.0E})')
        print(f'  R2       : {r2.compute()} ({r2.compute() - r2_score(true, pred):.0E})')
        print()

def test_zero_div():
    mse = MeanSquaredError()
    with pytest.raises(NotComputableError):
        mse.compute()

                                          num_workers=num_workers)

model = Model(number_of_classes=number_of_classes)
optimizer = optim.Adam(model.parameters(), lr=args.learningrate)
trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
metrics = {
    "accuracy": Accuracy(),
    "MAE": MeanAbsoluteError(
        output_transform=lambda out: (torch.max(out[0], dim=1)[1], out[1])),
    "MSE": MeanSquaredError(
        output_transform=lambda out: (torch.max(out[0], dim=1)[1], out[1])),
    "loss": Loss(loss_fn=criterion),
}
evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    print(f"Training (Epoch {trainer.state.epoch}): {trainer.state.output:.3f}")

best_epoch = 0
best_val_metrics = {"MAE": np.inf}

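# The output_transform used for MAE/MSE above converts logits into predicted class
# indices via torch.max(..., dim=1)[1], so those metrics measure the numeric distance
# between predicted and true class indices rather than operating on raw logits.
# A small illustration of the transform in isolation (values are made up):
import torch

logits = torch.tensor([[0.1, 2.0, 0.3],
                       [1.5, 0.2, 0.1]])
targets = torch.tensor([1, 2])
preds = torch.max(logits, dim=1)[1]  # tensor([1, 0]) -- argmax over classes
# MAE here would be (|1 - 1| + |0 - 2|) / 2 = 1.0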