def _test(metric_device):
    # Nested helper: `update`, `n_iters`, `s`, `y_true` and `y_preds` are defined in the
    # enclosing distributed integration test.
    engine = Engine(update)

    m = MeanPairwiseDistance(device=metric_device)
    m.attach(engine, "mpwd")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mpwd" in engine.state.metrics
    res = engine.state.metrics["mpwd"]

    true_res = []
    for i in range(n_iters * idist.get_world_size()):
        true_res.append(
            torch.pairwise_distance(
                y_true[i * s : (i + 1) * s, ...],
                y_preds[i * s : (i + 1) * s, ...],
                p=m._p,
                eps=m._eps,
            )
            .cpu()
            .numpy()
        )
    true_res = np.array(true_res).ravel()
    true_res = true_res.mean()

    assert pytest.approx(res) == true_res
def test_accumulator_detached():
    mpd = MeanPairwiseDistance()

    y_pred = torch.tensor([[3.0, 4.0], [-3.0, -4.0]], requires_grad=True)
    y = torch.zeros(2, 2)
    mpd.update((y_pred, y))

    assert not mpd._sum_of_distances.requires_grad
def test_zero_sample():
    mpd = MeanPairwiseDistance()
    with pytest.raises(
        NotComputableError,
        match=r"MeanPairwiseDistance must have at least one example before it can be computed",
    ):
        mpd.compute()
def test_compute():
    mpd = MeanPairwiseDistance()

    y_pred = torch.Tensor([[3.0, 4.0], [-3.0, -4.0]])
    y = torch.zeros(2, 2)
    mpd.update((y_pred, y))
    assert mpd.compute() == approx(5.0)

    mpd.reset()
    y_pred = torch.Tensor([[4.0, 4.0, 4.0, 4.0], [-4.0, -4.0, -4.0, -4.0]])
    y = torch.zeros(2, 4)
    mpd.update((y_pred, y))
    assert mpd.compute() == approx(8.0)
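# A minimal reference sketch (not part of the original test file) making the expected values above
# explicit: MeanPairwiseDistance with its defaults (p=2, eps=1e-6) averages torch.pairwise_distance
# over rows, so [3, 4] and [-3, -4] vs. zeros both give sqrt(3**2 + 4**2) = 5 (mean 5.0), and each
# four-component row of +/-4 gives sqrt(4 * 4**2) = 8 (mean 8.0), up to the tiny eps term.
# `_reference_mpd` is a hypothetical helper name, not something from ignite.
def _reference_mpd(y_pred, y, p=2, eps=1e-6):
    # Per-row p-norm distance, then the mean over rows.
    return torch.pairwise_distance(y_pred, y, p=p, eps=eps).mean().item()

# _reference_mpd(torch.Tensor([[3.0, 4.0], [-3.0, -4.0]]), torch.zeros(2, 2))  -> approx 5.0
# _reference_mpd(torch.Tensor([[4.0] * 4, [-4.0] * 4]), torch.zeros(2, 4))     -> approx 8.0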
def _test_distrib_integration(device):
    import numpy as np
    import torch.distributed as dist

    from ignite.engine import Engine

    rank = dist.get_rank()
    torch.manual_seed(12)

    n_iters = 100
    s = 50
    offset = n_iters * s
    y_true = torch.rand(offset * dist.get_world_size(), 10).to(device)
    y_preds = torch.rand(offset * dist.get_world_size(), 10).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + offset * rank : (i + 1) * s + offset * rank, ...],
            y_true[i * s + offset * rank : (i + 1) * s + offset * rank, ...],
        )

    engine = Engine(update)

    m = MeanPairwiseDistance(device=device)
    m.attach(engine, "mpwd")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mpwd" in engine.state.metrics
    res = engine.state.metrics["mpwd"]

    true_res = []
    for i in range(n_iters * dist.get_world_size()):
        true_res.append(
            torch.pairwise_distance(
                y_true[i * s : (i + 1) * s, ...],
                y_preds[i * s : (i + 1) * s, ...],
                p=m._p,
                eps=m._eps,
            )
            .cpu()
            .numpy()
        )
    true_res = np.array(true_res).ravel()
    true_res = true_res.mean()

    assert pytest.approx(res) == true_res
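# A small illustrative sketch (not from the original file) of the per-rank data layout used by the
# distributed test above: each rank owns a contiguous block of offset = n_iters * s rows, and
# iteration i on rank r reads rows [i * s + offset * r, (i + 1) * s + offset * r). Because the
# attached metric all-reduces its accumulators, the engine's "mpwd" value is compared against the
# mean over the full y_true / y_preds tensors. `_rank_slice` is a hypothetical helper for clarity.
def _rank_slice(i, rank, s=50, n_iters=100):
    offset = n_iters * s
    start = i * s + offset * rank
    return start, start + s

# With two ranks: rank 0 covers rows [0, 5000), rank 1 covers rows [5000, 10000).
assert _rank_slice(0, rank=0) == (0, 50)
assert _rank_slice(0, rank=1) == (5000, 5050)
assert _rank_slice(99, rank=1) == (9950, 10000)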
def _test_distrib_accumulator_device(device):

    metric_devices = [torch.device("cpu")]
    if device.type != "xla":
        metric_devices.append(idist.device())
    for metric_device in metric_devices:
        mpd = MeanPairwiseDistance(device=metric_device)

        for dev in [mpd._device, mpd._sum_of_distances.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"

        y_pred = torch.Tensor([[3.0, 4.0], [-3.0, -4.0]])
        y = torch.zeros(2, 2)
        mpd.update((y_pred, y))

        for dev in [mpd._device, mpd._sum_of_distances.device]:
            assert dev == metric_device, f"{type(dev)}:{dev} vs {type(metric_device)}:{metric_device}"
def test_compute():
    mpd = MeanPairwiseDistance()

    def _test(y_pred, y, batch_size):
        mpd.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                mpd.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            mpd.update((y_pred, y))

        np_res = np.mean(torch.pairwise_distance(y_pred, y, p=mpd._p, eps=mpd._eps).numpy())

        assert isinstance(mpd.compute(), float)
        assert pytest.approx(mpd.compute()) == np_res

    def get_test_cases():
        test_cases = [
            (torch.randint(0, 10, size=(100, 1)), torch.randint(0, 10, size=(100, 1)), 1),
            (torch.randint(-20, 20, size=(100, 5)), torch.randint(-20, 20, size=(100, 5)), 1),
            # updated batches
            (torch.randint(0, 10, size=(100, 1)), torch.randint(0, 10, size=(100, 1)), 16),
            (torch.randint(-20, 20, size=(100, 5)), torch.randint(-20, 20, size=(100, 5)), 16),
        ]
        return test_cases

    for _ in range(5):
        # check multiple random inputs as random exact occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
def test_zero_div():
    mpd = MeanPairwiseDistance()
    with pytest.raises(NotComputableError):
        mpd.compute()
def run(opt):
    if opt.log_file is not None:
        logging.basicConfig(filename=opt.log_file, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    # logger.addHandler(logging.StreamHandler())
    logger = logger.info

    writer = SummaryWriter(log_dir=opt.log_dir)
    model_timer, data_timer = Timer(average=True), Timer(average=True)

    # Training variables
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt)
    optimizer = SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                    weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=opt.lr_patience)

    # Loading checkpoint
    if opt.checkpoint:
        logger('loading checkpoint {}'.format(opt.checkpoint))
        checkpoint = torch.load(opt.checkpoint)
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    logger('Loading dataset')
    train_transform = get_transform(mean, std, opt.face_size, mode='training')
    train_data = get_training_set(opt, transform=train_transform)
    train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True,
                              num_workers=opt.n_threads, pin_memory=True)

    val_transform = get_transform(mean, std, opt.face_size, mode='validation')
    val_data = get_validation_set(opt, transform=val_transform)
    val_loader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False,
                            num_workers=opt.n_threads, pin_memory=True)

    trainer = create_supervised_trainer(model, optimizer, nn.L1Loss().cuda(), cuda=True)
    evaluator = create_supervised_evaluator(model, metrics={
        'distance': MeanPairwiseDistance(),
        'loss': MeanAbsoluteError()
    }, cuda=True)

    # Training timer handlers
    model_timer.attach(trainer,
                       start=Events.EPOCH_STARTED,
                       resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED,
                       step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED,
                      step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % opt.log_interval == 0:
            logger("Epoch[{}] Iteration[{}/{}] Loss: {:.2f} Model Process: {:.3f}s/batch "
                   "Data Preparation: {:.3f}s/batch".format(engine.state.epoch, iter, len(train_loader),
                                                            engine.state.output, model_timer.value(),
                                                            data_timer.value()))
            writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    # Log/Plot Learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[0]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar("learning_rate", lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(opt.result_path,
                                          'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # val_evaluator event handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        # metric_values = [metrics[m] for m in val_metrics]
        logger("Validation Results - Epoch: {} ".format(engine.state.epoch) +
               ' '.join(['{}: {:.4f}'.format(m, val) for m, val in metrics.items()]))
        for m, val in metrics.items():
            writer.add_scalar('validation/{}'.format(m), val, engine.state.epoch)
        # if engine.state.epoch == 1: optimizer.param_groups[0]['lr'] = 1e-4

        # Update Learning Rate
        scheduler.step(metrics['loss'])

    # kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)
    writer.close()
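# A self-contained sketch (toy names are assumptions, not from the original script) of the checkpoint
# round trip used in run(): save_checkpoint() writes {'epoch', 'arch', 'state_dict', 'optimizer'},
# and the resume branch at the top of run() reads 'epoch', 'state_dict' and 'optimizer' back from
# the same file via torch.load.
import torch
from torch import nn
from torch.optim import SGD

toy_model = nn.Linear(4, 1)                            # stand-ins for model / optimizer in run()
toy_optimizer = SGD(toy_model.parameters(), lr=0.1)

states = {
    'epoch': 3,
    'arch': 'toy',
    'state_dict': toy_model.state_dict(),
    'optimizer': toy_optimizer.state_dict(),
}
torch.save(states, 'save_3.pth')

checkpoint = torch.load('save_3.pth')
begin_epoch = checkpoint['epoch']
toy_model.load_state_dict(checkpoint['state_dict'])
toy_optimizer.load_state_dict(checkpoint['optimizer'])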