def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_interval):
    cuda = torch.cuda.is_available()
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    if cuda:
        model = model.cuda()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, cuda=cuda)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'accuracy': CategoricalAccuracy(),
                                                     'nll': Loss(F.nll_loss)},
                                            cuda=cuda)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, iter, len(train_loader), engine.state.output))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll))

    trainer.run(train_loader, max_epochs=epochs)
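# The run() above needs an entry point; a minimal sketch, assuming the flag names
# mirror the function parameters (the names and defaults below are hypothetical):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--val_batch_size', type=int, default=1000)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--momentum', type=float, default=0.5)
    parser.add_argument('--log_interval', type=int, default=10)
    args = parser.parse_args()

    run(args.batch_size, args.val_batch_size, args.epochs, args.lr,
        args.momentum, args.log_interval)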
def run(mode, noise_fraction, train_batch_size, val_batch_size, epochs, lr, momentum,
        log_interval, log_dir):
    seed = 12345
    random.seed(seed)
    torch.manual_seed(seed)

    now = datetime.now()
    log_dir = os.path.join(log_dir, "train_{}_{}__{}".format(mode, noise_fraction,
                                                             now.strftime("%Y%m%d_%H%M")))
    os.makedirs(log_dir)

    cuda = torch.cuda.is_available()
    train_loader, val_loader = get_data_loaders(noise_fraction, train_batch_size, val_batch_size)
    model = Net()
    writer = create_summary_writer(log_dir)

    if cuda:
        model = model.cuda()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

    if mode == 'xentropy':
        criterion = nn.CrossEntropyLoss()
    elif mode == 'soft_bootstrap':
        criterion = SoftBootstrappingLoss(beta=0.95)
    elif mode == 'hard_bootstrap':
        criterion = HardBootstrappingLoss(beta=0.8)
    else:
        raise ValueError("Wrong mode {}, expected: xentropy, soft_bootstrap or hard_bootstrap".format(mode))

    trainer = create_supervised_trainer(model, optimizer, criterion, cuda=cuda)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'accuracy': CategoricalAccuracy(),
                                                     'nll': Loss(nn.CrossEntropyLoss())},
                                            cuda=cuda)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, iter, len(train_loader), engine.state.output))
            writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll))
        writer.add_scalar("validation/loss", avg_nll, engine.state.epoch)
        writer.add_scalar("validation/accuracy", avg_accuracy, engine.state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)
    writer.close()
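# SoftBootstrappingLoss and HardBootstrappingLoss are used above but not defined in
# this section. A minimal sketch following Reed et al., "Training Deep Neural
# Networks on Noisy Labels with Bootstrapping" (2014), assuming y_pred holds raw
# logits and y holds integer class labels:
import torch.nn as nn
import torch.nn.functional as F


class SoftBootstrappingLoss(nn.Module):
    """Blend the given (possibly noisy) labels with the model's soft predictions:
    L = - sum_k (beta * t_k + (1 - beta) * p_k) * log p_k."""

    def __init__(self, beta=0.95):
        super(SoftBootstrappingLoss, self).__init__()
        self.beta = beta

    def forward(self, y_pred, y):
        log_p = F.log_softmax(y_pred, dim=1)
        # beta-weighted usual cross-entropy term
        ce = F.nll_loss(log_p, y)
        # (1 - beta)-weighted term using the predicted distribution itself
        soft = -(log_p.exp() * log_p).sum(dim=1).mean()
        return self.beta * ce + (1.0 - self.beta) * soft


class HardBootstrappingLoss(nn.Module):
    """Same idea, but the model's contribution is its hard (argmax) prediction."""

    def __init__(self, beta=0.8):
        super(HardBootstrappingLoss, self).__init__()
        self.beta = beta

    def forward(self, y_pred, y):
        log_p = F.log_softmax(y_pred, dim=1)
        z = log_p.argmax(dim=1).detach()  # model's own hard labels
        return self.beta * F.nll_loss(log_p, y) + (1.0 - self.beta) * F.nll_loss(log_p, z)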
def test_create_supervised_trainer():
    model = Linear(1, 1)
    model.weight.data.zero_()
    model.bias.data.zero_()

    optimizer = SGD(model.parameters(), 0.1)
    trainer = create_supervised_trainer(model, optimizer, mse_loss)

    x = torch.FloatTensor([[1.0], [2.0]])
    y = torch.FloatTensor([[3.0], [5.0]])
    data = [(x, y)]

    assert model.weight.data[0, 0] == approx(0.0)
    assert model.bias.data[0] == approx(0.0)

    state = trainer.run(data)

    # With w = b = 0 every prediction is 0, so MSE = (3^2 + 5^2) / 2 = 17.0,
    # and one SGD step with lr = 0.1 gives w = 1.3 and b = 0.8.
    assert state.output == approx(17.0)
    assert model.weight.data[0, 0] == approx(1.3)
    assert model.bias.data[0] == approx(0.8)
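# A standalone hand-check of the constants asserted above, using plain torch:
import torch

x = torch.FloatTensor([[1.0], [2.0]])
y = torch.FloatTensor([[3.0], [5.0]])
pred = torch.zeros_like(y)                # w = b = 0, so predictions are 0
loss = ((pred - y) ** 2).mean()           # (9 + 25) / 2 = 17.0
dw = (2 * (pred - y) * x).mean()          # (-6 - 20) / 2 = -13.0
db = (2 * (pred - y)).mean()              # (-6 - 10) / 2 = -8.0
w1 = 0.0 - 0.1 * dw                       # 0 - 0.1 * (-13) = 1.3
b1 = 0.0 - 0.1 * db                       # 0 - 0.1 * (-8)  = 0.8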
def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_interval, log_dir):
    cuda = torch.cuda.is_available()
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    writer = create_summary_writer(model, log_dir)

    if cuda:
        model = model.cuda()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, cuda=cuda)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'accuracy': CategoricalAccuracy(),
                                                     'nll': Loss(F.nll_loss)},
                                            cuda=cuda)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer, state):
        iter = (state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  .format(state.epoch, iter, len(train_loader), state.output))
            writer.add_scalar("training/loss", state.output, state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer, state):
        metrics = evaluator.run(val_loader).metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
              .format(state.epoch, avg_accuracy, avg_nll))
        writer.add_scalar("validation/loss", avg_nll, state.epoch)
        writer.add_scalar("validation/accuracy", avg_accuracy, state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)

    writer.close()
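# create_summary_writer(model, log_dir) is not defined in this section; a minimal
# sketch, assuming tensorboardX and an MNIST-shaped dummy input for the graph trace:
import torch
from tensorboardX import SummaryWriter


def create_summary_writer(model, log_dir):
    writer = SummaryWriter(log_dir=log_dir)
    try:
        # Trace the model once so TensorBoard can render its graph
        dummy_input = torch.zeros(1, 1, 28, 28)
        writer.add_graph(model, dummy_input)
    except Exception as e:
        print("Failed to save model graph: {}".format(e))
    return writer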
def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_interval):
    vis = visdom.Visdom()
    if not vis.check_connection():
        raise RuntimeError("Visdom server not running. Please run python -m visdom.server")

    cuda = torch.cuda.is_available()
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    if cuda:
        model = model.cuda()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, cuda=cuda)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'accuracy': CategoricalAccuracy(),
                                                     'nll': Loss(F.nll_loss)},
                                            cuda=cuda)

    train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
    val_accuracy_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Accuracy')
    val_loss_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Loss')

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer, state):
        iter = (state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  .format(state.epoch, iter, len(train_loader), state.output))
            vis.line(X=np.array([state.iteration]), Y=np.array([state.output]),
                     update='append', win=train_loss_window)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer, state):
        metrics = evaluator.run(val_loader).metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
              .format(state.epoch, avg_accuracy, avg_nll))
        vis.line(X=np.array([state.epoch]), Y=np.array([avg_accuracy]),
                 win=val_accuracy_window, update='append')
        vis.line(X=np.array([state.epoch]), Y=np.array([avg_nll]),
                 win=val_loss_window, update='append')

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)
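# create_plot_window() is assumed above; one plausible implementation creates a
# single empty visdom line plot whose window handle the handlers later append to:
import numpy as np


def create_plot_window(vis, xlabel, ylabel, title):
    return vis.line(X=np.array([1]), Y=np.array([np.nan]),
                    opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))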
def run(opt):
    if opt.log_file is not None:
        logging.basicConfig(filename=opt.log_file, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    # logger.addHandler(logging.StreamHandler())
    logger = logger.info

    writer = SummaryWriter(log_dir=opt.log_dir)
    model_timer, data_timer = Timer(average=True), Timer(average=True)

    # Training variables
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt)
    optimizer = SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                    weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=opt.lr_patience)

    # Loading checkpoint
    if opt.checkpoint:
        logger('loading checkpoint {}'.format(opt.checkpoint))
        checkpoint = torch.load(opt.checkpoint)
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    logger('Loading dataset')
    train_transform = get_transform(mean, std, opt.face_size, mode='training')
    train_data = get_training_set(opt, transform=train_transform)
    train_loader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=True,
                              num_workers=opt.n_threads, pin_memory=True)

    val_transform = get_transform(mean, std, opt.face_size, mode='validation')
    val_data = get_validation_set(opt, transform=val_transform)
    val_loader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=False,
                            num_workers=opt.n_threads, pin_memory=True)

    trainer = create_supervised_trainer(model, optimizer, nn.L1Loss().cuda(), cuda=True)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'distance': MeanPairwiseDistance(),
                                                     'loss': MeanAbsoluteError()},
                                            cuda=True)

    # Training timer handlers
    model_timer.attach(trainer,
                       start=Events.EPOCH_STARTED,
                       resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED,
                       step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED,
                      step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % opt.log_interval == 0:
            logger("Epoch[{}] Iteration[{}/{}] Loss: {:.2f} Model Process: {:.3f}s/batch "
                   "Data Preparation: {:.3f}s/batch".format(engine.state.epoch, iter,
                                                            len(train_loader),
                                                            engine.state.output,
                                                            model_timer.value(),
                                                            data_timer.value()))
            writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    # Log/Plot Learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[0]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar("learning_rate", lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(opt.result_path,
                                          'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # val_evaluator event handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        # metric_values = [metrics[m] for m in val_metrics]
        logger("Validation Results - Epoch: {} ".format(engine.state.epoch) +
               ' '.join(['{}: {:.4f}'.format(m, val) for m, val in metrics.items()]))
        for m, val in metrics.items():
            writer.add_scalar('validation/{}'.format(m), val, engine.state.epoch)
        # if engine.state.epoch == 1:
        #     optimizer.param_groups[0]['lr'] = 1e-4

        # Update Learning Rate
        scheduler.step(metrics['loss'])

    # kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)
    writer.close()
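# run(opt) reads many attributes from opt; a hypothetical argparse sketch covering
# the fields accessed above (flag names match the attribute accesses, defaults are
# guesses, not the project's actual CLI):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--log_file', default=None)
parser.add_argument('--log_dir', default='logs')
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--momentum', type=float, default=0.9)
parser.add_argument('--weight_decay', type=float, default=1e-4)
parser.add_argument('--nesterov', action='store_true')
parser.add_argument('--lr_patience', type=int, default=10)
parser.add_argument('--checkpoint', default=None)
parser.add_argument('--begin_epoch', type=int, default=0)
parser.add_argument('--face_size', type=int, default=224)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--n_threads', type=int, default=4)
parser.add_argument('--log_interval', type=int, default=10)
parser.add_argument('--save_interval', type=int, default=1)
parser.add_argument('--result_path', default='results')
parser.add_argument('--model', default='resnet18')
parser.add_argument('--n_epochs', type=int, default=50)

if __name__ == '__main__':
    run(parser.parse_args())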