def _test(n_epochs):
    """Check that the distributed Accuracy metric matches sklearn's accuracy_score."""
    n_iters = 80
    batch_size = 16
    n_classes = 10

    # Each rank owns a contiguous `per_rank`-sized slice of the global data.
    per_rank = n_iters * batch_size
    total = per_rank * idist.get_world_size()
    y_true = torch.randint(0, n_classes, size=(total,)).to(device)
    y_preds = torch.rand(total, n_classes).to(device)

    def update(engine, i):
        start = i * batch_size + rank * per_rank
        stop = (i + 1) * batch_size + rank * per_rank
        return y_preds[start:stop, :], y_true[start:stop]

    engine = Engine(update)
    acc = Accuracy()
    acc.attach(engine, "acc")
    engine.run(data=list(range(n_iters)), max_epochs=n_epochs)

    assert "acc" in engine.state.metrics
    res = engine.state.metrics["acc"]
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()

    # Reference value computed over the *full* (all-ranks) dataset.
    true_res = accuracy_score(y_true.cpu().numpy(),
                              torch.argmax(y_preds, dim=1).cpu().numpy())
    assert pytest.approx(res) == true_res
def _test(n_epochs, metric_device):
    """Check multilabel Accuracy on `metric_device` against sklearn's accuracy_score."""
    metric_device = torch.device(metric_device)
    n_iters = 80
    batch_size = 16
    n_classes = 10

    # Each rank owns a contiguous `per_rank`-sized slice of the global data.
    per_rank = n_iters * batch_size
    total = per_rank * idist.get_world_size()
    y_true = torch.randint(0, 2, size=(total, n_classes, 8, 10)).to(device)
    y_preds = torch.randint(0, 2, size=(total, n_classes, 8, 10)).to(device)

    def update(engine, i):
        start = i * batch_size + rank * per_rank
        stop = (i + 1) * batch_size + rank * per_rank
        return y_preds[start:stop, ...], y_true[start:stop, ...]

    engine = Engine(update)
    acc = Accuracy(is_multilabel=True, device=metric_device)
    acc.attach(engine, "acc")
    engine.run(data=list(range(n_iters)), max_epochs=n_epochs)

    # The metric's accumulator must live on the requested device.
    assert (
        acc._num_correct.device == metric_device
    ), f"{type(acc._num_correct.device)}:{acc._num_correct.device} vs {type(metric_device)}:{metric_device}"
    assert "acc" in engine.state.metrics
    res = engine.state.metrics["acc"]
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()

    true_res = accuracy_score(to_numpy_multilabel(y_true),
                              to_numpy_multilabel(y_preds))
    assert pytest.approx(res) == true_res
def adv_prune_train_loop(model, params, ds, dset, min_y, base_data, model_id,
                         prune_type, device, batch_size, tpa, max_epochs=5):
    """Prune `model`'s first 22 conv layers with an L1 criterion, then fine-tune.

    Parameters
    ----------
    model : network to prune and fine-tune (ResNet-style: conv1 + layer1..4).
    params : dict with 'lr', 'momentum' and 'l2_wd' for the SGD optimizer.
    ds : (train DataLoader, valid DataLoader) pair.
    dset : (train dataset, valid dataset) pair.
    min_y : (train label offset, valid label offset) subtracted from targets.
    base_data : root directory for summary logs and checkpoints.
    model_id : base identifier; suffixed with prune type/amount for this run.
    prune_type : 'global_unstructured' or 'structured'.
    device : torch device to run on.
    batch_size : training batch size (also used to derive validation frequency).
    tpa : total prune amount (fraction of weights or filters to remove).
    max_epochs : number of fine-tuning epochs. Default: 5.
    """
    total_prune_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(model)
    original_model.eval()
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}_l1'
    valid_freq = 200 * 500 // batch_size // 3

    # Collect conv layers in forward order; only the first 22 are pruned.
    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend(
                [bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])
    conv_layers = conv_layers[:22]

    def prune_model(model):
        # One-shot pruning of the selected conv layers by the total amount.
        print(f'pruned model by {total_prune_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=total_prune_amount,
            )
        elif prune_type == 'structured':
            # L1-norm structured pruning per filter, matching the sibling
            # prune_train_loop. (The previous code recursed into prune_model
            # here and never terminated.)
            for layer in conv_layers:
                prune.ln_structured(layer, name='weight',
                                    amount=total_prune_amount, n=1, dim=0)
        else:
            raise ValueError(f'unsupported prune_type: {prune_type}')

    prune_model(model)

    def valid_eval(model, dataset, dataloader, device, label):
        # Plain top-1 accuracy of `model` over `dataloader`.
        right = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(dataloader),
                                total=len(dataset) / dataloader.batch_size):
                data, y = data
                data = data.to(device)
                y = y.to(device) - label
                ans = model.forward(data)
                right += torch.sum(torch.eq(torch.argmax(ans, dim=1), y))
                total += y.shape[0]
        return right / total

    valid_acc = valid_eval(model, valid_set, ds_valid, device, min_y_val)
    print('initial accuracy:', valid_acc.item())

    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                # Re-apply the pruning masks so pruned weights stay at zero.
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error",
                              1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # NOTE(review): steps the plateau scheduler on *accuracy*, but
            # ReduceLROnPlateau defaults to mode='min' — confirm intended.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            # Evaluate metrics on the current batch only.
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, iter, len(ds_train),
                              accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def validation_value(engine):
            # Used as the checkpoint score function below.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq),
                                  handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def run(train_batch_size, test_batch_size, epochs, lr, log_interval, log_dir,
        no_cuda, sub_spectrogram_size, sub_spectrogram_mel_hop, n_mel_bins,
        seed, root_dir, train_dir, eval_dir):
    """ Model runner

    Parameters
    ----------
    train_batch_size : int
        Size of the training batch. Default: 16
    test_batch_size : int
        size of the testing batch. Default: 16
    epochs : int
        Number of training epochs. Default: 200
    lr : float
        Learning rate for the ADAM optimizer. Default: 0.001
    log_interval : int
        Interval for logging data: Default: 10
    log_dir : str
        Directory to save the logs
    no_cuda : Bool
        Should you NOT use cuda? Default: False
    sub_spectrogram_size : int
        Size of the SubSpectrogram. Default 20
    sub_spectrogram_mel_hop : int
        Mel-bin hop size of the SubSpectrogram. Default 10
    n_mel_bins : int
        Number of mel-bins of the Spectrogram extracted. Default: 40.
    seed : int
        Torch random seed value, for reproducible results. Default: 1
    root_dir : str
        Directory of the folder which contains the dataset (has 'audio' and
        'evaluation_setup' folders inside)
    train_dir : str
        Set as default: 'evaluation_setup/train_fold1.txt'
    eval_dir : str
        Set as default: 'evaluation_setup/evaluate_fold1.txt'
    """
    # check if possible to use CUDA
    use_cuda = not no_cuda and torch.cuda.is_available()

    # set seed for reproducibility
    torch.manual_seed(seed)

    # Map to GPU when available
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load the data loaders
    train_loader, val_loader = get_data_loaders(
        train_batch_size, test_batch_size, sub_spectrogram_size,
        sub_spectrogram_mel_hop, n_mel_bins, use_cuda, root_dir, train_dir,
        eval_dir)

    # Get the model (already moved to `device` here, so no second .to() needed)
    model = SubSpectralNet(sub_spectrogram_size, sub_spectrogram_mel_hop,
                           n_mel_bins, use_cuda).to(device)

    # Init the TensorBoard summary writer
    writer = create_summary_writer(model, train_loader, log_dir)

    # Init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    def update_model(engine, batch):
        """One optimization step; returns the per-classifier losses and logits."""
        model.train()
        optimizer.zero_grad()
        inputs, label = prepare_batch(batch, device=device)
        output = model(inputs)
        # One NLL loss per sub-band classifier plus the global classifier;
        # they are summed into a single joint objective.
        losses = [F.nll_loss(output[:, ite, :], label)
                  for ite in range(output.shape[1])]
        loss = sum(losses)
        loss.backward()
        optimizer.step()
        return losses, output

    # get the trainer module
    trainer = Engine(update_model)

    def evaluate(engine, batch):
        """Gradient-free forward pass; returns losses, logits and labels."""
        model.eval()
        with torch.no_grad():
            inputs, label = prepare_batch(batch, device=device)
            output = model(inputs)
            losses = [
                F.nll_loss(output[:, ite, :], label, reduction='sum').item()
                for ite in range(output.shape[1])
            ]
            return losses, output, label

    # get the evaluator module
    evaluator = Engine(evaluate)

    def _make_band_transform(band_index):
        """Build an output transform selecting one classifier's logits.

        A closure factory is used so each transform binds its own index
        (avoids the late-binding pitfall of a loop-created lambda).
        """
        def _transform(output):
            _losses, preds, label = output
            return preds[:, band_index, :], label
        return _transform

    # Classifier order follows the model's output layout; index 3 is the
    # global classifier — TODO confirm against SubSpectralNet.
    for band_index, band in enumerate(
            ['highband', 'midband', 'lowband', 'globalclassifier']):
        transform = _make_band_transform(band_index)
        Accuracy(output_transform=transform).attach(evaluator, f'acc_{band}')
        Loss(F.nll_loss, output_transform=transform).attach(
            evaluator, f'loss_{band}')

    _bands = [('global', 'globalclassifier'), ('lowband', 'lowband'),
              ('midband', 'midband'), ('highband', 'highband')]

    def _write_epoch_metrics(prefix, epoch):
        """Mirror the evaluator's metrics into TensorBoard under `prefix`/."""
        metrics = evaluator.state.metrics
        for tag, key in _bands:
            writer.add_scalar(f"{prefix}/{tag}_loss", metrics[f'loss_{key}'],
                              epoch)
        for tag, key in _bands:
            writer.add_scalar(f"{prefix}/{tag}_acc", metrics[f'acc_{key}'],
                              epoch)

    # Log the events in Ignite: EVERY ITERATION
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            losses, output = engine.state.output
            epoch = engine.state.epoch
            print(
                'Train Epoch: {} [{}/{}]\tLosses: {:.6f} (Top Band), {:.6f} (Mid Band), {:.6f} (Low Band), {:.6f} (Global Classifier)'
                .format(epoch, iter, len(train_loader), losses[0].item(),
                        losses[1].item(), losses[2].item(), losses[3].item()))
            # TensorBoard Logs
            writer.add_scalar("training/loss_topband_itr", losses[0].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_midband_itr", losses[1].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_lowband_itr", losses[2].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_global_itr", losses[3].item(),
                              engine.state.iteration)

    # Log the events in Ignite: Test the training data on EVERY EPOCH
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        print(
            "Training Results - Epoch: {} Global accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch,
                    evaluator.state.metrics['acc_globalclassifier'],
                    evaluator.state.metrics['loss_globalclassifier']))
        _write_epoch_metrics("training", engine.state.epoch)

    # Log the events in Ignite: Test the validation data on EVERY EPOCH
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        print(
            "Validation Results - Epoch: {} Global accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch,
                    evaluator.state.metrics['acc_globalclassifier'],
                    evaluator.state.metrics['loss_globalclassifier']))
        _write_epoch_metrics("validation", engine.state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)

    # close the writer
    writer.close()

    # return the model
    return model
def multiclass_train_lstm(
        model: LstmClassifier,
        dataloader_train: DataLoader,
        dataloader_val: DataLoader,
        filename_prefix: str,
):
    """Train an LSTM classifier with early stopping on validation top3-accuracy.

    Checkpoints the two best models (by top3-accuracy) under DIR_MODELS with
    the given `filename_prefix`; runs for at most 20 epochs.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-3)
    criterion = CrossEntropyLossOneHot()

    def process_function(_engine, batch):
        # One optimization step; returns (preds, targets, loss) for metrics.
        model.train()
        optimizer.zero_grad()
        x, y = batch
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        return y_pred, y, loss.item(),

    def eval_function(_engine, batch):
        # Gradient-free forward pass for the evaluators.
        model.eval()
        with torch.no_grad():
            x, y = batch
            y = y.to(device)
            x = x.to(device)
            y_pred = model(x)
            return y_pred, y

    def score_function(engine):
        # Higher is better for both EarlyStopping and ModelCheckpoint.
        return engine.state.metrics['top3-accuracy']

    model.to(device)
    trainer = Engine(process_function)
    train_evaluator = Engine(eval_function)
    validation_evaluator = Engine(eval_function)

    # NOTE(review): is_multilabel=True with one-hot CE targets looks unusual
    # for a multiclass task — confirm the target encoding requires it.
    accuracy_top1 = Accuracy(output_transform=lambda x: (x[0], x[1]),
                             device=device,
                             is_multilabel=True)
    accuracy_top3 = TopKCategoricalAccuracy(
        output_transform=lambda x: (x[0], x[1]), k=3, device=device)

    RunningAverage(accuracy_top1).attach(trainer, 'accuracy')
    RunningAverage(accuracy_top3).attach(trainer, 'top3-accuracy')
    RunningAverage(output_transform=lambda x: x[2]).attach(trainer, 'loss')

    accuracy_top1.attach(train_evaluator, 'accuracy')
    accuracy_top3.attach(train_evaluator, 'top3-accuracy')
    Loss(criterion).attach(train_evaluator, 'loss')

    accuracy_top1.attach(validation_evaluator, 'accuracy')
    accuracy_top3.attach(validation_evaluator, 'top3-accuracy')
    Loss(criterion).attach(validation_evaluator, 'loss')

    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(engine=trainer, metric_names='all')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        train_evaluator.run(dataloader_train)
        message = f'Training results - Epoch: {engine.state.epoch}.'
        for metric_name, score in train_evaluator.state.metrics.items():
            message += f' {metric_name}: {score:.2f}.'
        pbar.log_message(message)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        validation_evaluator.run(dataloader_val)
        message = f'Validation results - Epoch: {engine.state.epoch}.'
        # BUG FIX: previously read train_evaluator's metrics here, so the
        # validation numbers were never actually reported.
        for metric_name, score in validation_evaluator.state.metrics.items():
            message += f' {metric_name}: {score:.2f}.'
        pbar.log_message(message)
        pbar.n = pbar.last_print_n = 0

    validation_evaluator.add_event_handler(
        Events.COMPLETED,
        EarlyStopping(patience=5, score_function=score_function,
                      trainer=trainer))

    checkpointer = ModelCheckpoint(dirname=DIR_MODELS,
                                   filename_prefix=filename_prefix,
                                   score_function=score_function,
                                   score_name='top3-accuracy',
                                   n_saved=2,
                                   create_dir=True,
                                   save_as_state_dict=True,
                                   require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                              {'v2': model})

    trainer.run(dataloader_train, max_epochs=20)
def prune_train_loop(model, params, ds, dset, min_y, base_data, model_id,
                     prune_type, device, batch_size, tpa, max_epochs=2):
    """Prune filters chosen by `select_filters` from conv1, then fine-tune.

    Parameters
    ----------
    model : network whose `conv1` filters are pruned via TuckerStructured.
    params : dict with 'lr', 'momentum' and 'l2_wd' for the SGD optimizer.
    ds : (train DataLoader, valid DataLoader) pair.
    dset : (train dataset, valid dataset) pair.
    min_y : (train label offset, valid label offset) subtracted from targets.
    base_data : root directory for summary logs and checkpoints.
    model_id : base identifier; suffixed with prune type/amount for this run.
    prune_type : 'global_unstructured' or 'structured'.
    device : torch device to run on.
    batch_size : training batch size (also used to derive validation frequency).
    tpa : number of filters to remove.
    max_epochs : number of fine-tuning epochs. Default: 2.
    """
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}'
    valid_freq = 200 * 500 // batch_size // 3
    conv_layers = [model.conv1]

    def prune_model(model):
        # remove_amount = total_prune_amount // (max_epochs)
        remove_amount = total_prune_amount
        print(f'pruned model by {remove_amount}')
        worst = select_filters(model, ds_valid, valid_set, remove_amount,
                               device)
        # Deduplicate the selected filter indices, then sort descending so
        # the comparison against zeroed masks below is order-stable.
        worst = list(Counter(torch.stack(worst).view(-1).cpu().numpy()))
        worst.sort(reverse=True)
        print(worst)
        for layer in conv_layers:
            for d in worst:
                TuckerStructured(layer, name='weight', amount=0, dim=0, filt=d)
        return worst

    bad = prune_model(model)

    # Sanity check: exactly the selected filters should have zeroed masks.
    zeros = []
    wrong = []
    for i in range(len(model.conv1.weight_mask)):
        if torch.sum(model.conv1.weight_mask[i]) == 0.0:
            zeros.append(i)
    zeros.sort(reverse=True)
    if zeros == bad:
        print("correctly zero'd filters")
    else:
        if len(zeros) == len(bad):
            for i in range(len(zeros)):
                if zeros[i] != bad[i]:
                    wrong.append((bad[i], zeros[i]))
            print(wrong)
        else:
            print("diff number filters zero'd", zeros)

    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            # (removed a duplicate optimizer.zero_grad() that preceded the
            # forward pass — one call before backward() is sufficient)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error",
                              1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # NOTE(review): steps the plateau scheduler on *accuracy*, but
            # ReduceLROnPlateau defaults to mode='min' — confirm intended.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            # Evaluate metrics on the current batch only.
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def validation_value(engine):
            # Used as the checkpoint score function below.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq),
                                  handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def adv_train_loop(model, params, ds, min_y, base_data, model_id, attack_type,
                   device, batch_size, max_epochs=5):
    """Adversarially train a copy of `model` on clean + perturbed batches.

    Parameters
    ----------
    model : trained network; a deep copy is fine-tuned, the original is kept
        frozen (it is used to build the ART classifier wrapper).
    params : dict with 'lr', 'momentum' and 'l2_wd' for the SGD optimizer.
    ds : (train DataLoader, valid DataLoader) pair.
    min_y : (train label offset, valid label offset) subtracted from targets.
    base_data : root directory for summary logs and checkpoints.
    model_id : base identifier; suffixed with the attack type for this run.
    attack_type : one of 'fgsm', 'ffa', 'carlini', 'lbfgs'.
    device : torch device to run on.
    batch_size : training batch size (also used to derive validation frequency).
    max_epochs : number of training epochs. Default: 5.

    Raises
    ------
    ValueError : if `attack_type` is not recognized.
    """
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            # NOTE(review): 'lbfgs' maps to DeepFool here — confirm intended.
            attack = DeepFool(classifier=classifier)
        else:
            # Previously `attack` silently stayed None and the first
            # attack.perturb() call crashed; fail fast instead.
            raise ValueError(f'unknown attack_type: {attack_type}')

        # Iterations between validation runs (previously repeated inline).
        valid_freq = 200 * 5000 // batch_size // 10

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            # Train on clean and adversarial examples together.
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error",
                              1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # NOTE(review): steps the plateau scheduler on *accuracy*, but
            # ReduceLROnPlateau defaults to mode='min' — confirm intended.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            # Evaluate metrics on the current batch only.
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def validation_value(engine):
            # Used as the checkpoint score function below.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=valid_freq), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def create_evaluator(model, cfg):
    """Build an ignite evaluator reporting accuracy, precision, recall and F1
    for the word- and syllable-level heads, plus averaged loss components."""

    def _validation_step(_, batch):
        model.eval()
        with torch.no_grad():
            tensors = batch_to_tensor(batch, cfg)
            x_char, x_type, y_word, y_syllable = (
                t.to(cfg.device) for t in tensors)
            logits_word, logits_syllable = model(x_char, x_type)
            loss, word_loss, syllable_loss, align_loss = model.joint_loss(
                logits_word, y_word, logits_syllable, y_syllable)
            # Threshold logits at 0.5 to obtain hard predictions.
            return ((logits_word > 0.5).long(), y_word,
                    (logits_syllable > 0.5).long(), y_syllable,
                    loss, word_loss, syllable_loss, align_loss)

    evaluator = Engine(_validation_step)

    # Output tuple layout: [0:2] word preds/targets, [2:4] syllable
    # preds/targets, [4:8] loss components.
    word_pair = lambda x: x[0:2]
    syll_pair = lambda x: x[2:4]

    Accuracy(word_pair).attach(evaluator, 'w_acc')
    Accuracy(syll_pair).attach(evaluator, 's_acc')

    Average(lambda x: x[4]).attach(evaluator, 'loss')
    Average(lambda x: x[5]).attach(evaluator, 'w_loss')
    Average(lambda x: x[6]).attach(evaluator, 's_loss')
    Average(lambda x: x[7]).attach(evaluator, 'a_loss')

    # Word accuracy is also exposed under the plain "acc" name.
    Accuracy(word_pair).attach(evaluator, "acc")

    def attach_mean(metric, name):
        # Attach a scalar (class-averaged) view of a per-class metric.
        MetricsLambda(lambda t: torch.mean(t).item(),
                      metric).attach(evaluator, name)

    w_precision = Precision(word_pair)
    w_precision.attach(evaluator, 'WP')
    attach_mean(w_precision, "WMP")

    s_precision = Precision(syll_pair)
    s_precision.attach(evaluator, 'SP')
    attach_mean(s_precision, "SMP")

    w_recall = Recall(word_pair)
    w_recall.attach(evaluator, 'WR')
    attach_mean(w_recall, "WMR")

    s_recall = Recall(syll_pair)
    s_recall.attach(evaluator, 'SR')
    attach_mean(s_recall, "SMR")

    w_f1 = 2. * w_precision * w_recall / (w_precision + w_recall + 1e-20)
    attach_mean(w_f1, "WF1")

    s_f1 = 2. * s_precision * s_recall / (s_precision + s_recall + 1e-20)
    attach_mean(s_f1, "SF1")

    return evaluator
def prune_train_loop(model, params, ds, min_y, base_data, model_id, prune_type,
                     device, batch_size, max_epochs=5):
    """Train a (ResNet-style) model while iteratively pruning its conv layers.

    Pruning is applied once up front and then again after every validation
    interval, so sparsity grows over the course of training.  Checkpoints are
    scored by validation accuracy.

    :param model: network with ``conv1`` and ``layer1..layer4`` bottleneck
        blocks (ResNet-like); pruned in place.
    :param params: dict with 'lr', 'momentum' and 'l2_wd' entries.
    :param ds: (train_dataloader, valid_dataloader) pair.
    :param min_y: (train_label_offset, valid_label_offset) subtracted from the
        raw labels to make them 0-based.
    :param base_data: root directory for summaries and checkpoints.
    :param model_id: base identifier; the prune type is appended.
    :param prune_type: 'global_unstructured' or 'structured'.
    :param device: torch device for model/data.
    :param batch_size: used only to derive logging/validation intervals.
    :param max_epochs: number of training epochs.
    """
    assert prune_type in ['global_unstructured', 'structured']
    # Global unstructured pruning tolerates a higher total sparsity target.
    total_prune_amount = 0.3 if prune_type == 'global_unstructured' else 0.1
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning'
    # Collect every conv layer of the backbone (stem + bottleneck convs).
    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend(
                [bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

    def prune_model(model):
        # Each call removes a small fraction; repeated calls accumulate
        # toward (roughly) the total target over the run.
        remove_amount = total_prune_amount / (max_epochs * 10)
        print(f'pruned model by {remove_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=remove_amount,
            )
        else:
            # Structured: prune whole output channels by L1 norm.
            for layer in conv_layers:
                prune.ln_structured(layer, name='weight',
                                    amount=remove_amount, n=1, dim=0)

    prune_model(model)
    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom,
                                    weight_decay=wd)
        # NOTE(review): default mode is 'min', but lr_scheduler below feeds
        # this scheduler the validation *accuracy* — confirm intent.
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        # Reuse the Loss metric's underlying criterion for the train step.
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            # NOTE(review): second zero_grad is redundant (nothing has
            # populated gradients between the two calls).
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            # Forward-only pass on a training batch for metric computation.
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            # Run validation, log it, then prune a little more.
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)
            prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # NOTE(review): despite the name, this passes accuracy (not NLL)
            # to a mode='min' ReduceLROnPlateau — verify which was intended.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            # Re-evaluate the current batch in eval mode to get clean metrics.
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train),
                             accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll,
                              engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            # Used as the checkpoint score below; reads the metrics computed
            # by log_validation_results at the same interval.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value, score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)
        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def validation_step(engine, batch):
    """Forward-only evaluation step: returns class probabilities and labels
    for the attached Accuracy/Loss metrics.  Assumes CUDA is available
    (hard-coded ``.cuda()`` calls)."""
    # return torch.rand(16, 101), torch.zeros(16).long() # debug
    model.eval()
    with torch.no_grad():
        video, class_num = batch["video"].cuda(), batch["class"].cuda()
        pred = model(video)
        pred = F.softmax(pred, dim=1)
        # torch.cuda.empty_cache()
    return pred, class_num

evaluator = Engine(validation_step)
# Attach accuracy and cross-entropy loss under the names the logging code
# reads from evaluator.state.metrics.
accuracy_metric = Accuracy()
accuracy_metric.attach(evaluator, "accuracy")
ce_loss_metric = Loss(ce_loss_fn)
ce_loss_metric.attach(evaluator, "loss")

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    # Per-iteration console logging of the trainer's scalar output (the loss).
    e = engine.state.epoch
    i = engine.state.iteration
    loss = engine.state.output
    print(f"Epoch: {e} / {cfg.epochs} : {i} - Loss: {loss:.5f}")
    # if wandb_online:
    #     wandb.log({"loss": loss})

@trainer.on(Events.EPOCH_COMPLETED)
model.eval() x, y = batch x.requires_grad_(True) z, y_pred = model(x) return y_pred, y, x, z trainer = Engine(step) evaluator = Engine(eval_step) metric = Accuracy(output_transform=output_transform_acc) metric.attach(evaluator, "accuracy") metric = Loss(F.binary_cross_entropy, output_transform=output_transform_bce) metric.attach(evaluator, "bce") metric = Loss(calc_gradient_penalty, output_transform=output_transform_gp) metric.attach(evaluator, "gp") ds_train = torch.utils.data.TensorDataset( torch.from_numpy(X_train).float(), F.one_hot(torch.from_numpy(y_train)).float()) dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True)
def train(name, load, lrate, weight_decay, workers, smooth, device, validation,
          ground_truth):
    """Train a ResUNet baseline-detection model on ``*.seeds.png`` ground
    truth, validating each epoch and checkpointing every epoch.

    :param name: output model name; defaults to '<lrate>_<weight_decay>'.
    :param load: optional path to a serialized model to resume from
        (``torch.load`` replaces the freshly built network entirely).
    :param lrate: Adam learning rate.
    :param weight_decay: Adam weight decay.
    :param workers: DataLoader worker count.
    :param smooth: passed through to BaselineSet (label smoothing flag).
    :param device: torch device string.
    :param validation: directory searched recursively for validation seeds.
    :param ground_truth: directory searched recursively for training seeds.
    """
    if not name:
        name = '{}_{}'.format(lrate, weight_decay)
    click.echo('model output name: {}'.format(name))
    torch.set_num_threads(1)
    train_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(ground_truth),
                                      recursive=True),
                            smooth=smooth)
    train_data_loader = DataLoader(dataset=train_set, num_workers=workers,
                                   batch_size=1, shuffle=True,
                                   pin_memory=True)
    val_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(validation),
                                    recursive=True),
                          smooth=smooth)
    val_data_loader = DataLoader(dataset=val_set, num_workers=workers,
                                 batch_size=1, pin_memory=True)
    click.echo('loading network')
    model = ResUNet(refine_encoder=False).to(device)
    if load:
        click.echo('loading weights')
        model = torch.load(load, map_location=device)
    criterion = nn.BCEWithLogitsLoss()
    opti = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=lrate, weight_decay=weight_decay)

    def score_function(engine):
        # Negated so that "higher is better" conventions favor low loss.
        # NOTE(review): appears unused in this function — verify.
        val_loss = engine.state.metrics['loss']
        return -val_loss

    def output_preprocess(output):
        # Sigmoid + denoising hysteresis threshold to binarize the raw
        # logits before metric computation.
        o, target = output
        o = torch.sigmoid(o)
        o = denoising_hysteresis_thresh(o.detach().squeeze().cpu().numpy(),
                                        0.8, 0.9, 2.5)
        return torch.from_numpy(o.astype('f')).unsqueeze(0).unsqueeze(0).to(
            device), target.double().to(device)

    trainer = create_supervised_trainer(model, opti, criterion, device=device,
                                        non_blocking=True)
    accuracy = Accuracy(output_transform=output_preprocess)
    precision = Precision(output_transform=output_preprocess)
    recall = Recall(output_transform=output_preprocess)
    loss = Loss(criterion)
    # NOTE(review): the two reassignments below discard the
    # output_preprocess transforms attached two lines up, so precision and
    # recall are computed on raw (unthresholded) evaluator output — confirm
    # which behavior is intended.
    precision = Precision(average=False)
    recall = Recall(average=False)
    f1 = (precision * recall * 2 / (precision + recall)).mean()
    evaluator = create_supervised_evaluator(model, device=device,
                                            non_blocking=True)
    accuracy.attach(evaluator, 'accuracy')
    precision.attach(evaluator, 'precision')
    recall.attach(evaluator, 'recall')
    loss.attach(evaluator, 'loss')
    f1.attach(evaluator, 'f1')
    # NOTE(review): save_interval is a deprecated/removed ModelCheckpoint
    # argument in recent ignite versions — pin the ignite version or migrate.
    ckpt_handler = ModelCheckpoint('.', name, save_interval=1, n_saved=10,
                                   require_empty=False)
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
    progress_bar = ProgressBar(persist=True)
    progress_bar.attach(trainer, ['loss'])
    trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                              handler=ckpt_handler, to_save={'net': model})
    trainer.add_event_handler(event_name=Events.ITERATION_COMPLETED,
                              handler=TerminateOnNan())

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_data_loader)
        metrics = evaluator.state.metrics
        progress_bar.log_message(
            'eval results - epoch {} loss: {:.4f} f1: {:.4f}, accuracy: {:.4f} recall: {:.4f} precision {:.4f}'
            .format(engine.state.epoch, metrics['loss'], metrics['f1'],
                    metrics['accuracy'], metrics['recall'],
                    metrics['precision']))

    trainer.run(train_data_loader, max_epochs=1000)
def create_zero_shot_eval_engine(self, model, zero_shot_label, model_mapping,
                                 label_mapping, is_test_multilabel, cpu):
    """Build an ignite evaluation engine for a zero-shot binary task.

    The model was trained on one label set, the test data uses another; a
    single "unifying" label (``zero_shot_label``) exists in both.  The engine
    reduces the model's multi-class output to a binary in-class/out-of-class
    problem and attaches accuracy, precision/recall, F1/F2 and their averaged
    variants.

    :param model: trained classifier producing per-class logits.
    :param zero_shot_label: lowercase substring identifying the unifying
        label in both mappings.
    :param model_mapping: dict mapping model label names -> class indices.
    :param label_mapping: dict mapping gold label names -> class indices.
    :param is_test_multilabel: if True, gold labels are a multilabel matrix
        and the target column is selected; otherwise labels are class ids.
    :param cpu: run inference on CPU instead of CUDA.
    :returns: configured ignite ``Engine``.
    """
    # Iterate through all labels in both the train and test sets to see which
    # labels correspond to the zero shot label (the unifying label).
    # (Loop variable renamed from `int`, which shadowed the builtin.)
    model_target_int = [
        class_idx for label, class_idx in model_mapping.items()
        if zero_shot_label in label.lower()
    ]
    label_target_int = [
        class_idx for label, class_idx in label_mapping.items()
        if zero_shot_label in label.lower()
    ]
    # There should only be one unifying label in each dataset
    # (Possible TODO: Allow multiple labels to map to one unifying label)
    assert len(
        model_target_int
    ) == 1, f"Ambiguous or empty model label list when trying to map {zero_shot_label} to {model_target_int}"
    assert len(
        label_target_int
    ) == 1, f"Ambiguous or empty gold label list when trying to map {zero_shot_label} to {label_target_int}"
    model_target_int = model_target_int[0]
    label_target_int = label_target_int[0]

    def process_function(engine, batch):
        # One evaluation step: returns (binary probabilities, binary golds).
        X, y = batch
        if cpu:
            pred = model(X.cpu())
            gold = y.cpu()
        else:
            pred = model(X.cuda())
            gold = y.cuda()
        # Get the softmax of the raw model output (logits)
        pred = torch.softmax(pred, dim=1)
        # Get the probability that the prediction is the target class
        pred_in_class_prob = pred[:, [model_target_int]]
        # Get all the probabilities of all the other classes outside the
        # target class by finding the complement of the in class probability
        pred_out_class_prob = 1 - pred_in_class_prob
        # Create a combined tensor which acts as a set of probabilities for
        # in vs out of the zero-shot target class.  Column 0 is out of class,
        # column 1 is in class.
        pred = torch.cat((pred_out_class_prob, pred_in_class_prob), dim=1)
        if is_test_multilabel:
            # If test task is multilabel, get the values from the appropriate
            # column of the truth labels
            gold = gold[:, label_target_int]
        else:
            # Gold is 1 (True) iff the gold label is the zero-shot label.
            gold = (gold == label_target_int).long()
        return pred, gold

    eval_engine = Engine(process_function)

    # Epsilon keeps the F-score denominators away from zero.
    really_small_number = 1e-10
    accuracy = Accuracy()
    accuracy.attach(eval_engine, "accuracy")
    recall = Recall()
    recall.attach(eval_engine, "recall")
    precision = Precision()
    precision.attach(eval_engine, "precision")
    f1 = (precision * recall * 2 /
          (precision + recall + really_small_number))
    f1.attach(eval_engine, "f1")
    f2 = (precision * recall * 5 /
          ((4 * precision) + recall + really_small_number))
    f2.attach(eval_engine, "f2")
    avg_recall = Recall(average=True)
    avg_recall.attach(eval_engine, "average recall")
    avg_precision = Precision(average=True)
    avg_precision.attach(eval_engine, "average precision")
    avg_f1 = (avg_precision * avg_recall * 2 /
              (avg_precision + avg_recall + really_small_number))
    avg_f1.attach(eval_engine, "average f1")
    avg_f2 = (avg_precision * avg_recall * 5 /
              ((4 * avg_precision) + avg_recall + really_small_number))
    avg_f2.attach(eval_engine, "average f2")
    return eval_engine
def train_loop(model, params, ds, min_y, base_data, model_id, device,
               batch_size, max_epochs=2):
    """Standard (non-pruning) SGD training loop with ignite: epoch-level
    validation, periodic batch-level metric logging, LR plateau scheduling
    and accuracy-scored checkpoints.

    :param model: network to train (moved data assumed on ``device``).
    :param params: dict with 'lr', 'momentum' and 'l2_wd' entries.
    :param ds: (train_dataloader, valid_dataloader) pair.
    :param min_y: (train_label_offset, valid_label_offset) subtracted from
        raw labels to make them 0-based.
    :param base_data: root directory for summaries and checkpoints.
    :param model_id: identifier for summary/checkpoint directories.
    :param device: torch device.
    :param batch_size: batch size for the in-loop metric evaluation and for
        deriving the checkpoint interval.
    :param max_epochs: number of training epochs.
    """
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    with create_summary_writer(model, ds_train, base_data, model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom,
                                    weight_decay=wd)
        # NOTE(review): default mode is 'min', but lr_scheduler below feeds
        # this scheduler the validation *accuracy* — confirm intent.
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        # Reuse the Loss metric's underlying criterion for the train step.
        loss = funcs['loss']._loss_fn
        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)
        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # return ans, y
            return l.item()

        trainer = Engine(train_step)
        # acc_metric.attach(trainer, "accuracy")
        # loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            # Forward-only pass on a training batch for metric computation.
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_train
                ans = model.forward(x)
                return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_val
                ans = model.forward(x)
                return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # NOTE(review): despite the name, this passes accuracy (not NLL)
            # to a mode='min' ReduceLROnPlateau — verify which was intended.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            # Re-evaluate the current batch in eval mode for clean metrics.
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train),
                             accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll,
                              engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        # @trainer.on(Events.EPOCH_COMPLETED)
        # def log_training_results(engine):
        #     train_evaluator.run(ds_train)
        #     metrics = train_evaluator.state.metrics
        #     # metrics = engine.state.metrics
        #     avg_accuracy = metrics['accuracy']
        #     avg_nll = metrics['loss']
        #     print("Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
        #           .format(engine.state.epoch, avg_accuracy, avg_nll))
        #     writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
        #     writer.add_scalar("training/avg_accuracy",
        #                       avg_accuracy, engine.state.epoch)
        #     writer.add_scalar("training/avg_error", 1. -
        #                       avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def validation_value(engine):
            # Checkpoint score; reads metrics from the epoch-level validation.
            # NOTE(review): the checkpoint handler below fires on an
            # iteration interval, so it may read metrics from the *previous*
            # epoch's validation run — confirm intended.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value, score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)
        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 5),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
def train_model(l_gradient_penalty, length_scale, final_model, epochs,
                input_dep_ls, use_grad_norm):
    """Train a DUQ (deterministic uncertainty quantification) CNN on
    FashionMNIST with a two-sided gradient penalty, then report validation
    and test accuracy.  Requires CUDA (hard-coded ``.cuda()`` calls).

    :param l_gradient_penalty: weight of the gradient-penalty loss term.
    :param length_scale: RBF length scale for the DUQ head.
    :param final_model: if True, train on the full train set and validate on
        the test set; otherwise use a 55k/5k train/val split.
    :param epochs: number of training epochs.
    :param input_dep_ls: input-dependent length scale flag (mutually
        exclusive with a learnable length scale).
    :param use_grad_norm: if True, rescale the loss by 1/(1 + lambda).
    :returns: (model, val_accuracy, test_accuracy).
    """
    input_size = 28
    num_classes = 10
    embedding_size = 256
    learnable_length_scale = False  # Learnable length scale
    gamma = 0.999
    if input_dep_ls and learnable_length_scale:  # only one can be True
        learnable_length_scale = False
    ## Main (FashionMNIST) and ood (Mnist) Dataset
    dataset = FastFashionMNIST("data/", train=True, download=True)
    test_dataset = FastFashionMNIST("data/", train=False, download=True)
    idx = list(range(60000))
    random.shuffle(idx)
    if final_model:
        train_dataset = dataset
        val_dataset = test_dataset
    else:
        train_dataset = torch.utils.data.Subset(dataset, indices=idx[:55000])
        val_dataset = torch.utils.data.Subset(dataset, indices=idx[55000:])
    dl_train = torch.utils.data.DataLoader(train_dataset, batch_size=128,
                                           shuffle=True, num_workers=0,
                                           drop_last=True)
    dl_val = torch.utils.data.DataLoader(val_dataset, batch_size=2000,
                                         shuffle=False, num_workers=0)
    dl_test = torch.utils.data.DataLoader(test_dataset, batch_size=2000,
                                          shuffle=False, num_workers=0)
    # Model (module-level so the OOD helpers below can reach it)
    global model
    model = CNN_DUQ(input_size, num_classes, embedding_size,
                    learnable_length_scale, length_scale, gamma, input_dep_ls)
    model = model.cuda()
    # model.load_state_dict(torch.load("DUQ_FM_30_FULL.pt"))
    # Optimiser
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9,
                                weight_decay=1e-4)

    # Evaluator output is (y_pred, y_onehot, x, y_pred_sum); the transforms
    # below pick out the pieces each metric needs.
    def output_transform_bce(output):
        y_pred, y, _, _ = output
        return y_pred, y

    def output_transform_acc(output):
        y_pred, y, _, _ = output
        return y_pred, torch.argmax(y, dim=1)

    def output_transform_gp(output):
        y_pred, y, x, y_pred_sum = output
        return x, y_pred_sum

    def calc_gradient_penalty(x, y_pred_sum):
        # d(sum of predictions)/d(input), penalized toward unit L2 norm.
        gradients = torch.autograd.grad(
            outputs=y_pred_sum,
            inputs=x,
            grad_outputs=torch.ones_like(y_pred_sum),
            create_graph=True,
            retain_graph=True,
        )[0]
        gradients = gradients.flatten(start_dim=1)
        # L2 norm
        grad_norm = gradients.norm(2, dim=1)
        # Two sided penalty
        gradient_penalty = ((grad_norm - 1)**2).mean()
        return gradient_penalty

    def step(engine, batch):
        # One optimization step: BCE + weighted gradient penalty, then
        # update the DUQ class embeddings with the current batch.
        model.train()
        optimizer.zero_grad()
        x, y = batch
        y = F.one_hot(y, num_classes=10).float()
        x, y = x.cuda(), y.cuda()
        x.requires_grad_(True)  # needed for the input-gradient penalty
        z, y_pred = model(x)
        loss = F.binary_cross_entropy(y_pred, y)
        loss += l_gradient_penalty * calc_gradient_penalty(x, y_pred.sum(1))
        if use_grad_norm:  # gradient normalization
            loss /= (1 + l_gradient_penalty)
        x.requires_grad_(False)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            model.eval()
            model.update_embeddings(x, y)
        return loss.item()

    def eval_step(engine, batch):
        model.eval()
        x, y = batch
        y = F.one_hot(y, num_classes=10).float()
        x, y = x.cuda(), y.cuda()
        x.requires_grad_(True)  # gradient-penalty metric needs input grads
        z, y_pred = model(x)
        return y_pred, y, x, y_pred.sum(1)

    trainer = Engine(step)
    evaluator = Engine(eval_step)
    metric = Accuracy(output_transform=output_transform_acc)
    metric.attach(evaluator, "accuracy")
    metric = Loss(F.binary_cross_entropy,
                  output_transform=output_transform_bce)
    metric.attach(evaluator, "bce")
    metric = Loss(calc_gradient_penalty, output_transform=output_transform_gp)
    metric.attach(evaluator, "gradient_penalty")
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20],
                                                     gamma=0.2)
    pbar = ProgressBar()
    pbar.attach(trainer)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        scheduler.step()
        # logging every 5 epoch
        if trainer.state.epoch % 5 == 0:
            evaluator.run(dl_val)
            # AUROC on FashionMNIST + Mnist / NotMnist
            accuracy, roc_auc_mnist = get_fashionmnist_mnist_ood(model)
            accuracy, roc_auc_notmnist = get_fashionmnist_notmnist_ood(model)
            metrics = evaluator.state.metrics
            print(f"Validation Results - Epoch: {trainer.state.epoch} "
                  f"Val_Acc: {metrics['accuracy']:.4f} "
                  f"BCE: {metrics['bce']:.2f} "
                  f"GP: {metrics['gradient_penalty']:.6f} "
                  f"AUROC MNIST: {roc_auc_mnist:.4f} "
                  f"AUROC NotMNIST: {roc_auc_notmnist:.2f} ")
            # print(f"Sigma: {model.sigma}")

    # Train
    trainer.run(dl_train, max_epochs=epochs)
    # Validation
    evaluator.run(dl_val)
    val_accuracy = evaluator.state.metrics["accuracy"]
    # Test
    evaluator.run(dl_test)
    test_accuracy = evaluator.state.metrics["accuracy"]
    return model, val_accuracy, test_accuracy
def train(model, train_loader, eval_loaders, optimizer, loss_fn, n_it_max,
          patience, split_names, select_metric='Val accuracy_0',
          select_mode='max', viz=None, device='cpu', lr_scheduler=None,
          name=None, log_steps=None, log_epoch=False, _run=None,
          prepare_batch=_prepare_batch, single_pass=False, n_ep_max=None):
    """Train ``model`` with ignite, periodically evaluating on every loader
    in ``eval_loaders``, tracking the best model by ``select_metric`` and
    supporting iteration-budget early stopping.

    :param model: network; optional hooks/attributes (``new_epoch_hook``,
        ``new_iter_hook``, ``s``, ``arch_sampler``, ``cur_split``,
        ``n_out``, ``iter_per_epoch``) enable extra logging/behavior.
    :param train_loader: training DataLoader.
    :param eval_loaders: loaders evaluated at each logging step, zipped
        with ``split_names`` ('Train' splits are skipped).
    :param optimizer: optimizer for the supervised trainer.
    :param loss_fn: loss; may return (loss, *details) instead of a tensor.
    :param n_it_max: iteration budget (mutually exclusive with n_ep_max).
    :param patience: early-stopping patience in iterations (0 disables).
    :param split_names: names matching eval_loaders.
    :param select_metric: metric used to select the best checkpoint.
    :param select_mode: 'max' or 'min' for select_metric.
    :param viz: optional visdom handle for live plots.
    :param device: torch device for training/eval.
    :param lr_scheduler: optional scheduler stepped every epoch.
    :param name: unused here beyond identification (kept for callers).
    :param log_steps: iteration numbers at which to log/evaluate; the last
        entry also acts as a periodic interval.
    :param log_epoch: if True, also log every epoch.
    :param _run: optional sacred run for scalar logging.
    :param prepare_batch: batch preparation callable for ignite.
    :param single_pass: if True, train exactly one epoch.
    :param n_ep_max: epoch budget (mutually exclusive with n_it_max).
    :returns: (final iteration, all_metrics dict, best-model record).
    """
    # print(model)
    if not log_steps and not log_epoch:
        logger.warning('/!\\ No logging during training /!\\')
    if log_steps is None:
        log_steps = []
    epoch_steps = len(train_loader)
    if log_epoch:
        log_steps.append(epoch_steps)
    # Derive the epoch budget from whichever of the three options is active.
    if single_pass:
        max_epoch = 1
    elif n_ep_max is None:
        assert n_it_max is not None
        max_epoch = int(n_it_max / epoch_steps) + 1
    else:
        assert n_it_max is None
        max_epoch = n_ep_max
    all_metrics = defaultdict(dict)
    trainer = create_supervised_trainer(model, optimizer, loss_fn,
                                        device=device,
                                        prepare_batch=prepare_batch)
    if hasattr(model, 'new_epoch_hook'):
        trainer.add_event_handler(Events.EPOCH_STARTED, model.new_epoch_hook)
    if hasattr(model, 'new_iter_hook'):
        trainer.add_event_handler(Events.ITERATION_STARTED,
                                  model.new_iter_hook)
    trainer._logger.setLevel(logging.WARNING)
    # trainer output is in the format (x, y, y_pred, loss, optionals)
    train_loss = RunningAverage(output_transform=lambda out: out[3].item(),
                                epoch_bound=True)
    train_loss.attach(trainer, 'Trainer loss')
    if hasattr(model, 's'):
        met = Average(output_transform=lambda _: float('nan')
                      if model.s is None else model.s)
        met.attach(trainer, 'cur_s')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, met.completed,
                                  'cur_s')
    if hasattr(model, 'arch_sampler') and model.arch_sampler.distrib_dim > 0:
        # Architecture-search models get extra split/entropy logging and an
        # epoch-end arch-freezing check.
        met = Average(output_transform=lambda _: float('nan')
                      if model.cur_split is None else model.cur_split)
        met.attach(trainer, 'Trainer split')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, met.completed,
                                  'Trainer split')
        # trainer.add_event_handler(Events.EPOCH_STARTED, met.started)
        all_ent = Average(
            output_transform=lambda out: out[-1]['arch_entropy_avg'].item())
        all_ent.attach(trainer, 'Trainer all entropy')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  all_ent.completed, 'Trainer all entropy')
        train_ent = Average(
            output_transform=lambda out: out[-1]['arch_entropy_sample'].item())
        train_ent.attach(trainer, 'Trainer sampling entropy')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  train_ent.completed,
                                  'Trainer sampling entropy')
        trainer.add_event_handler(
            Events.EPOCH_COMPLETED, lambda engine: model.check_arch_freezing(
                ent=train_ent.compute(),
                epoch=engine.state.iteration / (epoch_steps * max_epoch)))

        def log_always(engine, name):
            # Record a tensor-valued diagnostic from the trainer output dict.
            val = engine.state.output[-1][name]
            all_metrics[name][engine.state.iteration /
                              epoch_steps] = val.mean().item()

        def log_always_dict(engine, name):
            # Same, but for per-node dicts of tensors.
            for node, val in engine.state.output[-1][name].items():
                all_metrics['node {} {}'.format(
                    node, name)][engine.state.iteration /
                                 epoch_steps] = val.mean().item()

        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always_dict,
                                  name='arch_grads')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always_dict,
                                  name='arch_probas')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always_dict,
                                  name='node_grads')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always,
                                  name='task all_loss')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always,
                                  name='arch all_loss')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, log_always,
                                  name='entropy all_loss')
    if n_it_max is not None:
        StopAfterIterations([n_it_max]).attach(trainer)
    # epoch_pbar = ProgressBar(bar_format='{l_bar}{bar}{r_bar}', desc=name,
    #                          persist=True, disable=not (_run or viz))
    # epoch_pbar.attach(trainer, metric_names=['Train loss'])
    #
    # training_pbar = ProgressBar(bar_format='{l_bar}{bar}{r_bar}', desc=name,
    #                             persist=True, disable=not (_run or viz))
    # training_pbar.attach(trainer, event_name=Events.EPOCH_COMPLETED,
    #                      closing_event_name=Events.COMPLETED)

    # Wall-clock accounting: total run, eval-only, data loading, forward
    # passes and per-epoch durations.
    total_time = Timer(average=False)
    eval_time = Timer(average=False)
    eval_time.pause()
    data_time = Timer(average=False)
    forward_time = Timer(average=False)
    forward_time.attach(trainer,
                        start=Events.EPOCH_STARTED,
                        pause=Events.ITERATION_COMPLETED,
                        resume=Events.ITERATION_STARTED,
                        step=Events.ITERATION_COMPLETED)
    epoch_time = Timer(average=False)
    epoch_time.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      pause=Events.EPOCH_COMPLETED,
                      resume=Events.EPOCH_STARTED,
                      step=Events.EPOCH_COMPLETED)

    def get_loss(y_pred, y):
        # loss_fn may return (loss, *details); keep only the main term.
        l = loss_fn(y_pred, y)
        if not torch.is_tensor(l):
            l, *l_details = l
        return l.mean()

    def get_member(x, n=0):
        if isinstance(x, (list, tuple)):
            return x[n]
        return x

    eval_metrics = {'loss': Loss(get_loss)}
    for i in range(model.n_out):
        # One accuracy metric per model head/output.
        out_trans = get_attr_transform(i)

        def extract_ys(out):
            x, y, y_pred, loss, _ = out
            return out_trans((y_pred, y))

        train_acc = Accuracy(extract_ys)
        train_acc.attach(trainer, 'Trainer accuracy_{}'.format(i))
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  train_acc.completed,
                                  'Trainer accuracy_{}'.format(i))
        eval_metrics['accuracy_{}'.format(i)] = \
            Accuracy(output_transform=out_trans)
        # if isinstance(model, SSNWrapper):
        #     model.arch_sampler.entropy().mean()
    evaluator = create_supervised_evaluator(model, metrics=eval_metrics,
                                            device=device,
                                            prepare_batch=prepare_batch)
    last_iteration = 0
    patience_counter = 0
    # Best-model record updated whenever select_metric improves.
    best = {
        'value': float('inf') * 1 if select_mode == 'min' else -1,
        'iter': -1,
        'state_dict': None
    }

    def is_better(new, old):
        if select_mode == 'min':
            return new < old
        else:
            return new > old

    def log_results(evaluator, data_loader, iteration, split_name):
        # Evaluate one split, push metrics to visdom/sacred/all_metrics and
        # return them keyed as '<split> <metric>'.
        evaluator.run(data_loader)
        metrics = evaluator.state.metrics
        log_metrics = {}
        for metric_name, metric_val in metrics.items():
            log_name = '{} {}'.format(split_name, metric_name)
            if viz:
                first = iteration == 0 and split_name == split_names[0]
                viz.line([metric_val], X=[iteration], win=metric_name,
                         name=log_name,
                         update=None if first else 'append',
                         opts={
                             'title': metric_name,
                             'showlegend': True,
                             'width': 500,
                             'xlabel': 'iterations'
                         })
                viz.line([metric_val], X=[iteration / epoch_steps],
                         win='{}epoch'.format(metric_name), name=log_name,
                         update=None if first else 'append',
                         opts={
                             'title': metric_name,
                             'showlegend': True,
                             'width': 500,
                             'xlabel': 'epoch'
                         })
            if _run:
                _run.log_scalar(log_name, metric_val, iteration)
            log_metrics[log_name] = metric_val
            all_metrics[log_name][iteration] = metric_val
        return log_metrics

    if lr_scheduler is not None:

        @trainer.on(Events.EPOCH_COMPLETED)
        def step(_):
            lr_scheduler.step()
            # logger.warning('current lr {:.5e}'.format(
            #     optimizer.param_groups[0]['lr']))

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_event(trainer):
        # Main logging/evaluation/early-stopping hook; fires only at the
        # configured log steps (or multiples of the last one).
        iteration = trainer.state.iteration if trainer.state else 0
        nonlocal last_iteration, patience_counter, best
        if not log_steps or not \
                (iteration in log_steps or iteration % log_steps[-1] == 0):
            return
        epoch_time.pause()
        eval_time.resume()
        all_metrics['training_epoch'][iteration] = iteration / epoch_steps
        all_metrics['training_iteration'][iteration] = iteration
        if hasattr(model, 'arch_sampler'):
            all_metrics['training_archs'][iteration] = \
                model.arch_sampler().squeeze().detach()
        # if hasattr(model, 'distrib_gen'):
        #     entropy = model.distrib_gen.entropy()
        #     all_metrics['entropy'][iteration] = entropy.mean().item()
        # if trainer.state and len(trainer.state.metrics) > 1:
        #     raise ValueError(trainer.state.metrics)
        all_metrics['data time'][iteration] = data_time.value()
        all_metrics['data time_ps'][iteration] = data_time.value() / max(
            data_time.step_count, 1.)
        all_metrics['forward time'][iteration] = forward_time.value()
        all_metrics['forward time_ps'][iteration] = forward_time.value() / max(
            forward_time.step_count, 1.)
        all_metrics['epoch time'][iteration] = epoch_time.value()
        all_metrics['epoch time_ps'][iteration] = epoch_time.value() / max(
            epoch_time.step_count, 1.)
        if trainer.state:
            # logger.warning(trainer.state.metrics)
            for metric, value in trainer.state.metrics.items():
                all_metrics[metric][iteration] = value
                if viz:
                    viz.line([value], X=[iteration],
                             win=metric.split()[-1],
                             name=metric,
                             update=None if iteration == 0 else 'append',
                             opts={
                                 'title': metric,
                                 'showlegend': True,
                                 'width': 500,
                                 'xlabel': 'iterations'
                             })
        iter_this_step = iteration - last_iteration
        for d_loader, name in zip(eval_loaders, split_names):
            if name == 'Train':
                # Training metrics come from the trainer itself; just seed
                # NaNs at iteration 0 so plots have a first point.
                if iteration == 0:
                    all_metrics['Trainer loss'][iteration] = float('nan')
                    all_metrics['Trainer accuracy_0'][iteration] = float('nan')
                    if hasattr(model, 'arch_sampler'):
                        all_metrics['Trainer all entropy'][iteration] = float(
                            'nan')
                        all_metrics['Trainer sampling entropy'][
                            iteration] = float('nan')
                        # if hasattr(model, 'cur_split'):
                        all_metrics['Trainer split'][iteration] = float('nan')
                continue
            split_metrics = log_results(evaluator, d_loader, iteration, name)
            if select_metric not in split_metrics:
                continue
            if is_better(split_metrics[select_metric], best['value']):
                best['value'] = split_metrics[select_metric]
                best['iter'] = iteration
                best['state_dict'] = copy.deepcopy(model.state_dict())
                if patience > 0:
                    patience_counter = 0
            elif patience > 0:
                patience_counter += iter_this_step
                if patience_counter >= patience:
                    logger.info('#####')
                    logger.info('# Early stopping Run')
                    logger.info('#####')
                    trainer.terminate()
        last_iteration = iteration
        eval_time.pause()
        eval_time.step()
        all_metrics['eval time'][iteration] = eval_time.value()
        all_metrics['eval time_ps'][iteration] = eval_time.value(
        ) / eval_time.step_count
        all_metrics['total time'][iteration] = total_time.value()
        epoch_time.resume()

    # Log once before training so iteration 0 has entries.
    log_event(trainer)

    # # @trainer.on(Events.EPOCH_COMPLETED)
    # def log_epoch(trainer):
    #     iteration = trainer.state.iteration if trainer.state else 0
    #     epoch = iteration/epoch_steps
    #     fw_t = forward_time.value()
    #     fw_t_ps = fw_t / forward_time.step_count
    #     d_t = data_time.value()
    #     d_t_ps = d_t / data_time.step_count
    #     e_t = epoch_time.value()
    #     e_t_ps = e_t / epoch_time.step_count
    #     ev_t = eval_time.value()
    #     ev_t_ps = ev_t / eval_time.step_count
    #     logger.warning('<{}> Epoch {}/{} finished (Forward: {:.3f}s({:.3f}), '
    #                    'data: {:.3f}s({:.3f}), epoch: {:.3f}s({:.3f}),'
    #                    ' Eval: {:.3f}s({:.3f}), Total: '
    #                    '{:.3f}s)'.format(type(model).__name__, epoch,
    #                                      max_epoch, fw_t, fw_t_ps, d_t, d_t_ps,
    #                                      e_t, e_t_ps, ev_t, ev_t_ps,
    #                                      total_time.value()))

    data_time.attach(trainer,
                     start=Events.STARTED,
                     pause=Events.ITERATION_STARTED,
                     resume=Events.ITERATION_COMPLETED,
                     step=Events.ITERATION_STARTED)

    if hasattr(model, 'iter_per_epoch'):
        model.iter_per_epoch = len(train_loader)
    trainer.run(train_loader, max_epochs=max_epoch)
    return trainer.state.iteration, all_metrics, best
def run(model, criterion, optimizer, epochs=100, log_interval=10):
    """Train ``model`` with a combined label + structured loss, logging to visdom.

    Builds an ignite trainer/evaluator ``Engine`` pair, attaches
    accuracy / NLL / precision metrics computed on the classification head,
    plots per-iteration training loss and per-epoch train/validation metrics
    to a visdom server, and checkpoints the model after every epoch.

    Args:
        model: network returning ``(class_output, structured_output)``.
        criterion: dict with ``'label'`` and ``'structured'`` loss callables.
        optimizer: torch optimizer over ``model``'s parameters.
        epochs: maximum number of training epochs.
        log_interval: iteration period for printing/plotting the batch loss.

    Side effects: reads the module-level ``dataloaders`` dict and ``device``;
    writes checkpoints under ``/1116/tmp/lift_models``; requires a running
    visdom server.
    """
    vis = visdom.Visdom(env='ft_lift_ignite')
    train_loader = dataloaders['train']
    val_loader = dataloaders['test']

    def update_model(trainer, batch):
        # One optimization step on a batch.
        # BUGFIX: restore training mode. The evaluator switches the model to
        # eval() at every epoch end, and nothing switched it back, so epochs
        # after the first trained with dropout/batch-norm frozen.
        model.train()
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        class_output, structured_output = model(inputs)
        loss = (criterion['label'](class_output, labels)
                + criterion['structured'](structured_output, labels))
        loss.backward()
        optimizer.step()
        return {'loss': loss.item(),
                'class_output': class_output,
                'structured_output': structured_output,
                'labels': labels}

    trainer = Engine(update_model)

    def _inference(evaluator, batch):
        # Forward-only pass feeding the attached metrics.
        model.eval()
        with torch.no_grad():
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            class_output, structured_output = model(inputs)
            loss = (criterion['label'](class_output, labels)
                    + criterion['structured'](structured_output, labels))
            return {'loss': loss.item(),
                    'class_output': class_output,
                    'structured_output': structured_output,
                    'labels': labels}

    evaluator = Engine(_inference)

    # All attached metrics score the classification head only.
    def class_output_transform(data):
        return data['class_output'], data['labels']

    metric_accuracy = Accuracy(output_transform=class_output_transform)
    metric_accuracy.attach(evaluator, 'accuracy')
    metric_nll = Loss(criterion['label'], output_transform=class_output_transform)
    metric_nll.attach(evaluator, 'nll')
    metric_precision = Precision(average=True, output_transform=class_output_transform)
    metric_precision.attach(evaluator, 'precision')

    # Epoch-end checkpointing; keeps up to 150 snapshots.
    handler = ModelCheckpoint('/1116/tmp/lift_models', 'myprefix',
                              save_interval=1, n_saved=150,
                              require_empty=False, create_dir=True)

    train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
    train_avg_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average Loss')
    train_avg_accuracy_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average Accuracy')
    train_avg_precision_window = create_plot_window(vis, '#Iterations', 'Precision', 'Training Average Precision')
    val_avg_loss_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average Loss')
    val_avg_accuracy_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average Accuracy')
    val_avg_precision_window = create_plot_window(vis, '#Epochs', 'Precision', 'Validation Average Precison')

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based iteration index within the current epoch (renamed from
        # 'iter', which shadowed the builtin).
        epoch_iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if epoch_iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, epoch_iter, len(train_loader),
                            engine.state.output['loss']))
            vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([engine.state.output['loss']]),
                     update='append', win=train_loss_window)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        # Re-run the evaluator over the training set for epoch-level metrics.
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=train_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=train_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=train_avg_precision_window, update='append')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        # Same metrics on the held-out split.
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=val_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=val_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=val_avg_precision_window, update='append')

    # kick everything off
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'mymodel': model})
    trainer.run(train_loader, max_epochs=epochs)