Example #1
def test_pbar_output_warning(capsys):
    loader = [1, 2, 3, 4, 5]

    def update_fn(engine, batch):
        return torch.zeros(1, 2, 3, 4)

    engine = Engine(update_fn)

    pbar = ProgressBar(desc="Output tensor")
    pbar.attach(engine, output_transform=lambda x: x)
    with pytest.warns(UserWarning):
        engine.run(loader, max_epochs=1)
Example #2
def train():

    writer = SummaryWriter()

    net, optimiser, lr_scheduler, train_loader, val_loader = cifar10_experiment()

    # Pre-training pruning using SNIP
    keep_masks = SNIP(net, 0.05, train_loader, device)  # TODO: shuffle?
    apply_prune_mask(net, keep_masks)

    trainer = create_supervised_trainer(net, optimiser, F.nll_loss, device)
    evaluator = create_supervised_evaluator(net, {
        'accuracy': Accuracy(),
        'nll': Loss(F.nll_loss)
    }, device)

    pbar = ProgressBar()
    pbar.attach(trainer)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        lr_scheduler.step()
        iter_in_epoch = (engine.state.iteration - 1) % len(train_loader) + 1
        if engine.state.iteration % LOG_INTERVAL == 0:
            # pbar.log_message("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
            #       "".format(engine.state.epoch, iter_in_epoch, len(train_loader), engine.state.output))
            writer.add_scalar("training/loss", engine.state.output,
                              engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_epoch(engine):
        evaluator.run(val_loader)

        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']

        # pbar.log_message("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
        #       .format(engine.state.epoch, avg_accuracy, avg_nll))

        writer.add_scalar("validation/loss", avg_nll, engine.state.iteration)
        writer.add_scalar("validation/accuracy", avg_accuracy,
                          engine.state.iteration)

    trainer.run(train_loader, EPOCHS)

    # Let's look at the final weights
    # for name, param in net.named_parameters():
    #     if name.endswith('weight'):
    #         writer.add_histogram(name, param)

    writer.close()
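
Note: SNIP, apply_prune_mask and cifar10_experiment are project-specific helpers from the SNIP-pruning code, and device, LOG_INTERVAL and EPOCHS are module-level settings not shown above. A minimal sketch of plausible settings (the values are assumptions, not the author's configuration):

import torch

# Assumed module-level settings for the training script above
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LOG_INTERVAL = 20    # log the training loss every 20 iterations
EPOCHS = 250         # total number of training epochs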
Example #3
    def makeTrainer(self):
        self.trainer = Engine(self.train_a_batch)
        self.evaluator = Engine(self.eval_a_batch)

        pbar = ProgressBar(persist=True, postfix=self.metrics)
        pbar.attach(self.trainer)
        pbar.attach(self.evaluator)

        # Attach event handlers
        self.trainer.add_event_handler(Events.EPOCH_COMPLETED, self.validate)
        self.trainer.add_event_handler(Events.EPOCH_COMPLETED, self.saveModel)
        self.trainer.add_event_handler(Events.ITERATION_COMPLETED, self.zeroMetrics)
        self.trainer.add_event_handler(Events.COMPLETED, self.saveModel)
Example #4
def test_pbar_on_custom_events(capsys):

    engine = Engine(update_fn)
    pbar = ProgressBar()
    with pytest.warns(DeprecationWarning,
                      match="CustomPeriodicEvent is deprecated"):
        cpe = CustomPeriodicEvent(n_iterations=15)

    with pytest.raises(ValueError,
                       match=r"not in allowed events for this engine"):
        pbar.attach(engine,
                    event_name=cpe.Events.ITERATIONS_15_COMPLETED,
                    closing_event_name=Events.EPOCH_COMPLETED)
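
Many of the test snippets in this collection reference a module-level update_fn that is not shown. A minimal sketch of such a helper, inferred from the expected progress-bar outputs (the exact body is an assumption): it returns a scalar and exposes a metric named "a".

def update_fn(engine, batch):
    # Dummy process function: expose a metric "a" so tests that call
    # pbar.attach(engine, ["a"]) have something to display, and return a scalar output.
    a = 1
    engine.state.metrics["a"] = a
    return a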
Example #5
def test_pbar_on_custom_events(capsys):

    engine = Engine(update_fn)
    pbar = ProgressBar()
    cpe = CustomPeriodicEvent(n_iterations=15)

    with pytest.raises(
            ValueError,
            match=
            r"Logging and closing events should be only ignite.engine.Events"):
        pbar.attach(engine,
                    event_name=cpe.Events.ITERATIONS_15_COMPLETED,
                    closing_event_name=Events.EPOCH_COMPLETED)
Example #6
def run(train_batch_size, val_batch_size, epochs, lr, momentum, display_gpu_info):
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    device = "cpu"

    if torch.cuda.is_available():
        device = "cuda"

    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, device=device)
    evaluator = create_supervised_evaluator(
        model, metrics={"accuracy": Accuracy(), "nll": Loss(F.nll_loss)}, device=device
    )

    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")

    if display_gpu_info:
        from ignite.contrib.metrics import GpuInfo

        GpuInfo().attach(trainer, name="gpu")

    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names="all")

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics["accuracy"]
        avg_nll = metrics["nll"]
        pbar.log_message(
            "Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}".format(
                engine.state.epoch, avg_accuracy, avg_nll
            )
        )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics["accuracy"]
        avg_nll = metrics["nll"]
        pbar.log_message(
            "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}".format(
                engine.state.epoch, avg_accuracy, avg_nll
            )
        )

        pbar.n = pbar.last_print_n = 0

    trainer.run(train_loader, max_epochs=epochs)
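
get_data_loaders and Net are defined elsewhere in the example; a minimal sketch of a get_data_loaders for the MNIST-style setup these runs assume (dataset, transforms and paths are assumptions):

from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Normalize, ToTensor


def get_data_loaders(train_batch_size, val_batch_size):
    # Standard MNIST normalization; downloads the dataset on first use
    data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
    train_loader = DataLoader(
        MNIST(root=".", train=True, download=True, transform=data_transform),
        batch_size=train_batch_size, shuffle=True)
    val_loader = DataLoader(
        MNIST(root=".", train=False, download=True, transform=data_transform),
        batch_size=val_batch_size, shuffle=False)
    return train_loader, val_loader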
Example #7
def add_progress_bar(trainer, evaluator, validation_loader, epoch_length):
    """
    "I can't believe it's not Keras"
    Running average accuracy and loss metrics + TQDM progressbar
    """
    training_history = {'accuracy': [], 'loss': []}
    validation_history = {'accuracy': [], 'loss': []}
    last_epoch = []

    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'loss')
    RunningAverage(Accuracy(output_transform=lambda x: (x[1], x[2]))).attach(
        trainer, 'accuracy')

    prog_bar = ProgressBar()
    prog_bar.attach(trainer, ['loss', 'accuracy'])
    prog_bar.pbar_cls = tqdm.tqdm

    prog_bar_vd = ProgressBar()
    prog_bar_vd.attach(evaluator)
    prog_bar_vd.pbar_cls = tqdm.tqdm

    from ignite.handlers import Timer

    timer = Timer(average=True)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.EPOCH_STARTED,
                 pause=Events.EPOCH_COMPLETED,
                 step=Events.EPOCH_COMPLETED)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        metrics = trainer.state.metrics
        accuracy = metrics['accuracy'] * 100
        loss = metrics['loss']
        last_epoch.append(0)
        training_history['accuracy'].append(accuracy)
        training_history['loss'].append(loss)
        train_msg = "Train Epoch {}:  acc: {:.2f}% loss: {:.2f}, train time: {:.2f}s".format(
            trainer.state.epoch, accuracy, loss, timer.value())

        evaluator.run(validation_loader, epoch_length=epoch_length)
        metrics = evaluator.state.metrics
        accuracy = metrics['accuracy'] * 100
        loss = metrics['nll']
        validation_history['accuracy'].append(accuracy)
        validation_history['loss'].append(loss)
        val_msg = "Valid Epoch {}:  acc: {:.2f}% loss: {:.2f}".format(
            trainer.state.epoch, accuracy, loss)

        prog_bar_vd.log_message(train_msg + " --- " + val_msg)
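
A self-contained sketch of how add_progress_bar might be exercised. The toy model, data and step functions below are assumptions chosen only to match what the helper expects: a trainer whose update step returns a (loss, y_pred, y) tuple and an evaluator with 'accuracy' and 'nll' metrics attached.

import torch
import torch.nn.functional as F
from ignite.engine import Engine
from ignite.metrics import Accuracy, Loss

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
data = [(torch.randn(8, 10), torch.randint(0, 2, (8,))) for _ in range(5)]

def train_step(engine, batch):
    model.train()
    x, y = batch
    optimizer.zero_grad()
    y_pred = model(x)
    loss = F.cross_entropy(y_pred, y)
    loss.backward()
    optimizer.step()
    # Tuple shape expected by the RunningAverage output_transforms above
    return loss.item(), y_pred.detach(), y

def eval_step(engine, batch):
    model.eval()
    with torch.no_grad():
        x, y = batch
        return model(x), y

trainer = Engine(train_step)
evaluator = Engine(eval_step)
# The epoch handler reads evaluator.state.metrics['accuracy'] and ['nll']
Accuracy().attach(evaluator, 'accuracy')
Loss(F.cross_entropy).attach(evaluator, 'nll')

add_progress_bar(trainer, evaluator, data, epoch_length=len(data))
trainer.run(data, max_epochs=2)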
Example #8
def test_pbar_for_validation(capsys):
    loader = [1, 2, 3, 4, 5]
    engine = Engine(update_fn)

    pbar = ProgressBar(desc="Validation")
    pbar.attach(engine)
    engine.run(loader, max_epochs=1)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = "Validation: [4/5]  80%|████████   [00:00<00:00]"
    assert err[-1] == expected
Example #9
def create_evaluator(args, model):
    def inference(engine, batch):
        model.eval()
        with torch.no_grad():
            batch = {
                name: input_tensor.to(args.device)
                for name, input_tensor in batch.items()
            }
            lm_logits, mc_logits, *_ = model(
                input_ids=batch["input_ids"],
                token_type_ids=batch["token_type_ids"],
                mc_token_ids=batch["mc_token_ids"],
                lm_labels=batch["lm_labels"],
                mc_labels=batch["mc_labels"],
                persona=batch["persona"],
                history=batch["history"],
                effects=batch["effects"],
            )
            lm_logits_flat_shifted = lm_logits[..., :-1, :].contiguous().view(
                -1, lm_logits.size(-1))
            lm_labels_flat_shifted = batch["lm_labels"][:, 0, :,
                                                        1:].contiguous().view(
                                                            -1)
            return (lm_logits_flat_shifted,
                    mc_logits), (lm_labels_flat_shifted, batch["mc_labels"])

    evaluator = Engine(inference)
    metrics = {
        "nll":
        Loss(torch.nn.CrossEntropyLoss(ignore_index=-100),
             output_transform=lambda x: (x[0][0], x[1][0])),
        # the accuracy is a filler since multiple-choice is not used.
        "accuracy":
        Accuracy(
            #output_transform=lambda x: (torch.argmax(x[0][1].view((-1,)), dim=0, keepdim=True), x[1][1][:, 0])),
            output_transform=lambda x:
            (torch.argmax(x[0][1].squeeze(1), dim=-1), x[1][1][:, 0])),
        "ppl":
        Perplexity(output_transform=lambda x: (x[0][0], None)),
    }

    for name, metric in metrics.items():
        metric.attach(evaluator, name)
    if args.local_rank in [-1, 0]:
        pbar = ProgressBar(persist=True)
        pbar.attach(evaluator)
        evaluator.add_event_handler(
            Events.COMPLETED, lambda _: pbar.log_message(
                "Validation: %s" % pformat(evaluator.state.metrics)))
    return evaluator
Example #10
def test_tqdm_logger_epoch_length(capsys):
    loader = list(range(100))
    engine = Engine(update_fn)
    pbar = ProgressBar(persist=True)
    pbar.attach(engine)
    engine.run(loader, epoch_length=50)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    actual = err[-1]
    expected = "Iteration: [50/50] 100%|██████████ [00:00<00:00]"
    assert actual == expected
Example #11
def run(train_batch_size, val_batch_size, epochs, lr, momentum):
    train_loader, val_loader = get_data_loaders(train_batch_size,
                                                val_batch_size)
    model = Net()
    device = 'cpu'

    if torch.cuda.is_available():
        device = 'cuda'

    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        F.nll_loss,
                                        device=device)
    evaluator = create_supervised_evaluator(model,
                                            metrics={
                                                'accuracy': Accuracy(),
                                                'nll': Loss(F.nll_loss)
                                            },
                                            device=device)

    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, ['loss'])

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        pbar.log_message(
            "Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch, avg_accuracy, avg_nll))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        pbar.log_message(
            "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch, avg_accuracy, avg_nll))

        pbar.n = pbar.last_print_n = 0

    trainer.run(train_loader, max_epochs=epochs)
Example #12
    def init_function(h_model):
        h_criterion = torch.nn.CrossEntropyLoss()
        h_evaluator = SupervisedEvaluator(model=h_model, criterion=h_criterion, device=device)
        h_train_evaluator = SupervisedEvaluator(model=h_model, criterion=h_criterion, device=device)
        h_optimizer = torch.optim.Adam(params=h_model.parameters(), lr=1e-3)
        h_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(h_optimizer, 'max', verbose=True, patience=5,
                                                                    factor=0.5)
        h_trainer = SupervisedTrainer(model=h_model, optimizer=h_optimizer, criterion=h_criterion, device=device)

        # Tqdm logger
        h_pbar = ProgressBar(persist=False, bar_format=config.IGNITE_BAR_FORMAT)
        h_pbar.attach(h_trainer.engine, metric_names='all')
        h_tqdm_logger = TqdmLogger(pbar=h_pbar)
        # noinspection PyTypeChecker
        h_tqdm_logger.attach_output_handler(
            h_evaluator.engine,
            event_name=Events.COMPLETED,
            tag="validation",
            global_step_transform=global_step_from_engine(h_trainer.engine),
        )
        # noinspection PyTypeChecker
        h_tqdm_logger.attach_output_handler(
            h_train_evaluator.engine,
            event_name=Events.COMPLETED,
            tag="train",
            global_step_transform=global_step_from_engine(h_trainer.engine),
        )

        # Learning rate scheduling
        # The PyTorch Ignite LRScheduler class does not work with ReduceLROnPlateau
        h_evaluator.engine.add_event_handler(Events.COMPLETED,
                                             lambda engine: h_lr_scheduler.step(engine.state.metrics['accuracy']))

        # Model checkpoints
        h_handler = ModelCheckpoint(config.MODELS_DIR, run.replace('/', '-'), n_saved=1, create_dir=True,
                                    require_empty=False, score_name='acc',
                                    score_function=lambda engine: engine.state.metrics['accuracy'],
                                    global_step_transform=global_step_from_engine(h_trainer.engine))
        h_evaluator.engine.add_event_handler(Events.EPOCH_COMPLETED, h_handler, {'m': h_model})

        # Early stopping
        h_es_handler = EarlyStopping(patience=15,
                                     min_delta=0.0001,
                                     score_function=lambda engine: engine.state.metrics['accuracy'],
                                     trainer=h_trainer.engine, cumulative_delta=True)
        h_es_handler.logger.setLevel(logging.DEBUG)
        h_evaluator.engine.add_event_handler(Events.COMPLETED, h_es_handler)

        return h_trainer, h_train_evaluator, h_evaluator
Example #13
def test_pbar_with_tqdm_kwargs(capsys):
    n_epochs = 10
    loader = [1, 2, 3, 4, 5]
    engine = Engine(update_fn)

    pbar = ProgressBar(desc="My description: ")
    pbar.attach(engine, output_transform=lambda x: x)
    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = "My description:  [10/10]: [4/5]  80%|████████  , output=1 [00:00<00:00]"
    assert err[-1] == expected
Example #14
def train():
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    model = Bert_SQG()
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=3e-5)

    ds = dataloader.BertSQG_DataClass()
    dl = DataLoader(ds, num_workers=4, batch_size=4)
    scheduler = PiecewiseLinear(optimizer, "lr",
                                [(0, 3e-5),
                                 (EPOCHS * len(ds) // BATCH_SIZE, 0.0)])
    metrics = {"nll": Loss(torch.nn.CrossEntropyLoss(ignore_index=-1))}

    def update(engine, batch):
        model.train()
        for i in range(0, len(batch) - 1):
            x = batch[i].to(device)
            y = batch[i + 1].to(device)
            y_prime = model(x)
            loss = criterion(y_prime[-1], y[-1]) / ITERATION_STEP
            loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        if engine.state.iteration % ITERATION_STEP == 0:
            optimizer.step()
            optimizer.zero_grad()
        return loss.item()

    trainer = Engine(update)
    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names=["loss"])
    tb_logger = TensorboardLogger(log_dir='./logs')
    tb_logger.attach(trainer,
                     log_handler=OutputHandler(tag="training",
                                               metric_names=["loss"]),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(trainer,
                     log_handler=OptimizerParamsHandler(optimizer),
                     event_name=Events.ITERATION_STARTED)

    checkpoint_handler = ModelCheckpoint('./checkpoint',
                                         '_checkpoint',
                                         save_interval=1,
                                         n_saved=3)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler,
                              {'bert_sqg': getattr(model, 'module', model)})
    trainer.run(dl, max_epochs=EPOCHS)
    tb_logger.close()
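
Bert_SQG, the dataloader module and the uppercase constants come from the surrounding project; plausible placeholder values for the constants (assumptions only):

EPOCHS = 3           # total training epochs
BATCH_SIZE = 4       # matches the DataLoader batch_size above
ITERATION_STEP = 8   # gradient-accumulation steps between optimizer updates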
Example #15
def test_pbar_with_str_output(capsys):
    n_epochs = 2
    loader = [1, 2]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, output_transform=lambda x: "red")

    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split('\r')
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = u'Epoch [2/2]: [1/2]  50%|█████     , output=red [00:00<00:00]'
    assert err[-1] == expected
Example #16
def test_pbar_with_max_epochs_set_to_one(capsys):
    n_epochs = 1
    loader = [1, 2]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, ["a"])

    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = "Iteration: [1/2]  50%|█████     , a=1 [00:00<00:00]"
    assert err[-1] == expected
Example #17
def test_pbar_on_epochs(capsys):

    n_epochs = 10
    loader = [1, 2, 3, 4, 5]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, event_name=Events.EPOCH_STARTED, closing_event_name=Events.COMPLETED)
    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    actual = err[-1]
    expected = "Epoch: [9/10]  90%|█████████  [00:00<00:00]"
    assert actual == expected
Example #18
def test_pbar_on_callable_events(capsys):

    n_epochs = 1
    loader = list(range(100))
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, event_name=Events.ITERATION_STARTED(every=10), closing_event_name=Events.EPOCH_COMPLETED)
    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    actual = err[-1]
    expected = "Iteration: [90/100]  90%|█████████  [00:00<00:00]"
    assert actual == expected
Example #19
def test_pbar(capsys):

    n_epochs = 2
    loader = [1, 2]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, ['a'])

    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split('\r')
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = u'Epoch 2: [1/2]  50%|█████     , a=1.00e+00 [00:00<00:00]'
    assert err[-1] == expected
Example #20
def get_tester(option, device):
    pose_encoder = _load_model(PoseEncoder, option.encoder_path, device)
    pose_decoder = _load_model(PoseDecoder, option.decoder_path, device)

    l2_loss = nn.MSELoss()
    l2_loss.to(device)

    output_dir = option.output_dir

    def step(engine, origin_pose):
        origin_pose = origin_pose.to(device)

        z = pose_encoder(origin_pose)
        recon_pose = pose_decoder(z)

        recon_loss = l2_loss(recon_pose, origin_pose)

        return {
            "recon_loss": recon_loss.item(),
            "recon_pose": recon_pose,
            "origin_pose": origin_pose,
            "z": z
        }

    tester = Engine(step)

    RunningAverage(output_transform=lambda x: x["recon_loss"]).attach(tester, 'loss')
    pbar = ProgressBar()
    pbar.attach(tester, metric_names=["loss"])

    @tester.on(Events.ITERATION_COMPLETED)
    def save_result(engine):
        show_pose([
            engine.state.output["origin_pose"],
            engine.state.output["recon_pose"]
        ],
            os.path.join(output_dir, IMG_FNAME.format(iter=engine.state.iteration))
        )

    @tester.on(Events.STARTED)
    def mkdir(engine):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    return tester
Example #21
    def train_model(self, n_epochs, train_loader, val_loader, eval_before_start=True):
        # Attach evaluation to trainer: we evaluate when we start the training and at the end of each epoch
        self.trainer.add_event_handler(Events.EPOCH_COMPLETED, lambda _: self.evaluator.run(val_loader))
        self.trainer.add_event_handler(Events.EPOCH_COMPLETED, lambda _: self.update_epoch())
        if eval_before_start:
            self.trainer.add_event_handler(Events.STARTED, lambda _: self.evaluator.run(val_loader))

        # Linearly decrease the learning rate from lr to zero
        scheduler = PiecewiseLinear(self.optimizer, "lr", [(0, self.lr), (n_epochs * len(train_loader), 0.0)])
        self.trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

        # Prepare metrics
        RunningAverage(output_transform=lambda x: x).attach(self.trainer, "loss")
        metrics = {"nll": Loss(torch.nn.CrossEntropyLoss(ignore_index=-1), output_transform=lambda x: (x[0][0], x[1][0])),
                   "accuracy": Accuracy(output_transform=lambda x: (x[0][1], x[1][1]))}
        metrics["average_ppl"] = MetricsLambda(math.exp, metrics["nll"])
        for name, metric in metrics.items():
            metric.attach(self.evaluator, name)

        # On the main process: add progress bar, tensorboard, checkpoints and save model
        pbar = ProgressBar(persist=True)
        pbar.attach(self.trainer, metric_names=["loss"])

        if not self.verbose:
            pbar_eval = ProgressBar(persist=False)
            pbar_eval.attach(self.evaluator)

        self.evaluator.add_event_handler(Events.STARTED, lambda _: self.logger.info(f'Beginning validation for epoch {self.epoch}...'))
        self.evaluator.add_event_handler(Events.COMPLETED, lambda _: pbar.log_message("Validation: %s" % pformat(self.evaluator.state.metrics)))

        self.tb_logger.attach(self.trainer, log_handler=OutputHandler(tag="training", metric_names=["loss"]), event_name=Events.ITERATION_COMPLETED)
        self.tb_logger.attach(self.trainer, log_handler=OptimizerParamsHandler(self.optimizer), event_name=Events.ITERATION_STARTED)
        self.tb_logger.attach(self.evaluator, log_handler=OutputHandler(tag="validation", metric_names=list(metrics.keys()), another_engine=self.trainer),
                              event_name=Events.EPOCH_COMPLETED)

        self.trainer.add_event_handler(Events.EPOCH_COMPLETED, self.checkpoint_handler,
                                       {'mymodel': getattr(self.model, 'module', self.model)})  # "getattr" takes care of distributed encapsulation

        # Run the training
        self.trainer.run(train_loader, max_epochs=n_epochs)

        # On the main process: close tensorboard logger and rename the last checkpoint (for easy re-loading with OpenAIGPTModel.from_pretrained method)
        if n_epochs > 0:
            os.rename(self.checkpoint_handler._saved[-1][1][-1], os.path.join(cfg.checkpoint_log_folder, self.name, WEIGHTS_NAME))
            self.tb_logger.close()
Example #22
def add_logging_and_checkpoint_saving(trainer,
                                      evaluator,
                                      metrics,
                                      model,
                                      optimizer,
                                      args,
                                      prefix=""):
    """ Add to training engine tensorboard logging, progress bar with average loss, checkpoint saving and save training config. """
    # Add progress bar with average loss
    RunningAverage(output_transform=lambda x: x).attach(
        trainer, prefix + "loss")
    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names=[prefix + "loss"])
    evaluator.add_event_handler(
        Events.COMPLETED, lambda _: pbar.log_message(
            "Validation: %s" % pformat(evaluator.state.metrics)))

    # Add tensorboard logging with training and evaluation metrics
    tb_logger = TensorboardLogger(log_dir=None)
    tb_logger.attach(trainer,
                     log_handler=OutputHandler(tag="training",
                                               metric_names=[prefix + "loss"]),
                     event_name=Events.ITERATION_COMPLETED)
    tb_logger.attach(trainer,
                     log_handler=OptimizerParamsHandler(optimizer),
                     event_name=Events.ITERATION_STARTED)

    @evaluator.on(Events.COMPLETED)
    def tb_log_metrics(engine):
        for name in metrics.keys():
            tb_logger.writer.add_scalar(name, engine.state.metrics[name],
                                        trainer.state.iteration)

    # Add checkpoint saving after each epoch - take care of distributed encapsulation ('getattr()')
    checkpoint_handler = ModelCheckpoint(tb_logger.writer.log_dir,
                                         'checkpoint',
                                         save_interval=1,
                                         n_saved=3)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler,
                              {'mymodel': getattr(model, 'module', model)})

    # Save training configuration
    torch.save(args, os.path.join(tb_logger.writer.log_dir, CONFIG_NAME))

    return checkpoint_handler, tb_logger
Example #23
def test_pbar_output_tensor(capsys):
    loader = [1, 2, 3, 4, 5]

    def update_fn(engine, batch):
        return torch.Tensor([batch, 0])

    engine = Engine(update_fn)

    pbar = ProgressBar(desc="Output tensor")
    pbar.attach(engine, output_transform=lambda x: x)
    engine.run(loader, max_epochs=1)

    captured = capsys.readouterr()
    err = captured.err.split('\r')
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    expected = u'Output tensor: [4/5]  80%|████████  , output_0=5.00e+00, output_1=0.00e+00 [00:00<00:00]'
    assert err[-1] == expected
Example #24
def test_pbar_no_metric_names(capsys):

    n_epochs = 2
    loader = [1, 2]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine)

    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split('\r')
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    actual = err[-1]
    expected = u'Epoch [2/2]: [1/2]  50%|█████      [00:00<00:00]'
    assert actual == expected
Example #25
def assign_event_handlers(trainer, evaluator, val_set):
    pbar = ProgressBar()
    pbar.attach(trainer, ['loss'])

    early_stop = EarlyStopping(patience=2, score_function=lambda e: -e.state.metrics['loss'], trainer=trainer)
    evaluator.add_event_handler(Events.COMPLETED, early_stop)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        print("\nTraining Results - Epoch: {} : Avg loss: {:.3f}"
              .format(trainer.state.epoch, trainer.state.metrics['avg_loss']))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_set)
        metrics_eval = evaluator.state.metrics
        print("Validation Results - Epoch: {} Avg loss: {:.3f}, Avg abs. error: {:.2f}"
              .format(trainer.state.epoch, metrics_eval['loss'], metrics_eval['mae']))
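
assign_event_handlers assumes that running-average metrics named 'loss' and 'avg_loss' are already attached to the trainer and that the evaluator exposes 'loss' and 'mae'. One way those might be attached beforehand (the metric choices and output formats are assumptions):

import torch.nn.functional as F
from ignite.metrics import Loss, MeanAbsoluteError, RunningAverage

# trainer is assumed to output the per-batch loss value,
# evaluator is assumed to output (y_pred, y) pairs.
RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
RunningAverage(output_transform=lambda x: x).attach(trainer, 'avg_loss')
Loss(F.mse_loss).attach(evaluator, 'loss')
MeanAbsoluteError().attach(evaluator, 'mae')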
Example #26
    def _test(out_tensor, out_msg):
        loader = [1, 2, 3, 4, 5]

        def update_fn(engine, batch):
            return out_tensor

        engine = Engine(update_fn)

        pbar = ProgressBar(desc="Output tensor")
        pbar.attach(engine, output_transform=lambda x: x)
        engine.run(loader, max_epochs=1)

        captured = capsys.readouterr()
        err = captured.err.split("\r")
        err = list(map(lambda x: x.strip(), err))
        err = list(filter(None, err))
        expected = f"Output tensor: [4/5]  80%|████████  , {out_msg} [00:00<00:00]"
        assert err[-1] == expected
Example #27
def test_pbar_wrong_events_order():

    engine = Engine(update_fn)
    pbar = ProgressBar()

    with pytest.raises(ValueError, match="should be called before closing event"):
        pbar.attach(engine, event_name=Events.COMPLETED, closing_event_name=Events.COMPLETED)

    with pytest.raises(ValueError, match="should be called before closing event"):
        pbar.attach(engine, event_name=Events.COMPLETED, closing_event_name=Events.EPOCH_COMPLETED)

    with pytest.raises(ValueError, match="should be called before closing event"):
        pbar.attach(engine, event_name=Events.COMPLETED, closing_event_name=Events.ITERATION_COMPLETED)

    with pytest.raises(ValueError, match="should be called before closing event"):
        pbar.attach(engine, event_name=Events.EPOCH_COMPLETED, closing_event_name=Events.EPOCH_COMPLETED)

    with pytest.raises(ValueError, match="should be called before closing event"):
        pbar.attach(engine, event_name=Events.ITERATION_COMPLETED, closing_event_name=Events.ITERATION_STARTED)
Example #28
def test_pbar_file(tmp_path):
    n_epochs = 2
    loader = [1, 2]
    engine = Engine(update_fn)

    file_path = tmp_path / "temp.txt"
    file = open(str(file_path), "w+")

    pbar = ProgressBar(file=file)
    pbar.attach(engine, ["a"])
    engine.run(loader, max_epochs=n_epochs)

    file.close()  # Force a flush of the buffer. file.flush() does not work.

    file = open(str(file_path), "r")
    lines = file.readlines()

    expected = "Epoch [2/2]: [1/2]  50%|█████     , a=1 [00:00<00:00]\n"
    assert lines[-2] == expected
Example #29
def test_pbar_with_str_output(capsys):
    n_epochs = 2
    loader = [1, 2]
    engine = Engine(update_fn)

    pbar = ProgressBar()
    pbar.attach(engine, output_transform=lambda x: "red")

    engine.run(loader, max_epochs=n_epochs)

    captured = capsys.readouterr()
    err = captured.err.split("\r")
    err = list(map(lambda x: x.strip(), err))
    err = list(filter(None, err))
    if get_tqdm_version() < Version("4.49.0"):
        expected = "Epoch [2/2]: [1/2]  50%|█████     , output=red [00:00<00:00]"
    else:
        expected = "Epoch [2/2]: [1/2]  50%|█████     , output=red [00:00<?]"
    assert err[-1] == expected
Example #30
    def attach(training_engine, validation_engine, verbose=VERBOSE_BATCH_WISE):
        """
        """
        def attach_running_average(engine, metric_name):
            RunningAverage(output_transform=lambda x: x[metric_name]).attach(
                engine, metric_name)

        training_metric_names = ["loss", "accuracy", "|param|", "|grad|"]
        for metric_name in training_metric_names:
            attach_running_average(training_engine, metric_name)

        # If the verbosity is high enough, a progress bar is shown for mini-batch iterations
        if verbose >= VERBOSE_BATCH_WISE:
            pbar = ProgressBar(bar_format=None, ncols=120)
            pbar.attach(training_engine, training_metric_names)

        # If the verbosity is high enough, statistics are printed after each epoch
        if verbose >= VERBOSE_EPOCH_WISE:

            @training_engine.on(Events.EPOCH_COMPLETED)
            def print_training_logs(engine):
                print(
                    f'Epoch {engine.state.epoch} >> |params|={engine.state.metrics["|param|"]:.2e} |grad|={engine.state.metrics["|grad|"]:.2e} loss={engine.state.metrics["loss"]:.4e} accuracy={engine.state.metrics["accuracy"]:.4f}'
                )

        validation_metric_names = ["loss", "accuracy"]
        for metric_name in validation_metric_names:
            attach_running_average(validation_engine, metric_name)

        # If the verbosity is high enough, a progress bar is shown for mini-batch iterations
        if verbose >= VERBOSE_BATCH_WISE:
            pbar = ProgressBar(bar_format=None, ncols=120)
            pbar.attach(validation_engine, validation_metric_names)

        # If the verbosity is high enough, statistics are printed after each epoch
        if verbose >= VERBOSE_EPOCH_WISE:

            @validation_engine.on(Events.EPOCH_COMPLETED)
            def print_validation_logs(engine):
                print(
                    f'Validation >> loss={engine.state.metrics["loss"]:.4e} accuracy={engine.state.metrics["accuracy"]:.4f} best_loss={engine.best_loss:.4e}'
                )
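
The VERBOSE_* constants used above are not defined in the snippet; one common convention (an assumption) is an increasing integer scale:

# Assumed verbosity levels: higher values print more
VERBOSE_SILENT = 0
VERBOSE_EPOCH_WISE = 1
VERBOSE_BATCH_WISE = 2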