    def on_epoch_start(self, runner: IRunner) -> None:
        """
        Set the loaders for the current epoch.
        If the validation loader is not scheduled for this epoch,
        the first of the epoch's loaders is used as the
        validation loader instead; in such epochs, the metrics
        from the latest epoch that ran the true validation
        loader are carried over.

        Args:
            runner (IRunner): current runner

        Raises:
            ValueError: if there are no loaders in the epoch
        """
        epoch_num = runner.epoch
        # loaders to use in current epoch
        epoch_loaders = OrderedDict()
        for name, loader in self.loaders.items():
            period = self.loader_periods.get(name, 1)
            # ignore loaders where period == 0
            if period > 0 and epoch_num % period == 0:
                epoch_loaders[name] = loader
        if len(epoch_loaders) == 0:
            raise ValueError(f"There are no loaders in epoch {epoch_num}!")
        first_loader = next(iter(epoch_loaders.keys()))
        runner.valid_loader = (self.valid_loader if self.valid_loader
                               in epoch_loaders else first_loader)
        runner.loaders = epoch_loaders
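
The period logic above can be exercised in isolation. A minimal sketch, assuming two hypothetical loaders where "valid" is scheduled only every third epoch:

from collections import OrderedDict

loaders = OrderedDict(train="train_loader", valid="valid_loader")  # placeholders
loader_periods = {"valid": 3}  # run "valid" only on every 3rd epoch

for epoch_num in range(1, 7):
    epoch_loaders = OrderedDict(
        (name, loader)
        for name, loader in loaders.items()
        if loader_periods.get(name, 1) > 0
        and epoch_num % loader_periods.get(name, 1) == 0
    )
    print(epoch_num, list(epoch_loaders))
# "train" appears every epoch; "valid" only in epochs 3 and 6,
# so in the other epochs "train" would serve as the validation loader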
Example #2
    def _exit_profiler(self, runner: IRunner) -> None:
        loader_key = runner.loader_key
        epoch = runner.stage_epoch_step

        if not self._should_use_profiler(loader_key,
                                         epoch) or self.profiler is None:
            return

        if self.stats is None:
            self.profiler.__exit__(None, None, None)

            if "on_trace_ready" not in self.profiler_kwargs and self.export_chrome_trace_path:
                self.profiler.export_chrome_trace(
                    self.export_chrome_trace_path)

            if self.export_stacks_kwargs is not None:
                self.profiler.export_stacks(**self.export_stacks_kwargs)

            self.stats = self.profiler.key_averages()
            table_txt = self.stats.table(sort_by="cpu_time_total")

            with TemporaryDirectory() as tmp_dir:
                artifact_path = os.path.join(tmp_dir, "profiler_table.txt")
                with open(artifact_path, "w") as f:
                    f.write(table_txt)
                runner.log_artifact(
                    tag="profiler",
                    artifact="profiler.txt",
                    path_to_artifact=artifact_path,
                )

            print(table_txt)
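
The callback above drives torch.profiler; the same export-and-summarize flow can be reproduced with plain PyTorch. A minimal sketch, assuming a CPU-only toy workload:

import torch
from torch.profiler import profile, ProfilerActivity

with profile(activities=[ProfilerActivity.CPU]) as prof:
    x = torch.randn(256, 256)
    for _ in range(10):
        x = x @ x  # some work to profile

prof.export_chrome_trace("trace.json")  # open in chrome://tracing
stats = prof.key_averages()
print(stats.table(sort_by="cpu_time_total"))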
Example #3
    def on_epoch_end(self, runner: IRunner) -> None:
        """Epoch end hook.

        Args:
            runner (IRunner): current runner
        """
        if runner.stage_name.startswith("infer"):
            return

        runner.valid_metrics = {
            k.replace(f"{runner.valid_loader}_", ""): v
            for k, v in runner.epoch_metrics.items()
            if k.startswith(runner.valid_loader)
        }
        assert (
            runner.main_metric in runner.valid_metrics
        ), f"{runner.main_metric} value is not available by the epoch end"

        current_valid_metric = runner.valid_metrics[runner.main_metric]
        if runner.minimize_metric:
            best_valid_metric = runner.best_valid_metrics.get(
                runner.main_metric, float("+inf"))
            is_best = current_valid_metric < best_valid_metric
        else:
            best_valid_metric = runner.best_valid_metrics.get(
                runner.main_metric, float("-inf"))
            is_best = current_valid_metric > best_valid_metric

        if is_best:
            runner.is_best_valid = True
            runner.best_valid_metrics = runner.valid_metrics.copy()
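
The min/max bookkeeping above reduces to one comparison against a signed-infinity default. A minimal sketch of the same rule, with hypothetical metric values:

def is_new_best(current, best_so_far=None, minimize=True):
    # a missing history compares against the worst possible value
    if best_so_far is None:
        best_so_far = float("+inf") if minimize else float("-inf")
    return current < best_so_far if minimize else current > best_so_far

assert is_new_best(0.5)                        # first epoch is always best
assert not is_new_best(0.7, 0.5)               # loss went up
assert is_new_best(0.9, 0.5, minimize=False)   # accuracy went up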
Example #4
    def on_stage_start(self, runner: IRunner) -> None:
        """Setup model for stage.

        .. note::

            If CheckpointCallback is initialized with ``resume``
            (a path to a checkpoint file), or with ``resume``
            (a filename) together with ``resume_dir``
            (the directory containing that file),
            then the checkpoint will be loaded.

        Args:
            runner (IRunner): current runner
        """
        if runner.device.type == "xla":
            from torch_xla.core.xla_model import save
        else:
            from torch import save
        self._save_fn = save

        if getattr(runner, "resume", None) is not None:
            self.resume = runner.resume
            runner.resume = None
        elif getattr(runner, "autoresume", None) is not None:
            self.resume_dir = runner.logdir / "checkpoints"
            self.resume = f"{runner.autoresume}_full.pth"
            runner.autoresume = None

        for key in self._keys_from_state:
            value = getattr(runner, key, None)
            if value is not None:
                setattr(self, key, value)

        if self.resume_dir is not None:
            self.resume = os.path.join(str(self.resume_dir), str(self.resume))

        if self.resume is not None:
            self._load_runner(runner, mapping=self.resume, load_full=True)
            self.resume = None
        else:
            checkpoint_exists = False
            need_load_full = False
            if isinstance(self.load_on_stage_start, str):
                checkpoint_exists = os.path.isfile(
                    "{}/checkpoints/{}.pth".format(runner.logdir,
                                                   self.load_on_stage_start))
                need_load_full = self.load_on_stage_start.endswith("full")
            elif isinstance(self.load_on_stage_start, dict):
                required_files = _required_files(
                    runner.logdir, self.load_on_stage_start).keys()
                checkpoint_exists = all(
                    os.path.isfile(file) for file in required_files)

            if self.load_on_stage_start is not None and checkpoint_exists:
                self._load_runner(
                    runner,
                    mapping=self.load_on_stage_start,
                    load_full=need_load_full,
                )
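
The load_on_stage_start branch only verifies that the expected checkpoint files are on disk before loading. A minimal sketch of the same resolution, simplifying _required_files to a flat name-to-file mapping (an assumption, not the actual helper):

import os

def checkpoint_ready(logdir, load_on_stage_start):
    # str form: a single "<name>.pth" under logdir/checkpoints
    if isinstance(load_on_stage_start, str):
        path = os.path.join(logdir, "checkpoints", f"{load_on_stage_start}.pth")
        return os.path.isfile(path)
    # dict form: several named checkpoints, all of which must exist
    if isinstance(load_on_stage_start, dict):
        return all(
            os.path.isfile(os.path.join(logdir, "checkpoints", f"{name}.pth"))
            for name in load_on_stage_start.values()
        )
    return False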
Example #5
    def on_epoch_start(self, runner: IRunner) -> None:
        """Epoch start hook.

        Args:
            runner (IRunner): current runner
        """
        runner.valid_metrics = defaultdict(None)
        runner.is_best_valid = False
Example #6
def _load_checkpoint(*,
                     filename,
                     runner: IRunner,
                     load_full: bool = True) -> None:
    """
    Load checkpoint from a file.

    Args:
        filename (str): path to checkpoint
        runner (IRunner): current runner
        load_full (bool): if True (default), the states of the
            criterion, optimizer and scheduler are loaded as well.
            The file should contain the keys required for loading
            the model (``'model_state_dict'``),
            criterion (``'criterion_state_dict'``, full load only),
            optimizer (``'optimizer_state_dict'``) and
            scheduler (``'scheduler_state_dict'``).

    Raises:
        FileNotFoundError: if the file specified in ``filename``
            does not exist.
    """
    if not os.path.isfile(filename):
        raise FileNotFoundError(f"No checkpoint found at {filename}!")

    print(f"=> Loading checkpoint {filename}")
    checkpoint = utils.load_checkpoint(filename)

    if not runner.stage_name.startswith("infer") and load_full:
        runner.stage_name = checkpoint["stage_name"]
        runner.epoch = checkpoint["epoch"]
        runner.global_epoch = checkpoint["global_epoch"]
        # @TODO: should we also load,
        # checkpoint_data, main_metric, minimize_metric, valid_loader ?
        # epoch_metrics, valid_metrics ?

    if load_full:
        utils.unpack_checkpoint(
            checkpoint,
            model=runner.model,
            criterion=runner.criterion,
            optimizer=runner.optimizer,
            scheduler=runner.scheduler,
        )

        print(f"loaded state checkpoint {filename} "
              f"(global epoch {checkpoint['global_epoch']}, "
              f"epoch {checkpoint['epoch']}, "
              f"stage {checkpoint['stage_name']})")
    else:
        utils.unpack_checkpoint(
            checkpoint,
            model=runner.model,
        )

        print(f"loaded model checkpoint {filename}")
Example #7
    def on_epoch_end(self, runner: IRunner) -> None:
        """Store validation metrics and use latest validation score
        when validation loader is not required.

        Args:
            runner (IRunner): current runner
        """
        if self.valid_loader in runner.loaders:
            self.valid_metrics = runner.valid_metrics.copy()
        elif self.valid_metrics is not None:
            # use previous score on validation
            runner.valid_metrics = self.valid_metrics
            runner.is_best_valid = False
Example #8
    def update_optimizer(self, runner: IRunner) -> None:
        """@TODO: Docs. Contribution is welcome.

        Args:
            runner (IRunner): current runner
        """
        lr, momentum = self._update_optimizer(optimizer=self._optimizer)

        if self.optimizer_key is not None:
            runner.batch_metrics[f"lr_{self.optimizer_key}"] = lr
            runner.batch_metrics[f"momentum_{self.optimizer_key}"] = momentum
        else:
            runner.batch_metrics["lr"] = lr
            runner.batch_metrics["momentum"] = momentum
Example #9
    def update_optimizer(self, runner: IRunner) -> None:
        """Update learning rate and momentum in runner.

        Args:
            runner: current runner
        """
        lr, momentum = self._update_optimizer(optimizer=self._optimizer)

        if self.optimizer_key is not None:
            runner.batch_metrics[f"lr_{self.optimizer_key}"] = lr
            runner.batch_metrics[f"momentum_{self.optimizer_key}"] = momentum
        else:
            runner.batch_metrics["lr"] = lr
            runner.batch_metrics["momentum"] = momentum
Example #10
    def on_epoch_end(self, runner: IRunner) -> None:
        """On epoch end event.

        Args:
            runner: current runner
        """
        lr = self._optimizer.param_groups[0]["lr"]
        lr_name = (f"lr/{self.optimizer_key}"
                   if self.optimizer_key is not None else "lr")
        runner.epoch_metrics[lr_name] = lr

        momentum = utils.get_optimizer_momentum(self._optimizer)
        if momentum is not None:
            momentum_name = (f"momentum/{self.optimizer_key}"
                             if self.optimizer_key is not None else "momentum")
            runner.epoch_metrics[momentum_name] = momentum
Example #11
    def on_epoch_start(self, runner: IRunner) -> None:
        """Epoch start hook.

        Args:
            runner (IRunner): current runner
        """
        runner.epoch_metrics = defaultdict(None)
Example #12
    def on_stage_start(self, runner: IRunner) -> None:
        """Stage start hook.

        Args:
            runner: current runner
        """
        self.reduced_metric = self.reduced_metric or runner.main_metric

        scheduler = runner.get_attr(
            key="scheduler", inner_key=self.scheduler_key
        )
        assert scheduler is not None
        self._scheduler = scheduler

        if self.mode is None:
            if isinstance(scheduler, BatchScheduler):
                self.mode = "batch"
            else:
                self.mode = "epoch"

        if (
            isinstance(scheduler, OneCycleLRWithWarmup)
            and self.mode == "batch"
        ):
            scheduler.reset()
        assert self.mode is not None
Example #13
    def on_batch_start(self, runner: IRunner) -> None:
        """Batch start hook.

        Args:
            runner: current runner
        """
        runner.batch_metrics = defaultdict(None)
Example #14
    def on_stage_start(self, runner: IRunner):
        """Checks that the current stage has the correct criterion."""
        criterion = runner.get_attr(
            key="criterion", inner_key=self.criterion_key
        )
        assert criterion is not None
        self._criterion = criterion
Example #15
    def on_batch_start(self, runner: IRunner) -> None:
        """Mixes data according to Cutmix algorithm.

        Args:
            runner: current runner
        """
        if not self.is_needed:
            return

        if self.alpha > 0:
            self.lam = np.random.beta(self.alpha, self.alpha)
        else:
            self.lam = 1

        self.index = torch.randperm(runner.input[self.fields[0]].shape[0])
        self.index = self.index.to(runner.device)

        bbx1, bby1, bbx2, bby2 = self._rand_bbox(
            runner.input[self.fields[0]].shape, self.lam)

        for f in self.fields:
            runner.input[f][:, :, bbx1:bbx2, bby1:bby2] = (
                runner.input[f][self.index, :, bbx1:bbx2, bby1:bby2])

        self.lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) /
                        (runner.input[self.fields[0]].shape[-1] *
                         runner.input[self.fields[0]].shape[-2]))
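
The _rand_bbox helper is not shown above. The CutMix paper's reference version samples a box whose area fraction is roughly 1 - lam; a sketch of that common helper, under the assumption that the callback follows the reference implementation (axis naming matches the slicing above: bbx over height, bby over width):

import numpy as np

def _rand_bbox(shape, lam):
    # shape is (batch, channels, height, width); box area ~ (1 - lam)
    h, w = shape[-2], shape[-1]
    cut_rat = np.sqrt(1.0 - lam)
    cut_h, cut_w = int(h * cut_rat), int(w * cut_rat)
    # sample the box center uniformly, then clip the corners to the image
    cy, cx = np.random.randint(h), np.random.randint(w)
    bbx1 = np.clip(cy - cut_h // 2, 0, h)
    bbx2 = np.clip(cy + cut_h // 2, 0, h)
    bby1 = np.clip(cx - cut_w // 2, 0, w)
    bby2 = np.clip(cx + cut_w // 2, 0, w)
    return bbx1, bby1, bbx2, bby2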
Example #16
    def on_loader_start(self, runner: IRunner) -> None:
        """Loader start hook.

        Args:
            runner (IRunner): current runner
        """
        runner.loader_metrics = defaultdict(None)
        self.meters = defaultdict(meters.AverageValueMeter)
Example #17
    def on_epoch_end(self, runner: IRunner):
        """Check if iterated specified number of epochs.

        Args:
            runner: current runner
        """
        if runner.epoch >= self.num_epoch_steps:
            runner.need_early_stop = True
Example #18
    def step_batch(self, runner: IRunner) -> None:
        """@TODO: Docs. Contribution is welcome.

        Args:
            runner (IRunner): current runner
        """
        lr, momentum = self._scheduler_step(scheduler=self._scheduler)

        if self.scheduler_key is not None:
            runner.batch_metrics[f"lr/{self.scheduler_key}"] = lr
            if momentum is not None:
                runner.batch_metrics[
                    f"momentum/{self.scheduler_key}"] = momentum
        else:
            runner.batch_metrics["lr"] = lr
            if momentum is not None:
                runner.batch_metrics["momentum"] = momentum
Example #19
    def on_batch_end(self, runner: IRunner) -> None:
        """Batch end hook.

        Args:
            runner (IRunner): current runner
        """
        runner.batch_metrics = self._process_metrics(runner.batch_metrics)
        for key, value in runner.batch_metrics.items():
            self.meters[key].add(value, runner.batch_size)
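
meters.AverageValueMeter keeps a weighted running mean so per-batch metrics can be averaged over a whole loader, with the batch size as the weight. A minimal sketch of the idea (not the actual Catalyst class):

class RunningMean:
    """add(value, n) treats value as a mean over n samples."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def add(self, value, n=1):
        self.total += value * n
        self.count += n

    @property
    def mean(self):
        return self.total / self.count if self.count else 0.0

m = RunningMean()
m.add(0.5, n=32)  # batch of 32 with mean loss 0.5
m.add(0.7, n=16)  # a smaller final batch weighs less
print(m.mean)     # 0.5666...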
Example #20
    def step_batch(self, runner: IRunner) -> None:
        """Update learning rate and momentum in runner.

        Args:
            runner (IRunner): current runner
        """
        lr, momentum = self._scheduler_step(scheduler=self._scheduler)

        if self.scheduler_key is not None:
            runner.batch_metrics[f"lr/{self.scheduler_key}"] = lr
            if momentum is not None:
                runner.batch_metrics[
                    f"momentum/{self.scheduler_key}"] = momentum
        else:
            runner.batch_metrics["lr"] = lr
            if momentum is not None:
                runner.batch_metrics["momentum"] = momentum
Example #21
    def on_batch_end(self, runner: IRunner):
        """Check if iterated specified number of batches.

        Args:
            runner: current runner
        """
        if runner.loader_batch_step >= self.num_batch_steps:
            runner.need_early_stop = True
Example #22
    def __init__(
        self,
        input_key: str = "features",
        target_key: str = "target",
        loss_key: str = "loss",
        augemention_prefix: str = "augment",
        projection_prefix: str = "projection",
        embedding_prefix: str = "embedding",
    ):
        """Init."""
        IRunner.__init__(self)

        self._target_key = target_key
        self._loss_key = loss_key
        self._projection_prefix = projection_prefix
        self._augemention_prefix = augemention_prefix
        self._embedding_prefix = embedding_prefix
        self._input_key = input_key
Example #23
    def on_stage_start(self, runner: IRunner) -> None:
        """Checks that the current stage has correct optimizer.

        Args:
            runner (IRunner): current runner
        """
        self._optimizer = runner.get_attr(key="optimizer",
                                          inner_key=self.optimizer_key)
        assert self._optimizer is not None
Example #24
    def on_exception(self, runner: IRunner):
        """Called if an Exception was raised."""
        exception = runner.exception
        if not utils.is_exception(exception):
            return

        if isinstance(exception, KeyboardInterrupt):
            self.tqdm.write("Early exiting")
            runner.need_exception_reraise = False
Example #25
    def on_loader_end(self, runner: IRunner) -> None:
        """Loader end hook.

        Args:
            runner (IRunner): current runner
        """
        for key, value in self.meters.items():
            runner.loader_metrics[key] = value.mean
        for key, value in runner.loader_metrics.items():
            runner.epoch_metrics[f"{runner.loader_name}_{key}"] = value
Example #26
    def step_epoch(self, runner: IRunner) -> None:
        """@TODO: Docs. Contribution is welcome.

        Args:
            runner (IRunner): current runner
        """
        reduced_metric = runner.valid_metrics[self.reduced_metric]
        lr, momentum = self._scheduler_step(scheduler=self._scheduler,
                                            reduced_metric=reduced_metric)

        if self.scheduler_key is not None:
            runner.epoch_metrics[f"lr/{self.scheduler_key}"] = lr
            if momentum is not None:
                runner.epoch_metrics[
                    f"momentum/{self.scheduler_key}"] = momentum
        else:
            runner.epoch_metrics["lr"] = lr
            if momentum is not None:
                runner.epoch_metrics["momentum"] = momentum
Example #27
    def step_epoch(self, runner: IRunner) -> None:
        """Update momentum in runner.

        Args:
            runner: current runner
        """
        reduced_metric = runner.valid_metrics[self.reduced_metric]
        lr, momentum = self._scheduler_step(scheduler=self._scheduler,
                                            reduced_metric=reduced_metric)

        if self.scheduler_key is not None:
            runner.epoch_metrics[f"lr/{self.scheduler_key}"] = lr
            if momentum is not None:
                runner.epoch_metrics[
                    f"momentum/{self.scheduler_key}"] = momentum
        else:
            runner.epoch_metrics["lr"] = lr
            if momentum is not None:
                runner.epoch_metrics["momentum"] = momentum
Example #28
    def on_epoch_end(self, runner: IRunner) -> None:
        """On epoch end event.

        Args:
            runner (IRunner): current runner
        """
        if self.decouple_weight_decay:
            for i, wd in enumerate(self._optimizer_wd):
                self._optimizer.param_groups[i]["weight_decay"] = wd

        lr = self._optimizer.param_groups[0]["lr"]
        lr_name = (f"lr/{self.optimizer_key}"
                   if self.optimizer_key is not None else "lr")
        runner.epoch_metrics[lr_name] = lr

        momentum = utils.get_optimizer_momentum(self._optimizer)
        if momentum is not None:
            momentum_name = (f"momentum/{self.optimizer_key}"
                             if self.optimizer_key is not None else "momentum")
            runner.epoch_metrics[momentum_name] = momentum
Example #29
    def on_stage_start(self, runner: IRunner) -> None:
        """Stage start hook.

        Args:
            runner (IRunner): current runner
        """
        optimizer = runner.get_attr(key="optimizer",
                                    inner_key=self.optimizer_key)
        assert optimizer is not None
        self._optimizer = optimizer
        self.init_lr = optimizer.defaults["lr"]
Example #30
    def on_epoch_end(self, runner: IRunner) -> None:
        """Check if validation metric should be
        dropped for current epoch.

        Args:
            runner (IRunner): current runner
        """
        valid_metric_name = f"{runner.valid_loader}_{runner.main_metric}"
        if self.valid_loader not in runner.loaders:
            runner.epoch_metrics[valid_metric_name] = (
                float("+inf") if runner.minimize_metric else float("-inf"))