Example 1
def main_worker(args, unknown_args):
    """Runs main worker thread from model training."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    config.setdefault("distributed_params", {})["apex"] = args.apex
    config.setdefault("distributed_params", {})["amp"] = args.amp
    expdir = Path(args.expdir)

    # optuna objective
    def objective(trial: optuna.trial.Trial):
        trial, trial_config = _process_trial_config(trial, config.copy())
        experiment, runner, trial_config = prepare_config_api_components(
            expdir=expdir, config=trial_config)
        # @TODO: here we need better solution.
        experiment._trial = trial  # noqa: WPS437

        if experiment.logdir is not None and get_rank() <= 0:
            dump_environment(trial_config, experiment.logdir, args.configs)
            dump_code(args.expdir, experiment.logdir)

        runner.run_experiment(experiment)

        return runner.best_valid_metrics[runner.main_metric]

    # optuna direction
    direction = ("minimize" if config.get("stages", {}).get(
        "stage_params", {}).get("minimize_metric", True) else "maximize")

    # optuna study
    study_params = config.pop("study_params", {})

    # optuna sampler
    sampler_params = study_params.pop("sampler_params", {})
    optuna_sampler_type = sampler_params.pop("sampler", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner
    pruner_params = study_params.pop("pruner_params", {})
    optuna_pruner_type = pruner_params.pop("pruner", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    study = optuna.create_study(
        direction=direction,
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
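The sampler and pruner blocks above resolve class names dynamically against the `optuna.samplers` and `optuna.pruners` namespaces. A minimal standalone sketch of that pattern, assuming a hypothetical `sampler_params` dict (the key names are illustrative, not Catalyst's exact config schema):

import optuna

# Hypothetical config fragment: "sampler" names the class, the rest are kwargs.
sampler_params = {"sampler": "TPESampler", "n_startup_trials": 5}

sampler_type = sampler_params.pop("sampler", None)
# getattr on the module is equivalent to the __dict__ lookup used above.
sampler = getattr(optuna.samplers, sampler_type)(**sampler_params) if sampler_type else None

study = optuna.create_study(direction="minimize", sampler=sampler)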
Example 2
    def on_epoch_start(self, runner: "IRunner"):
        """Event handler for epoch start.

        Args:
            runner: IRunner instance.

        Raises:
            RunnerException: if current DataLoader is empty.
        """
        assert self.loaders is not None

        for loader_key, loader in self.loaders.items():
            if len(loader) == 0:
                raise RunnerException(
                    f"DataLoader with name {loader_key} is empty.")

        if not self.is_infer_stage:
            assert self.valid_loader in self.loaders.keys(), (
                f"'{self.valid_loader}' "
                f"should be in provided loaders: {list(self.loaders.keys())}")
        else:
            assert not any(
                x.startswith(SETTINGS.loader_train_prefix)
                for x in self.loaders.keys()
            ), "for inference no train loader should be passed"

        set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
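Every example in this listing funnels through `set_global_seed`. A minimal sketch of what such a helper typically does, seeding the standard library, NumPy, and PyTorch RNGs in one call (Catalyst's actual implementation may differ in details):

import random

import numpy as np
import torch

def set_global_seed(seed: int) -> None:
    # Seed every RNG a training run touches so results are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)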
Example 3
    def on_loader_start(self, runner: "IRunner"):
        """Event handler for loader start.

        Args:
            runner: IRunner instance.

        Raises:
            RunnerException: if current DataLoader is empty.
        """
        assert self.loader is not None

        self.loader_len = len(self.loader)
        if self.loader_len == 0:
            raise RunnerException(
                f"DataLoader with name {self.loader_key} is empty.")
        self.loader_batch_size = (
            self.loader.batch_sampler.batch_size
            if self.loader.batch_sampler is not None
            else self.loader.batch_size
        )
        self.loader_sample_step = 0

        self.is_train_loader = self.loader_key.startswith(
            SETTINGS.loader_train_prefix)
        self.is_valid_loader = self.loader_key.startswith(
            SETTINGS.loader_valid_prefix)
        self.is_infer_loader = self.loader_key.startswith(
            SETTINGS.loader_infer_prefix)
        maybe_recursive_call(self.model, "train", mode=self.is_train_loader)

        if isinstance(self.loader.sampler, DistributedSampler):
            self.loader.sampler.set_epoch(self.epoch)

        set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
Example 4
    def _setup_callbacks(self):
        set_global_seed(self.seed + max(0, self.engine.rank) + self.global_epoch_step)
        callbacks = self.get_callbacks(self.stage_key)
        callbacks = filter_callbacks_by_node(callbacks)
        callbacks = sort_callbacks_by_order(callbacks)
        self.callbacks = callbacks
        self._check_callbacks()
Example 5
    def _setup_components(self) -> None:
        set_global_seed(self.seed + max(0, self.engine.process_index) + self.epoch_step)
        self.model = self._setup_model()
        self.criterion = self._setup_criterion()
        self.optimizer = self._setup_optimizer(model=self.model)
        self.scheduler = self._setup_scheduler(optimizer=self.optimizer)

        if isinstance(self.model, torch.nn.Module):
            self.model = self.engine.prepare(self.model)
        elif isinstance(self.model, dict):
            self.model = {
                k: self.engine.prepare(v)
                for k, v in self.model.items()
            }
        else:
            raise NotImplementedError()

        if isinstance(self.optimizer, torch.optim.Optimizer):
            self.optimizer = self.engine.prepare(self.optimizer)
        elif isinstance(self.optimizer, dict):
            self.optimizer = {
                k: self.engine.prepare(v)
                for k, v in self.optimizer.items()
            }
        elif self.optimizer is None:
            pass
        else:
            raise NotImplementedError()
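The `max(0, self.engine.process_index)` guard matters because non-distributed engines commonly report a process index of -1 (the same convention as `get_rank` elsewhere in this listing); clamping keeps single-process runs on the base seed while each distributed worker still gets a distinct, reproducible offset. A small sketch of the arithmetic (the function name is illustrative):

def component_seed(base_seed: int, process_index: int, epoch_step: int) -> int:
    # -1 means "not distributed"; clamp so a single process matches rank 0.
    return base_seed + max(0, process_index) + epoch_step

assert component_seed(42, -1, 3) == component_seed(42, 0, 3) == 45
assert component_seed(42, 1, 3) == 46  # each rank seeds differently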
Example 6
    def _setup_loaders(self) -> None:
        set_global_seed(self.seed + max(0, self.engine.process_index) + self.epoch_step)
        loaders = self.get_loaders()
        self.loaders = {
            key: self.engine.prepare(value)
            for key, value in loaders.items()
        }
Example 7
    def _setup_components(self) -> None:
        set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)
        self.model, self.criterion, self.optimizer, self.scheduler = self.engine.init_components(
            model_fn=self._get_model,
            criterion_fn=self._get_criterion,
            optimizer_fn=self._get_optimizer,
            scheduler_fn=self._get_scheduler,
        )
Example 8
def main(args, unknown_args):
    """Runs the ``catalyst-dl tune`` script."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # optuna objective
    def objective(trial: optuna.trial.Trial):
        trial, trial_config = _process_trial_config(trial, config.copy())
        runner: ConfigRunner = get_config_runner(expdir=Path(args.expdir), config=trial_config)
        # @TODO: here we need better solution.
        runner._trial = trial  # noqa: WPS437

        if get_rank() <= 0:
            dump_environment(logdir=runner.logdir, config=config, configs_path=args.configs)
            dump_code(expdir=args.expdir, logdir=runner.logdir)

        runner.run()

        return trial.best_score

    # optuna study
    study_params = config.pop("study", {})

    # optuna sampler
    sampler_params = study_params.pop("sampler", {})
    optuna_sampler_type = sampler_params.pop("_target_", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner
    pruner_params = study_params.pop("pruner", {})
    optuna_pruner_type = pruner_params.pop("_target_", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    study = optuna.create_study(
        direction=args.direction or study_params.pop("direction", "minimize"),
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
        **study_params,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
Example 9
    def _prepare_seed(self):
        # Perturb the global RNG from this sampler's own base seed first,
        # so concurrent samplers diverge before the episode seed is drawn.
        seed = self._seed + random.randrange(_SEED_RANGE)
        set_global_seed(seed)
        if self.seeds is None:
            # Training: draw a fresh episode seed from the full range.
            seed = random.randrange(_SEED_RANGE)
        else:
            # Inference: replay one of the fixed validation seeds.
            seed = random.choice(self.seeds)
        set_global_seed(seed)
        return seed
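The two-stage reseed above first perturbs the global RNG with a sampler-specific offset, then draws the episode seed either freely (training) or from a fixed list (inference), so evaluation episodes stay replayable. The same idea outside the class, assuming `_SEED_RANGE` is a large constant such as 2**32 and the seed list comes from config:

import random

_SEED_RANGE = 2**32
valid_seeds = [100, 200, 300]  # hypothetical fixed evaluation seeds

def draw_episode_seed(infer: bool) -> int:
    # Training explores freely; inference replays a known seed set.
    return random.choice(valid_seeds) if infer else random.randrange(_SEED_RANGE)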
Example 10
    def on_stage_start(self, runner: "IRunner"):
        """Event handler for stage start.

        Args:
            runner: IRunner instance.
        """
        assert self.stage is not None

        set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
Example 11
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        resume: str = None,
        fp16: Union[Dict, bool] = None,
        initial_seed: int = 42,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.
        Cleans up the experiment info to avoid possible collisions.
        Sets `is_train_loader` and `is_valid_loader` to `False` while
        keeping `is_infer_loader` as `True`. Moves the model to evaluation mode.

        Args:
            loader: loader to predict
            model: model to use for prediction
            resume: path to checkpoint to resume
            fp16 (Union[Dict, bool]): fp16 settings (same as in `train`)
            initial_seed: seed to use before prediction

        Yields:
            batches with model predictions
        """
        fp16 = _resolve_bool_fp16(fp16)

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            checkpoint = load_checkpoint(resume)
            unpack_checkpoint(checkpoint, model=self.model)

        self.experiment = None
        set_global_seed(initial_seed)
        (model, _, _, _, device) = process_components(  # noqa: WPS122
            model=self.model,
            distributed_params=fp16,
            device=self.device,
        )
        self._prepare_inner_state(
            stage="infer",
            model=model,
            device=device,
            is_train_loader=False,
            is_valid_loader=False,
            is_infer_loader=True,
        )
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(initial_seed)
        for batch in loader:
            yield self.predict_batch(batch)
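Since `predict_loader` yields predictions batch by batch, collecting them into a single array is a short comprehension. A usage sketch, assuming a fitted `runner`, a `valid_loader` built elsewhere, and that `predict_batch` returns a tensor of logits:

import numpy as np

predictions = np.concatenate(
    [logits.detach().cpu().numpy() for logits in runner.predict_loader(loader=valid_loader)],
    axis=0,
)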
Example 12
def run_sampler(
    *,
    config,
    logdir,
    algorithm_fn,
    environment_fn,
    sampler_fn,
    vis,
    infer,
    seed=42,
    id=None,
    resume=None,
    db=True,
    exploration_power=1.0,
    sync_epoch=False
):
    config_ = copy.deepcopy(config)
    id = 0 if id is None else id
    set_global_seed(seed + id)

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    ) if db else None

    env = environment_fn(**config_["environment"], visualize=vis)
    agent = algorithm_fn.prepare_for_sampler(env_spec=env, config=config_)

    exploration_params = config_["sampler"].pop("exploration_params", None)
    exploration_handler = (
        ExplorationHandler(env=env, *exploration_params)
        if exploration_params is not None
        else None
    )
    if exploration_handler is not None:
        exploration_handler.set_power(exploration_power)

    mode = "infer" if infer else "train"
    valid_seeds = config_["sampler"].pop("valid_seeds")
    seeds = valid_seeds if infer else None

    sampler = sampler_fn(
        agent=agent,
        env=env,
        db_server=db_server,
        exploration_handler=exploration_handler,
        **config_["sampler"],
        logdir=logdir,
        id=id,
        mode=mode,
        seeds=seeds
    )

    if resume is not None:
        sampler.load_checkpoint(filepath=resume)

    sampler.run()
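`set_global_seed(seed + id)` is what decorrelates parallel samplers: each sampler process gets its own reproducible RNG stream instead of all of them replaying identical episodes. A sketch of the offset scheme (names are illustrative):

BASE_SEED = 42

def sampler_seed(sampler_id: int) -> int:
    # One deterministic, non-overlapping seed per sampler process.
    return BASE_SEED + sampler_id

assert len({sampler_seed(i) for i in range(8)}) == 8  # all distinct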
Example 13
    def on_experiment_start(self, runner: "IRunner"):
        """Event handler for experiment start.

        Args:
            runner: IRunner instance.

        .. note::
            This event works only on IRunner.
        """
        assert self.experiment is not None

        set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
Example 14
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: TorchModel = None,
        engine: Union["Engine", str] = None,
        seed: int = 42,
        # extra info
        resume: str = None,
        # engine extra params,
        cpu: bool = False,
        fp16: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            resume: path to checkpoint for model
            cpu: boolean flag to force CPU usage
            fp16: boolean flag to use half-precision

        Yields:
            batches with model predictions

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505
        """
        self.engine = engine or get_available_engine(cpu=cpu, fp16=fp16)

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            self.engine.wait_for_everyone()
            unwrapped_model = self.engine.unwrap_model(self.model)
            unwrapped_model.load_state_dict(load_checkpoint(resume))

        self.model = self.engine.prepare(self.model)
        maybe_recursive_call(self.model, "train", mode=False)
        loader = self.engine.prepare(loader)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
Example 15
def config_main(args, unknown_args):
    """Yaml config catalyst-dl run entry point."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    runner: ConfigRunner = get_config_runner(expdir=args.expdir, config=config)

    if get_rank() <= 0:
        dump_environment(logdir=runner.logdir, config=config, configs_path=args.configs)
        dump_code(expdir=args.expdir, logdir=runner.logdir)

    runner.run()
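The `get_rank() <= 0` guard restricts environment and code dumping to the main process: -1 conventionally means "not distributed" and 0 is the primary worker, so both write logs while secondary ranks stay silent. A sketch of the guard in isolation (assuming that -1 convention):

def is_main_process(rank: int) -> bool:
    # -1: not distributed; 0: primary DDP worker; >0: secondary workers.
    return rank <= 0

assert is_main_process(-1) and is_main_process(0) and not is_main_process(1)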
Example 16
    def on_epoch_start(self, runner: "IRunner"):
        """Event handler."""
        self.global_epoch_step += 1
        self.stage_epoch_step += 1
        self.epoch_metrics: Dict = defaultdict(None)
        # storage for pure epoch-based metrics, like lr/momentum
        self.epoch_metrics["_epoch_"] = {}

        assert self.loaders is not None
        for loader_key, loader in self.loaders.items():
            if len(loader) == 0:
                raise RunnerException(f"DataLoader with name {loader_key} is empty.")
        set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)
Example 17
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    Experiment, Runner = import_experiment_and_runner(Path(args.expdir))

    experiment = Experiment(config)
    runner = Runner()

    if experiment.logdir is not None:
        dump_config(config, experiment.logdir, args.configs)
        dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment, check=args.check)
Example 18
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
        # engine extra params,
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True` will start training in distributed mode.
                Note: Works only with python scripts. No jupyter support.

        Yields:
            batches with model predictions
        """
        self._engine = engine or get_available_engine(
            fp16=fp16, ddp=ddp, amp=amp, apex=apex)

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
Example 19
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    if args.logdir is not None:
        os.makedirs(args.logdir, exist_ok=True)
        dump_config(config, args.logdir, args.configs)

    if args.expdir is not None:
        module = import_module(expdir=args.expdir)  # noqa: F841

    env = ENVIRONMENTS.get_from_params(**config["environment"])

    algorithm_name = config["algorithm"].pop("algorithm")
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        trainer_fn = OffpolicyTrainer
        sync_epoch = False
        weights_sync_mode = "critic" if env.discrete_actions else "actor"
    elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        trainer_fn = OnpolicyTrainer
        sync_epoch = True
        weights_sync_mode = "actor"
    else:
        # @TODO: add registry for algorithms, trainers, samplers
        raise NotImplementedError()

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    )

    algorithm_fn = ALGORITHMS.get(algorithm_name)
    algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config)

    if args.resume is not None:
        algorithm.load_checkpoint(filepath=args.resume)

    trainer = trainer_fn(
        algorithm=algorithm,
        env_spec=env,
        db_server=db_server,
        logdir=args.logdir,
        weights_sync_mode=weights_sync_mode,
        **config["trainer"],
    )

    trainer.run()
Example 20
def main_worker(cfg: DictConfig):
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    import_module(hydra.utils.to_absolute_path(cfg.args.expdir))

    experiment = hydra.utils.instantiate(cfg.experiment, cfg=cfg)
    runner = hydra.utils.instantiate(cfg.runner)

    if experiment.logdir is not None and get_rank() <= 0:
        dump_environment(cfg, experiment.logdir)
        dump_code(
            hydra.utils.to_absolute_path(cfg.args.expdir), experiment.logdir
        )

    runner.run_experiment(experiment)
Example 21
def main(args, unknown_args):
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    if args.logdir is not None:
        os.makedirs(args.logdir, exist_ok=True)
        dump_config(args.configs, args.logdir)

    if args.expdir is not None:
        module = import_module(expdir=args.expdir)  # noqa: F841

    algorithm_name = config["algorithm"].pop("algorithm")
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        trainer_fn = OffpolicyTrainer
        sync_epoch = False
    else:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        trainer_fn = OnpolicyTrainer
        sync_epoch = True

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    )

    env = ENVIRONMENTS.get_from_params(**config["environment"])

    algorithm_fn = ALGORITHMS.get(algorithm_name)
    algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config)

    if args.resume is not None:
        algorithm.load_checkpoint(filepath=args.resume)

    trainer = trainer_fn(
        algorithm=algorithm,
        env_spec=env,
        db_server=db_server,
        **config["trainer"],
        logdir=args.logdir,
    )

    def on_exit():
        for p in trainer.get_processes():
            p.terminate()

    atexit.register(on_exit)

    trainer.run()
Example 22
    def __init__(
        self,
        agent: Union[ActorSpec, CriticSpec],
        env: EnvironmentSpec,
        db_server: DBSpec = None,
        exploration_handler: ExplorationHandler = None,
        logdir: str = None,
        id: int = 0,
        mode: str = "infer",
        buffer_size: int = int(1e4),
        weights_sync_period: int = 1,
        seeds: List = None,
        episode_limit: int = None,
        force_store: bool = False,
        gc_period: int = 10,
    ):
        self._device = UtilsFactory.prepare_device()
        self._seed = 42 + id
        set_global_seed(self._seed)
        self._sampler_id = id

        self._infer = mode == "infer"
        self.seeds = seeds

        # logging
        self._prepare_logger(logdir, mode)

        # environment, model, exploration & action handlers
        self.env = env
        self.agent = agent
        self.exploration_handler = exploration_handler
        self.episode_index = 0
        self.episode_runner = EpisodeRunner(
            env=self.env,
            agent=self.agent,
            device=self._device,
            capacity=buffer_size,
            deterministic=self._infer,
        )

        # synchronization configuration
        self.db_server = db_server
        self.weights_sync_period = weights_sync_period
        self.episode_limit = episode_limit or _BIG_NUM
        self._force_store = force_store
        self._sampler_weight_mode = "critic" if env.discrete_actions else "actor"
        self._gc_period = gc_period
Example 23
def main_worker(args, unknown_args):
    """Runs main worker thread from model training."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    config.setdefault("distributed_params", {})["apex"] = args.apex
    config.setdefault("distributed_params", {})["amp"] = args.amp

    experiment, runner, config = prepare_config_api_components(
        expdir=Path(args.expdir), config=config
    )

    if experiment.logdir is not None and get_rank() <= 0:
        dump_environment(config, experiment.logdir, args.configs)
        dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment)
Example 24
def main(args, _=None):
    """Run the ``catalyst-contrib image2embeddings`` script."""
    global IMG_SIZE

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    IMG_SIZE = (args.img_size, args.img_size)  # noqa: WPS442

    if args.traced_model is not None:
        device = get_device()
        model = torch.jit.load(str(args.traced_model), map_location=device)
    else:
        model = ResnetEncoder(arch=args.arch, pooling=args.pooling)
        model = model.eval()
        model, _, _, _, device = process_components(model=model)

    df = pd.read_csv(args.in_csv)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())

    open_fn = ImageReader(
        input_key=args.img_col, output_key="image", rootpath=args.rootpath
    )

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        dict_transform=dict_transformer,
    )

    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            batch_features = model(batch["image"].to(device))
            batch_features = batch_features.cpu().detach().numpy()
            features.append(batch_features)

    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
Example 25
def main(cfg: DictConfig):
    """
    Hydra config catalyst-dl run entry point

    Args:
        cfg: (DictConfig) configuration

    """
    cfg = prepare_hydra_config(cfg)
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    import_module(hydra.utils.to_absolute_path(cfg.args.expdir))
    runner = hydra.utils.instantiate(cfg.runner, cfg=cfg)

    if get_rank() <= 0:
        dump_environment(logdir=runner.logdir, config=cfg)
        dump_code(expdir=hydra.utils.to_absolute_path(cfg.args.expdir),
                  logdir=runner.logdir)

    runner.run()
Example 26
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction

        Yields:
            batches with model predictions
        """
        if engine is not None:
            self.engine = engine
        if self.engine is None:
            self.engine = get_available_engine()

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
Example 27
    def on_loader_start(self, runner: "IRunner"):
        """Event handler."""
        assert self.loader is not None
        self.is_train_loader: bool = self.loader_key.startswith("train")
        self.is_valid_loader: bool = self.loader_key.startswith("valid")
        self.is_infer_loader: bool = self.loader_key.startswith("infer")
        assert self.is_train_loader or self.is_valid_loader or self.is_infer_loader
        self.loader_batch_size: int = _get_batch_size(self.loader)
        self.loader_batch_len: int = len(self.loader)
        self.loader_sample_len: int = len(self.loader.dataset)
        self.loader_batch_step: int = 0
        self.loader_sample_step: int = 0
        self.loader_metrics: Dict = defaultdict(None)

        if self.loader_batch_len == 0:
            raise NotImplementedError(f"DataLoader with name {self.loader_key} is empty.")
        set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)

        maybe_recursive_call(self.model, "train", mode=self.is_train_loader)
        if isinstance(self.loader.sampler, DistributedSampler):
            self.loader.sampler.set_epoch(self.stage_epoch_step)
        self.loader = self.engine.autocast_loader(self.loader)
Example 28
    def on_loader_start(self, runner: "IRunner"):
        """Event handler."""
        assert self.loader is not None
        self.is_train_loader: bool = self.loader_key.startswith("train")
        self.is_valid_loader: bool = self.loader_key.startswith("valid")
        self.is_infer_loader: bool = self.loader_key.startswith("infer")
        assert self.is_train_loader or self.is_valid_loader or self.is_infer_loader
        self.loader_batch_size: int = get_loader_batch_size(self.loader)
        self.loader_batch_len: int = len(self.loader)
        self.loader_sample_len: int = get_loader_num_samples(self.loader)
        self.loader_batch_step: int = 0
        self.loader_sample_step: int = 0
        self.loader_metrics: Dict = defaultdict(None)

        if self.loader_batch_len == 0:
            raise IRunnerError(
                f"DataLoader with name {self.loader_key} is empty.")
        set_global_seed(self.seed + max(0, self.engine.process_index) + self.epoch_step)

        maybe_recursive_call(self.model, "train", mode=self.is_train_loader)
        if isinstance(self.loader.sampler, DistributedSampler):
            self.loader.sampler.set_epoch(self.epoch_step)
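Calling `sampler.set_epoch(...)` at loader start is what makes `DistributedSampler` reshuffle differently each epoch; without it, every epoch replays the same shard order. A plain-PyTorch sketch of the pattern, with explicit `num_replicas`/`rank` so it runs without an initialized process group:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(100).float())
sampler = DistributedSampler(dataset, num_replicas=2, rank=0)  # assumed 2-process setup
loader = DataLoader(dataset, batch_size=8, sampler=sampler)

for epoch in range(3):
    sampler.set_epoch(epoch)  # reshuffle this rank's shard for the new epoch
    for batch in loader:
        pass  # training step would go here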
Example 29
    def _get_loader(
        dataset: Dataset,
        sampler: Sampler,
        initial_seed: int,
        params: DictConfig,
    ) -> DataLoader:
        params = OmegaConf.to_container(params, resolve=True)
        per_gpu_scaling = params.pop("per_gpu_scaling", False)
        params["dataset"] = dataset
        distributed_rank = get_rank()
        distributed = distributed_rank > -1
        if per_gpu_scaling and not distributed:
            num_gpus = max(1, torch.cuda.device_count())
            assert "batch_size" in params, "loader config must contain 'batch_size' key"
            assert "num_workers" in params, "loader config must contain 'num_workers' key"
            params["batch_size"] *= num_gpus
            params["num_workers"] *= num_gpus
        if distributed:
            if sampler is not None:
                if not isinstance(sampler, DistributedSampler):
                    sampler = DistributedSamplerWrapper(sampler=sampler)
            else:
                sampler = DistributedSampler(dataset=params["dataset"])
        params["shuffle"] = params.get("shuffle", False) and sampler is None
        params["sampler"] = sampler
        worker_init_fn = params.pop("worker_init_fn", None)
        if worker_init_fn is None:
            params["worker_init_fn"] = lambda x: set_global_seed(initial_seed + x)
        else:
            params["worker_init_fn"] = hydra.utils.get_method(worker_init_fn)
        collate_fn = params.pop("collate_fn", None)
        if collate_fn is None:
            params["collate_fn"] = None
        else:
            params["collate_fn"] = hydra.utils.get_method(collate_fn)
        loader: DataLoader = DataLoader(**params)
        return loader
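The `worker_init_fn` default above guards against a classic PyTorch pitfall: forked DataLoader workers can otherwise start from identical NumPy RNG states and produce duplicate augmentations. A standalone sketch of the same fix with a plain DataLoader, reusing the `set_global_seed` helper sketched after Example 2 (the dataset and seed are placeholders):

import torch
from torch.utils.data import DataLoader, TensorDataset

initial_seed = 42
dataset = TensorDataset(torch.arange(100).float())

loader = DataLoader(
    dataset,
    batch_size=8,
    num_workers=2,
    # Each worker re-seeds with a distinct, deterministic offset.
    worker_init_fn=lambda worker_id: set_global_seed(initial_seed + worker_id),
)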
Example 30
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
        # engine extra params,
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True` will start training in distributed mode.
                Note: Works only with python scripts. No jupyter support.

        Yields:
            batches with model predictions

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples

        Examples:

        .. code-block:: python

            import os
            from torch import nn, optim
            from torch.nn import functional as F
            from torch.utils.data import DataLoader
            from catalyst import dl, metrics
            from catalyst.data.transforms import ToTensor
            from catalyst.contrib.datasets import MNIST

            model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
            optimizer = optim.Adam(model.parameters(), lr=0.02)

            loaders = {
                "train": DataLoader(
                    MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                    batch_size=32
                ),
                "valid": DataLoader(
                    MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                    batch_size=32
                ),
            }

            class CustomRunner(dl.Runner):
                def predict_batch(self, batch):
                    # model inference step
                    return self.model(batch[0].to(self.device))

                def on_loader_start(self, runner):
                    super().on_loader_start(runner)
                    self.meters = {
                        key: metrics.AdditiveValueMetric(compute_on_call=False)
                        for key in ["loss", "accuracy01", "accuracy03"]
                    }

                def handle_batch(self, batch):
                    # model train/valid step
                    # unpack the batch
                    x, y = batch
                    # run model forward pass
                    logits = self.model(x)
                    # compute the loss
                    loss = F.cross_entropy(logits, y)
                    # compute other metrics of interest
                    accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                    # log metrics
                    self.batch_metrics.update(
                        {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                    )
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.meters[key].update(
                            self.batch_metrics[key].item(),
                            self.batch_size
                        )
                    # run model backward pass
                    if self.is_train_loader:
                        loss.backward()
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                def on_loader_end(self, runner):
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.loader_metrics[key] = self.meters[key].compute()[0]
                    super().on_loader_end(runner)

            runner = CustomRunner()
            # model training
            runner.train(
                model=model,
                optimizer=optimizer,
                loaders=loaders,
                logdir="./logs",
                num_epochs=5,
                verbose=True,
                valid_loader="valid",
                valid_metric="loss",
                minimize_valid_metric=True,
            )
            # model inference
            for logits in runner.predict_loader(loader=loaders["valid"]):
                assert logits.detach().cpu().numpy().shape[-1] == 10
        """
        self._engine = engine or get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)