def main_worker(args, unknown_args):
    """Runs main worker thread from model training.

    Parses CLI/config arguments, builds an optuna study, and optimizes
    an objective that trains one experiment per trial.

    Args:
        args: parsed known CLI arguments
        unknown_args: extra CLI arguments merged into the config
    """
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # propagate half-precision flags into the (possibly missing) section
    config.setdefault("distributed_params", {})["apex"] = args.apex
    config.setdefault("distributed_params", {})["amp"] = args.amp

    expdir = Path(args.expdir)

    # optuna objective
    def objective(trial: optuna.trial):
        # rewrite a copy of the config with trial-suggested hyperparameters
        trial, trial_config = _process_trial_config(trial, config.copy())
        experiment, runner, trial_config = prepare_config_api_components(
            expdir=expdir, config=trial_config)
        # @TODO: here we need better solution.
        experiment._trial = trial  # noqa: WPS437

        # dump environment/code only from the master process (rank <= 0)
        if experiment.logdir is not None and get_rank() <= 0:
            dump_environment(trial_config, experiment.logdir, args.configs)
            dump_code(args.expdir, experiment.logdir)

        runner.run_experiment(experiment)

        return runner.best_valid_metrics[runner.main_metric]

    # optuna direction: follow the experiment's minimize_metric flag
    direction = (
        "minimize"
        if config.get("stages", {}).get(
            "stage_params", {}).get("minimize_metric", True)
        else "maximize"
    )

    # optuna study
    study_params = config.pop("study_params", {})

    # optuna sampler: looked up by class name in optuna.samplers
    sampler_params = study_params.pop("sampler_params", {})
    optuna_sampler_type = sampler_params.pop("sampler", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner: looked up by class name in optuna.pruners
    pruner_params = study_params.pop("pruner_params", {})
    optuna_pruner_type = pruner_params.pop("pruner", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    # CLI arguments take precedence over config values
    study = optuna.create_study(
        direction=direction,
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
def on_epoch_start(self, runner: "IRunner"):
    """Event handler for epoch start.

    Args:
        runner: IRunner instance.

    Raises:
        RunnerException: if current DataLoader is empty.
    """
    assert self.loaders is not None

    # fail fast on any empty dataloader
    for loader_key, loader in self.loaders.items():
        if not len(loader):
            raise RunnerException(
                f"DataLoader with name {loader_key} is empty.")

    if self.is_infer_stage:
        # inference stages must not contain a train loader
        has_train_loader = any(
            name.startswith(SETTINGS.loader_train_prefix)
            for name in self.loaders.keys()
        )
        assert not has_train_loader, \
            "for inference no train loader should be passed"
    else:
        assert self.valid_loader in self.loaders.keys(), (
            f"'{self.valid_loader}' "
            f"should be in provided loaders: {list(self.loaders.keys())}")

    # epoch-dependent reseed keeps runs reproducible across epochs
    set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
def on_loader_start(self, runner: "IRunner"):
    """Event handler for loader start.

    Args:
        runner: IRunner instance.

    Raises:
        RunnerException: if current DataLoader is empty.
    """
    assert self.loader is not None
    self.loader_len = len(self.loader)
    if self.loader_len == 0:
        raise RunnerException(
            f"DataLoader with name {self.loader_key} is empty.")
    # a custom batch_sampler overrides the loader's plain batch_size
    self.loader_batch_size = (self.loader.batch_sampler.batch_size
                              if self.loader.batch_sampler is not None
                              else self.loader.batch_size)
    self.loader_sample_step = 0

    # loader role is encoded in the loader-key prefix (train/valid/infer)
    self.is_train_loader = self.loader_key.startswith(
        SETTINGS.loader_train_prefix)
    self.is_valid_loader = self.loader_key.startswith(
        SETTINGS.loader_valid_prefix)
    self.is_infer_loader = self.loader_key.startswith(
        SETTINGS.loader_infer_prefix)

    # switch train/eval mode (handles dict-of-models via recursion)
    maybe_recursive_call(self.model, "train", mode=self.is_train_loader)

    # keep DDP shuffling deterministic per epoch
    if isinstance(self.loader.sampler, DistributedSampler):
        self.loader.sampler.set_epoch(self.epoch)
    set_global_seed(self.experiment.initial_seed + self.global_epoch + 1)
def _setup_callbacks(self):
    """Prepare stage callbacks: reseed, filter by node, sort, validate."""
    seed = self.seed + max(0, self.engine.rank) + self.global_epoch_step
    set_global_seed(seed)
    stage_callbacks = self.get_callbacks(self.stage_key)
    # keep only callbacks relevant for this node, then enforce ordering
    self.callbacks = sort_callbacks_by_order(
        filter_callbacks_by_node(stage_callbacks)
    )
    self._check_callbacks()
def _setup_components(self) -> None:
    """Instantiate and engine-wrap model, criterion, optimizer, scheduler."""
    set_global_seed(self.seed + max(0, self.engine.process_index) + self.epoch_step)
    self.model = self._setup_model()
    self.criterion = self._setup_criterion()
    self.optimizer = self._setup_optimizer(model=self.model)
    self.scheduler = self._setup_scheduler(optimizer=self.optimizer)

    # model may be a single module or a dict of named modules
    if isinstance(self.model, torch.nn.Module):
        self.model = self.engine.prepare(self.model)
    elif isinstance(self.model, dict):
        self.model = {
            k: self.engine.prepare(v) for k, v in self.model.items()
        }
    else:
        raise NotImplementedError()

    # same for the optimizer; None means "no optimizer" (e.g. inference)
    if isinstance(self.optimizer, torch.optim.Optimizer):
        self.optimizer = self.engine.prepare(self.optimizer)
    elif isinstance(self.optimizer, dict):
        self.optimizer = {
            k: self.engine.prepare(v) for k, v in self.optimizer.items()
        }
    elif self.optimizer is None:
        pass
    else:
        raise NotImplementedError()
def _setup_loaders(self) -> None:
    """Reseed and wrap every dataloader with the engine."""
    set_global_seed(
        self.seed + max(0, self.engine.process_index) + self.epoch_step
    )
    prepared = {}
    for key, loader in self.get_loaders().items():
        prepared[key] = self.engine.prepare(loader)
    self.loaders = prepared
def _setup_components(self) -> None:
    """Reseed and build model/criterion/optimizer/scheduler via the engine."""
    set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)
    components = self.engine.init_components(
        model_fn=self._get_model,
        criterion_fn=self._get_criterion,
        optimizer_fn=self._get_optimizer,
        scheduler_fn=self._get_scheduler,
    )
    self.model, self.criterion, self.optimizer, self.scheduler = components
def main(args, unknown_args):
    """Runs the ``catalyst-dl tune`` script.

    Args:
        args: parsed known CLI arguments
        unknown_args: extra CLI arguments merged into the config
    """
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # optuna objective
    def objective(trial: optuna.trial):
        # inject trial-suggested hyperparameters into a copy of the config
        trial, trial_config = _process_trial_config(trial, config.copy())
        runner: ConfigRunner = get_config_runner(
            expdir=Path(args.expdir), config=trial_config)
        # @TODO: here we need better solution.
        runner._trial = trial  # noqa: WPS437

        # dump environment/code only from the master process
        if get_rank() <= 0:
            dump_environment(
                logdir=runner.logdir, config=config,
                configs_path=args.configs)
            dump_code(expdir=args.expdir, logdir=runner.logdir)

        runner.run()

        return trial.best_score

    # optuna study
    study_params = config.pop("study", {})

    # optuna sampler: looked up by class name in optuna.samplers
    sampler_params = study_params.pop("sampler", {})
    optuna_sampler_type = sampler_params.pop("_target_", None)
    optuna_sampler = (
        optuna.samplers.__dict__[optuna_sampler_type](**sampler_params)
        if optuna_sampler_type is not None
        else None
    )

    # optuna pruner: looked up by class name in optuna.pruners
    pruner_params = study_params.pop("pruner", {})
    optuna_pruner_type = pruner_params.pop("_target_", None)
    optuna_pruner = (
        optuna.pruners.__dict__[optuna_pruner_type](**pruner_params)
        if optuna_pruner_type is not None
        else None
    )

    # CLI arguments take precedence over config values
    study = optuna.create_study(
        direction=args.direction or study_params.pop("direction", "minimize"),
        storage=args.storage or study_params.pop("storage", None),
        study_name=args.study_name or study_params.pop("study_name", None),
        sampler=optuna_sampler,
        pruner=optuna_pruner,
        **study_params,
    )
    study.optimize(
        objective,
        n_trials=args.n_trials,
        timeout=args.timeout,
        n_jobs=args.n_jobs or 1,
        gc_after_trial=args.gc_after_trial,
        show_progress_bar=args.show_progress_bar,
    )
def _prepare_seed(self):
    """Draw and apply the seed for the next episode.

    First reseeds the global RNG from the sampler-specific seed, then
    either draws a fresh random seed or picks one of the fixed seeds
    (`self.seeds`, used for validation), applies it, and returns it.
    """
    set_global_seed(self._seed + random.randrange(_SEED_RANGE))
    if self.seeds is not None:
        episode_seed = random.choice(self.seeds)
    else:
        episode_seed = random.randrange(_SEED_RANGE)
    set_global_seed(episode_seed)
    return episode_seed
def on_stage_start(self, runner: "IRunner"):
    """Event handler for stage start.

    Args:
        runner: IRunner instance.
    """
    assert self.stage is not None
    # epoch-dependent reseed keeps the stage reproducible
    stage_seed = self.experiment.initial_seed + self.global_epoch + 1
    set_global_seed(stage_seed)
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    resume: str = None,
    fp16: Union[Dict, bool] = None,
    initial_seed: int = 42,
) -> Generator:
    """
    Runs model inference on PyTorch Dataloader and returns
    python generator with model predictions from `runner.predict_batch`.
    Cleans up the experiment info to avoid possible collisions.
    Sets `is_train_loader` and `is_valid_loader` to `False` while
    keeping `is_infer_loader` as True. Moves model to evaluation mode.

    Args:
        loader: loader to predict
        model: model to use for prediction
        resume: path to checkpoint to resume
        fp16 (Union[Dict, bool]): fp16 settings (same as in `train`)
        initial_seed: seed to use before prediction

    Yields:
        batches with model predictions
    """
    fp16 = _resolve_bool_fp16(fp16)

    if model is not None:
        self.model = model
    assert self.model is not None

    if resume is not None:
        checkpoint = load_checkpoint(resume)
        unpack_checkpoint(checkpoint, model=self.model)

    # drop previous experiment state to avoid collisions
    self.experiment = None
    set_global_seed(initial_seed)
    (model, _, _, _, device) = process_components(  # noqa: WPS122
        model=self.model,
        distributed_params=fp16,
        device=self.device,
    )
    self._prepare_inner_state(
        stage="infer",
        model=model,
        device=device,
        is_train_loader=False,
        is_valid_loader=False,
        is_infer_loader=True,
    )
    # eval mode for all (possibly nested) modules
    maybe_recursive_call(self.model, "train", mode=False)

    # reseed right before iteration for reproducible predictions
    set_global_seed(initial_seed)
    for batch in loader:
        yield self.predict_batch(batch)
def run_sampler(
    *,
    config,
    logdir,
    algorithm_fn,
    environment_fn,
    sampler_fn,
    vis,
    infer,
    seed=42,
    id=None,
    resume=None,
    db=True,
    exploration_power=1.0,
    sync_epoch=False
):
    """Builds and runs a single RL sampler process.

    Args:
        config: experiment config (deep-copied before mutation)
        logdir: directory for sampler logs
        algorithm_fn: algorithm class used to build the sampler's agent
        environment_fn: environment factory
        sampler_fn: sampler class to instantiate
        vis: whether to visualize the environment
        infer: if True run in inference mode with fixed validation seeds
        seed: base random seed, offset by the sampler id
        id: sampler index (defaults to 0)
        resume: optional checkpoint path to load before running
        db: whether to connect to the trajectory database
        exploration_power: scaling factor for the exploration schedule
        sync_epoch: passed to the database server for epoch syncing
    """
    config_ = copy.deepcopy(config)
    id = 0 if id is None else id
    # per-sampler seed so parallel samplers decorrelate
    set_global_seed(seed + id)

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    ) if db else None

    env = environment_fn(**config_["environment"], visualize=vis)
    agent = algorithm_fn.prepare_for_sampler(env_spec=env, config=config_)

    exploration_params = config_["sampler"].pop("exploration_params", None)
    # NOTE(review): positional star-unpacking — presumably
    # exploration_params is a sequence of exploration configs; confirm
    # it is not a dict (that would unpack keys only).
    exploration_handler = ExplorationHandler(env=env, *exploration_params) \
        if exploration_params is not None \
        else None
    if exploration_handler is not None:
        exploration_handler.set_power(exploration_power)

    mode = "infer" if infer else "train"
    # NOTE(review): no default — config must always provide "valid_seeds"
    valid_seeds = config_["sampler"].pop("valid_seeds")
    seeds = valid_seeds if infer else None

    sampler = sampler_fn(
        agent=agent,
        env=env,
        db_server=db_server,
        exploration_handler=exploration_handler,
        **config_["sampler"],
        logdir=logdir,
        id=id,
        mode=mode,
        seeds=seeds
    )

    if resume is not None:
        sampler.load_checkpoint(filepath=resume)

    sampler.run()
def on_experiment_start(self, runner: "IRunner"):
    """Event handler for experiment start.

    Args:
        runner: IRunner instance.

    .. note::
        This event work only on IRunner.
    """
    assert self.experiment is not None
    # epoch-dependent reseed keeps the experiment reproducible
    experiment_seed = self.experiment.initial_seed + self.global_epoch + 1
    set_global_seed(experiment_seed)
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: TorchModel = None,
    engine: Union["Engine", str] = None,
    seed: int = 42,
    # extra info
    resume: str = None,
    # engine extra params,
    cpu: bool = False,
    fp16: bool = False,
) -> Generator:
    """
    Runs model inference on PyTorch DataLoader and returns
    python generator with model predictions from `runner.predict_batch`.

    Args:
        loader: loader to predict
        model: model to use for prediction
        engine: engine to use for prediction
        seed: random seed to use before prediction
        resume: path to checkpoint for model
        cpu: boolean flag to force CPU usage
        fp16: boolean flag to use half-precision

    Yields:
        batches with model predictions

    .. note::
        Please follow the `minimal examples`_ sections for use cases.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505
    """
    self.engine = engine or get_available_engine(cpu=cpu, fp16=fp16)

    if model is not None:
        self.model = model
    assert self.model is not None

    if resume is not None:
        # load weights into the *unwrapped* model, in sync across processes
        self.engine.wait_for_everyone()
        unwrapped_model = self.engine.unwrap_model(self.model)
        unwrapped_model.load_state_dict(load_checkpoint(resume))

    self.model = self.engine.prepare(self.model)
    # eval mode for all (possibly nested) modules
    maybe_recursive_call(self.model, "train", mode=False)
    loader = self.engine.prepare(loader)

    # reseed right before iteration for reproducible predictions
    set_global_seed(seed)
    for batch in loader:
        yield self.predict_batch(batch)
def config_main(args, unknown_args):
    """Yaml config catalyst-dl run entry point."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    runner: ConfigRunner = get_config_runner(expdir=args.expdir, config=config)

    # dump environment/config/code only from the master process
    is_master = get_rank() <= 0
    if is_master:
        dump_environment(
            logdir=runner.logdir, config=config, configs_path=args.configs
        )
        dump_code(expdir=args.expdir, logdir=runner.logdir)

    runner.run()
def on_epoch_start(self, runner: "IRunner"):
    """Event handler.

    Advances epoch counters, resets epoch metrics, validates loaders
    and reseeds the global RNG.

    Raises:
        RunnerException: if any DataLoader is empty.
    """
    self.global_epoch_step += 1
    self.stage_epoch_step += 1
    self.epoch_metrics: Dict = defaultdict(None)
    # storage for pure epoch-based metrics, like lr/momentum
    self.epoch_metrics["_epoch_"] = {}

    assert self.loaders is not None
    for loader_key, loader in self.loaders.items():
        if len(loader) == 0:
            raise RunnerException(f"DataLoader with name {loader_key} is empty.")
    # rank- and epoch-dependent seed: decorrelated workers, reproducible runs
    set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)
def main(args, unknown_args):
    """Runs the experiment found in ``args.expdir`` with the parsed config."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    expdir_path = Path(args.expdir)
    Experiment, Runner = import_experiment_and_runner(expdir_path)
    experiment = Experiment(config)
    runner = Runner()

    # persist config & code next to the logs when a logdir is configured
    if experiment.logdir is not None:
        dump_config(config, experiment.logdir, args.configs)
        dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment, check=args.check)
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
    # engine extra params,
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> Generator:
    """
    Runs model inference on PyTorch DataLoader and returns
    python generator with model predictions from `runner.predict_batch`.

    Args:
        loader: loader to predict
        model: model to use for prediction
        engine: engine to use for prediction
        seed: random seed to use before prediction
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    Yields:
        batches with model predictions
    """
    self._engine = engine or get_available_engine(
        fp16=fp16, ddp=ddp, amp=amp, apex=apex)

    if model is not None:
        self.model = model
    assert self.model is not None

    # if resume is not None:
    #     checkpoint = load_checkpoint(resume)
    #     unpack_checkpoint(checkpoint, model=self.model)

    self.model = self.engine.sync_device(self.model)
    # eval mode for all (possibly nested) modules
    maybe_recursive_call(self.model, "train", mode=False)

    # reseed right before iteration for reproducible predictions
    set_global_seed(seed)
    for batch in loader:
        yield self.predict_batch(batch)
def main(args, unknown_args):
    """Runs the RL trainer process: environment, algorithm, db, trainer."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    if args.logdir is not None:
        os.makedirs(args.logdir, exist_ok=True)
        dump_config(config, args.logdir, args.configs)

    # importing the expdir registers user-defined components
    if args.expdir is not None:
        module = import_module(expdir=args.expdir)  # noqa: F841

    env = ENVIRONMENTS.get_from_params(**config["environment"])

    algorithm_name = config["algorithm"].pop("algorithm")

    # pick registry/trainer/sync mode from the algorithm family
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        trainer_fn = OffpolicyTrainer
        sync_epoch = False
        weights_sync_mode = "critic" if env.discrete_actions else "actor"
    elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        trainer_fn = OnpolicyTrainer
        sync_epoch = True
        weights_sync_mode = "actor"
    else:
        # @TODO: add registry for algorithms, trainers, samplers
        raise NotImplementedError()

    db_server = DATABASES.get_from_params(
        **config.get("db", {}), sync_epoch=sync_epoch
    )

    algorithm_fn = ALGORITHMS.get(algorithm_name)
    algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config)

    if args.resume is not None:
        algorithm.load_checkpoint(filepath=args.resume)

    trainer = trainer_fn(
        algorithm=algorithm,
        env_spec=env,
        db_server=db_server,
        logdir=args.logdir,
        weights_sync_mode=weights_sync_mode,
        **config["trainer"],
    )
    trainer.run()
def main_worker(cfg: DictConfig):
    """Runs the experiment described by a hydra config."""
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    expdir = hydra.utils.to_absolute_path(cfg.args.expdir)
    # importing the expdir registers user-defined components
    import_module(expdir)

    experiment = hydra.utils.instantiate(cfg.experiment, cfg=cfg)
    runner = hydra.utils.instantiate(cfg.runner)

    # dump environment/code only from the master process with a logdir
    if experiment.logdir is not None and get_rank() <= 0:
        dump_environment(cfg, experiment.logdir)
        dump_code(expdir, experiment.logdir)

    runner.run_experiment(experiment)
def main(args, unknown_args):
    """Runs the RL trainer process and registers process cleanup at exit."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)

    if args.logdir is not None:
        os.makedirs(args.logdir, exist_ok=True)
        dump_config(args.configs, args.logdir)

    # importing the expdir registers user-defined components
    if args.expdir is not None:
        module = import_module(expdir=args.expdir)  # noqa: F841

    algorithm_name = config["algorithm"].pop("algorithm")
    # off-policy vs on-policy algorithm family selects trainer & sync mode
    if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES:
        ALGORITHMS = OFFPOLICY_ALGORITHMS
        trainer_fn = OffpolicyTrainer
        sync_epoch = False
    else:
        ALGORITHMS = ONPOLICY_ALGORITHMS
        trainer_fn = OnpolicyTrainer
        sync_epoch = True

    db_server = DATABASES.get_from_params(**config.get("db", {}),
                                          sync_epoch=sync_epoch)

    env = ENVIRONMENTS.get_from_params(**config["environment"])

    algorithm_fn = ALGORITHMS.get(algorithm_name)
    algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config)

    if args.resume is not None:
        algorithm.load_checkpoint(filepath=args.resume)

    trainer = trainer_fn(
        algorithm=algorithm,
        env_spec=env,
        db_server=db_server,
        **config["trainer"],
        logdir=args.logdir,
    )

    # terminate any trainer child processes on interpreter exit
    def on_exit():
        for p in trainer.get_processes():
            p.terminate()

    atexit.register(on_exit)

    trainer.run()
def __init__(
    self,
    agent: Union[ActorSpec, CriticSpec],
    env: EnvironmentSpec,
    db_server: DBSpec = None,
    exploration_handler: ExplorationHandler = None,
    logdir: str = None,
    id: int = 0,
    mode: str = "infer",
    buffer_size: int = int(1e4),
    weights_sync_period: int = 1,
    seeds: List = None,
    episode_limit: int = None,
    force_store: bool = False,
    gc_period: int = 10,
):
    """RL sampler worker.

    Args:
        agent: actor or critic used to act in the environment
        env: environment specification
        db_server: database used to exchange trajectories/weights
        exploration_handler: exploration strategy handler
        logdir: directory for sampler logs
        id: sampler index (also offsets the random seed)
        mode: ``"train"`` or ``"infer"``
        buffer_size: capacity of the episode runner buffer
        weights_sync_period: episodes between agent weight syncs
        seeds: fixed seeds to sample from (validation runs)
        episode_limit: max episodes to run (None means unbounded)
        force_store: if True, store trajectories unconditionally
        gc_period: run garbage collection every ``gc_period`` episodes
    """
    self._device = UtilsFactory.prepare_device()
    # per-sampler seed so parallel samplers decorrelate
    self._seed = 42 + id
    set_global_seed(self._seed)
    self._sampler_id = id

    self._infer = mode == "infer"
    self.seeds = seeds

    # logging
    self._prepare_logger(logdir, mode)

    # environment, model, exploration & action handlers
    self.env = env
    self.agent = agent
    self.exploration_handler = exploration_handler
    self.episode_index = 0
    # inference runs are deterministic (no exploration noise)
    self.episode_runner = EpisodeRunner(env=self.env,
                                        agent=self.agent,
                                        device=self._device,
                                        capacity=buffer_size,
                                        deterministic=self._infer)

    # synchronization configuration
    self.db_server = db_server
    self.weights_sync_period = weights_sync_period

    self.episode_limit = episode_limit or _BIG_NUM
    self._force_store = force_store

    # discrete-action envs sync critic weights, continuous ones sync actor
    self._sampler_weight_mode = \
        "critic" if env.discrete_actions else "actor"

    self._gc_period = gc_period
def main_worker(args, unknown_args):
    """Runs main worker thread from model training."""
    args, config = parse_args_uargs(args, unknown_args)
    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    # propagate mixed-precision flags into the config
    distributed_params = config.setdefault("distributed_params", {})
    distributed_params["apex"] = args.apex
    distributed_params["amp"] = args.amp

    experiment, runner, config = prepare_config_api_components(
        expdir=Path(args.expdir), config=config
    )

    # dump environment/code only from the master process with a logdir
    is_master = get_rank() <= 0
    if experiment.logdir is not None and is_master:
        dump_environment(config, experiment.logdir, args.configs)
        dump_code(args.expdir, experiment.logdir)

    runner.run_experiment(experiment)
def main(args, _=None):
    """Run the ``catalyst-contrib image2embeddings`` script."""
    global IMG_SIZE

    set_global_seed(args.seed)
    prepare_cudnn(args.deterministic, args.benchmark)

    IMG_SIZE = (args.img_size, args.img_size)  # noqa: WPS442

    # either load a traced (torchscript) model or build a ResNet encoder
    if args.traced_model is not None:
        device = get_device()
        model = torch.jit.load(str(args.traced_model), map_location=device)
    else:
        model = ResnetEncoder(arch=args.arch, pooling=args.pooling)

    model = model.eval()
    model, _, _, _, device = process_components(model=model)

    # CSV -> list of row dicts for the image reader
    df = pd.read_csv(args.in_csv)
    df = df.reset_index().drop("index", axis=1)
    df = list(df.to_dict("index").values())

    open_fn = ImageReader(input_key=args.img_col,
                          output_key="image",
                          rootpath=args.rootpath)

    dataloader = get_loader(
        df,
        open_fn,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        dict_transform=dict_transformer,
    )

    features = []
    dataloader = tqdm(dataloader) if args.verbose else dataloader
    with torch.no_grad():
        for batch in dataloader:
            batch_features = model(batch["image"].to(device))
            batch_features = batch_features.cpu().detach().numpy()
            features.append(batch_features)

    # stack per-batch embeddings and save as a single .npy file
    features = np.concatenate(features, axis=0)
    np.save(args.out_npy, features)
def main(cfg: DictConfig):
    """
    Hydra config catalyst-dl run entry point

    Args:
        cfg: (DictConfig) configuration
    """
    cfg = prepare_hydra_config(cfg)
    set_global_seed(cfg.args.seed)
    prepare_cudnn(cfg.args.deterministic, cfg.args.benchmark)

    expdir = hydra.utils.to_absolute_path(cfg.args.expdir)
    # importing the expdir registers user-defined components
    import_module(expdir)

    runner = hydra.utils.instantiate(cfg.runner, cfg=cfg)

    # dump environment/code only from the master process
    if get_rank() <= 0:
        dump_environment(logdir=runner.logdir, config=cfg)
        dump_code(expdir=expdir, logdir=runner.logdir)

    runner.run()
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
) -> Generator:
    """
    Runs model inference on PyTorch DataLoader and returns
    python generator with model predictions from `runner.predict_batch`.

    Args:
        loader: loader to predict
        model: model to use for prediction
        engine: engine to use for prediction
        seed: random seed to use before prediction

    Yields:
        batches with model predictions
    """
    # explicit engine argument wins; otherwise keep or lazily create one
    if engine is not None:
        self.engine = engine
    if self.engine is None:
        self.engine = get_available_engine()

    if model is not None:
        self.model = model
    assert self.model is not None

    # if resume is not None:
    #     checkpoint = load_checkpoint(resume)
    #     unpack_checkpoint(checkpoint, model=self.model)

    self.model = self.engine.sync_device(self.model)
    # eval mode for all (possibly nested) modules
    maybe_recursive_call(self.model, "train", mode=False)

    set_global_seed(seed)
    for batch in loader:
        yield self.predict_batch(batch)
def on_loader_start(self, runner: "IRunner"):
    """Event handler.

    Initializes loader-level state (role flags, counters, metrics) and
    prepares the model/sampler/loader for iteration.

    Raises:
        NotImplementedError: if the current DataLoader is empty.
    """
    assert self.loader is not None
    # loader role is encoded in the loader-key prefix
    self.is_train_loader: bool = self.loader_key.startswith("train")
    self.is_valid_loader: bool = self.loader_key.startswith("valid")
    self.is_infer_loader: bool = self.loader_key.startswith("infer")
    assert self.is_train_loader or self.is_valid_loader or self.is_infer_loader
    self.loader_batch_size: int = _get_batch_size(self.loader)
    self.loader_batch_len: int = len(self.loader)
    self.loader_sample_len: int = len(self.loader.dataset)
    self.loader_batch_step: int = 0
    self.loader_sample_step: int = 0
    self.loader_metrics: Dict = defaultdict(None)

    if self.loader_batch_len == 0:
        raise NotImplementedError(f"DataLoader with name {self.loader_key} is empty.")
    # rank- and epoch-dependent seed: decorrelated workers, reproducible runs
    set_global_seed(self.seed + self.engine.rank + self.global_epoch_step)

    # switch train/eval mode (handles dict-of-models via recursion)
    maybe_recursive_call(self.model, "train", mode=self.is_train_loader)
    # keep DDP shuffling deterministic per epoch
    if isinstance(self.loader.sampler, DistributedSampler):
        self.loader.sampler.set_epoch(self.stage_epoch_step)
    self.loader = self.engine.autocast_loader(self.loader)
def on_loader_start(self, runner: "IRunner"):
    """Event handler.

    Initializes loader-level state (role flags, counters, metrics) and
    prepares the model/sampler for iteration.

    Raises:
        IRunnerError: if the current DataLoader is empty.
    """
    assert self.loader is not None
    # loader role is encoded in the loader-key prefix
    self.is_train_loader: bool = self.loader_key.startswith("train")
    self.is_valid_loader: bool = self.loader_key.startswith("valid")
    self.is_infer_loader: bool = self.loader_key.startswith("infer")
    assert self.is_train_loader or self.is_valid_loader or self.is_infer_loader
    self.loader_batch_size: int = get_loader_batch_size(self.loader)
    self.loader_batch_len: int = len(self.loader)
    self.loader_sample_len: int = get_loader_num_samples(self.loader)
    self.loader_batch_step: int = 0
    self.loader_sample_step: int = 0
    self.loader_metrics: Dict = defaultdict(None)

    if self.loader_batch_len == 0:
        raise IRunnerError(
            f"DataLoader with name {self.loader_key} is empty.")
    # process- and epoch-dependent seed: decorrelated, reproducible
    set_global_seed(self.seed + max(0, self.engine.process_index) + self.epoch_step)

    # switch train/eval mode (handles dict-of-models via recursion)
    maybe_recursive_call(self.model, "train", mode=self.is_train_loader)
    # keep DDP shuffling deterministic per epoch
    if isinstance(self.loader.sampler, DistributedSampler):
        self.loader.sampler.set_epoch(self.epoch_step)
def _get_loader(
    dataset: Dataset,
    sampler: Sampler,
    initial_seed: int,
    params: DictConfig,
) -> DataLoader:
    """Build a DataLoader from a hydra loader-config section.

    Args:
        dataset: dataset to wrap
        sampler: optional sampler (wrapped for DDP when distributed)
        initial_seed: base seed for per-worker seeding
        params: hydra config with DataLoader keyword arguments

    Returns:
        configured DataLoader
    """
    params = OmegaConf.to_container(params, resolve=True)
    per_gpu_scaling = params.pop("per_gpu_scaling", False)
    params["dataset"] = dataset
    distributed_rank = get_rank()
    # get_rank() returns -1 outside distributed mode
    distributed = distributed_rank > -1

    # scale batch_size/num_workers by GPU count for single-node DataParallel
    if per_gpu_scaling and not distributed:
        num_gpus = max(1, torch.cuda.device_count())
        assert ("batch_size" in params), "loader config must contain 'batch_size' key"
        assert ("num_workers" in params), "loader config must contain 'num_workers' key"
        params["batch_size"] *= num_gpus
        params["num_workers"] *= num_gpus

    # under DDP, every sampler must be distributed-aware
    if distributed:
        if sampler is not None:
            if not isinstance(sampler, DistributedSampler):
                sampler = DistributedSamplerWrapper(sampler=sampler)
        else:
            sampler = DistributedSampler(dataset=params["dataset"])

    # shuffle only when no sampler is in charge of ordering
    params["shuffle"] = params.get("shuffle", False) and sampler is None
    params["sampler"] = sampler

    # default worker_init_fn seeds each worker deterministically
    worker_init_fn = params.pop("worker_init_fn", None)
    if worker_init_fn is None:
        params["worker_init_fn"] = lambda x: set_global_seed(initial_seed + x)
    else:
        params["worker_init_fn"] = hydra.utils.get_method(worker_init_fn)

    # collate_fn may be given as a dotted path in the config
    collate_fn = params.pop("collate_fn", None)
    if collate_fn is None:
        params["collate_fn"] = None
    else:
        params["collate_fn"] = hydra.utils.get_method(collate_fn)

    loader: DataLoader = DataLoader(**params)
    return loader
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
    # engine extra params,
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> Generator:
    """
    Runs model inference on PyTorch DataLoader and returns
    python generator with model predictions from `runner.predict_batch`.

    Args:
        loader: loader to predict
        model: model to use for prediction
        engine: engine to use for prediction
        seed: random seed to use before prediction
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    Yields:
        batches with model predictions

    .. note::
        Please follow the `minimal examples`_ sections for use cases,
        e.g. ``for logits in runner.predict_loader(loader=valid_loader)``
        after a completed ``runner.train(...)`` call.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples
    """
    # pick an engine matching the requested precision/distributed flags
    self._engine = engine or get_available_engine(
        fp16=fp16, ddp=ddp, amp=amp, apex=apex
    )

    if model is not None:
        self.model = model
    assert self.model is not None

    # if resume is not None:
    #     checkpoint = load_checkpoint(resume)
    #     unpack_checkpoint(checkpoint, model=self.model)

    self.model = self.engine.sync_device(self.model)
    # eval mode for all (possibly nested) modules
    maybe_recursive_call(self.model, "train", mode=False)

    # reseed right before iteration for reproducible predictions
    set_global_seed(seed)
    for batch in loader:
        yield self.predict_batch(batch)