def manual_running_avg_acc(engine):
    i = engine.state.iteration - 1
    true_acc_metric.reset()
    for j in range(idist.get_world_size()):
        output = (
            torch.from_numpy(all_y_pred_batch_values[j, i, :, :]),
            torch.from_numpy(all_y_true_batch_values[j, i, :]),
        )
        true_acc_metric.update(output)

    batch_acc = true_acc_metric._num_correct * 1.0 / true_acc_metric._num_examples

    if running_avg_acc[0] is None:
        running_avg_acc[0] = batch_acc
    else:
        running_avg_acc[0] = running_avg_acc[0] * alpha + (1.0 - alpha) * batch_acc
    engine.state.running_avg_acc = running_avg_acc[0]
def test_idist_methods_overhead_nccl(distributed_context_single_node_nccl):
    import time

    n = 100000

    start = time.time()
    for _ in range(n):
        _ = idist.get_world_size()
        _ = idist.get_rank()
    elapsed = time.time() - start
    t1 = elapsed / n

    start = time.time()
    for _ in range(n):
        _ = dist.get_world_size()
        _ = dist.get_rank()  # time the native torch.distributed calls for comparison
    elapsed = time.time() - start
    t2 = elapsed / n

    assert t2 * 3 > t1, "{} * 3 vs {}".format(t2, t1)
def _test(barrier):
    engine = Engine(lambda e, b: b)

    batch_sum = torch.tensor(0).to(device)

    @engine.on(Events.ITERATION_COMPLETED)
    @idist.one_rank_only(with_barrier=barrier)  # i.e. rank == 0
    def _(_):
        batch_sum.data += torch.tensor(engine.state.batch).to(device)

    engine.run([1, 2, 3], max_epochs=2)

    value_list = idist.all_gather(tensor=batch_sum)

    for r in range(idist.get_world_size()):
        if r == 0:
            assert value_list[r].item() == 12
        else:
            assert value_list[r].item() == 0
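# Usage sketch (an assumption, not taken from the test above): idist.one_rank_only
# restricts a handler to a single rank; with_barrier=True makes the other ranks
# wait at idist.barrier() until that rank has finished.
import ignite.distributed as idist

@idist.one_rank_only(rank=0, with_barrier=True)
def save_report(path):
    # Runs only on rank 0; on other ranks the wrapper returns None and waits on the barrier.
    with open(path, "w") as f:
        f.write("done")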
def another_wrapper(self: Metric, *args: Any, **kwargs: Any) -> Callable:
    if not isinstance(self, Metric):
        raise RuntimeError(
            "Decorator sync_all_reduce should be used on ignite.metrics.Metric class methods only"
        )
    ws = idist.get_world_size()
    if len(attrs) > 0 and not self._is_reduced:
        if ws > 1:
            for attr in attrs:
                t = getattr(self, attr, None)
                if t is not None:
                    t = idist.all_reduce(t)
                    self._is_reduced = True
                    setattr(self, attr, t)
        else:
            self._is_reduced = True

    return func(self, *args, **kwargs)
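# Minimal sketch of the intended use of the sync_all_reduce decorator above: a
# custom Metric names the attributes to all-reduce before compute(). The
# reinit__is_reduced decorator on reset/update is what sets the "_decorated"
# flag checked by Metric.__init__ further below.
import torch
from ignite.metrics import Metric
from ignite.metrics.metric import reinit__is_reduced, sync_all_reduce

class CustomAccuracy(Metric):
    @reinit__is_reduced
    def reset(self):
        self._num_correct = torch.tensor(0, device=self._device)
        self._num_examples = 0

    @reinit__is_reduced
    def update(self, output):
        y_pred, y = output
        self._num_correct += (y_pred == y).sum().to(self._device)
        self._num_examples += y.shape[0]

    @sync_all_reduce("_num_correct", "_num_examples")
    def compute(self):
        # After the all-reduce, these hold the global counts across processes.
        return self._num_correct.item() / self._num_examples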
def _test_distrib_all_gather(device):
    res = torch.tensor(idist.all_gather(10), device=device)
    true_res = torch.tensor([10] * idist.get_world_size(), device=device)
    assert (res == true_res).all()

    t = torch.tensor(idist.get_rank(), device=device)
    res = idist.all_gather(t)
    true_res = torch.tensor([i for i in range(idist.get_world_size())], device=device)
    assert (res == true_res).all()

    x = "test-test"
    if idist.get_rank() == 0:
        x = "abc"
    res = idist.all_gather(x)
    true_res = ["abc"] + ["test-test"] * (idist.get_world_size() - 1)
    assert res == true_res

    base_x = "tests/ignite/distributed/utils/test_native.py" * 2000
    x = base_x
    if idist.get_rank() == 0:
        x = "abc"
    res = idist.all_gather(x)
    true_res = ["abc"] + [base_x] * (idist.get_world_size() - 1)
    assert res == true_res

    t = torch.arange(100, device=device).reshape(4, 25) * (idist.get_rank() + 1)
    in_dtype = t.dtype
    res = idist.all_gather(t)
    assert res.shape == (idist.get_world_size() * 4, 25)
    assert res.dtype == in_dtype
    true_res = torch.zeros(idist.get_world_size() * 4, 25, device=device)
    for i in range(idist.get_world_size()):
        true_res[i * 4 : (i + 1) * 4, ...] = torch.arange(100, device=device).reshape(4, 25) * (i + 1)
    assert (res == true_res).all()

    if idist.get_world_size() > 1:
        with pytest.raises(TypeError, match=r"Unhandled input type"):
            idist.all_reduce([0, 1, 2])
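# A small sketch of the idist.all_gather behaviour exercised above: tensors are
# concatenated along dim 0 across processes, Python numbers come back as a
# tensor of world_size values, and strings come back as a list. The helper
# name `gather_eval_outputs` is hypothetical.
import torch
import ignite.distributed as idist

def gather_eval_outputs(local_preds: torch.Tensor) -> torch.Tensor:
    # local_preds: shape (n, ...) per process -> result: shape (world_size * n, ...)
    return idist.all_gather(local_preds)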
def _test_macro_distrib_integration(device):
    from ignite.engine import Engine

    rank = idist.get_rank()

    size = len(corpus.chunks)

    data = []
    for c in corpus.chunks:
        data += idist.get_world_size() * [c]

    def update(_, i):
        return data[i + size * rank]

    def _test(metric_device):
        engine = Engine(update)
        m = Bleu(ngram=4, smooth="smooth2")
        m.attach(engine, "bleu")

        engine.run(data=list(range(size)), max_epochs=1)

        assert "bleu" in engine.state.metrics

        ref_bleu = 0
        for candidates, references in data:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ref_bleu += sentence_bleu(
                    references[0],
                    candidates[0],
                    weights=[0.25, 0.25, 0.25, 0.25],
                    smoothing_function=SmoothingFunction().method2,
                )

        assert pytest.approx(engine.state.metrics["bleu"]) == ref_bleu / len(data)

    _test("cpu")
    if device.type != "xla":
        _test(idist.device())
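# Direct-usage sketch of the Bleu metric tested above (the sample sentences are
# assumptions, and the import path has varied across ignite versions): each
# update takes (candidates, references), where candidates is a list of token
# sequences and references a list of lists of reference token sequences.
from ignite.metrics.nlp import Bleu

m = Bleu(ngram=4, smooth="smooth2")
candidates = [["the", "cat", "sat", "on", "the", "mat"]]
references = [[["the", "cat", "is", "on", "the", "mat"], ["a", "cat", "sat", "on", "a", "mat"]]]
m.update((candidates, references))
print(m.compute())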
def initialize(config):
    model = get_model(config.model, config.model_dir, config.dropout, config.n_fc, config.num_classes)

    config.learning_rate *= idist.get_world_size()
    # Adapt model for distributed settings if configured
    model = idist.auto_model(model)

    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    optimizer = idist.auto_optim(optimizer)
    loss_fn = nn.BCEWithLogitsLoss()

    le = config.num_iters_per_epoch
    milestones_values = [
        (0, 0.0),
        (le * config.num_warmup_epochs, config.learning_rate),
        (le * config.max_epochs, 0.0),
    ]
    lr_scheduler = PiecewiseLinear(optimizer, param_name="lr", milestones_values=milestones_values)

    return model, optimizer, loss_fn, lr_scheduler
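# Sketch of driving an initialize() helper like the one above through
# idist.Parallel (the `training` wrapper and the `config` object are
# assumptions): Parallel spawns the worker processes and sets up the backend.
import ignite.distributed as idist

def training(local_rank, config):
    model, optimizer, loss_fn, lr_scheduler = initialize(config)
    # ... build dataloaders and the trainer, then run ...
    ...

# `config` is assumed to be a dict or namespace defined elsewhere.
with idist.Parallel(backend="nccl", nproc_per_node=2) as parallel:
    parallel.run(training, config)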
def _test_distrib_all_reduce(device):
    res = idist.all_reduce(10)
    assert res == 10 * idist.get_world_size()

    t = torch.tensor(10, device=device)
    res = idist.all_reduce(t)
    assert res.item() == 10 * idist.get_world_size()

    rank = idist.get_rank()
    t = torch.tensor(rank * 2.0 + 1.0, device=device)
    res = idist.all_reduce(t)
    assert res.item() == sum([i * 2.0 + 1.0 for i in range(idist.get_world_size())])

    t = torch.tensor(rank * 2.0 + 1.0, device=device)
    res = idist.all_reduce(t, "MIN").item()
    true_val = min([i * 2 + 1 for i in range(idist.get_world_size())])
    assert res == true_val, f"{res} vs {true_val}"

    t = torch.tensor(rank * 2.0 + 1.0, device=device)
    res = idist.all_reduce(t, "MAX").item()
    true_val = max([i * 2.0 + 1.0 for i in range(idist.get_world_size())])
    assert res == true_val, f"{res} vs {true_val}"

    t = torch.tensor(rank * 2.0 + 1.0, device=device)
    res = idist.all_reduce(t, "PRODUCT").item()
    true_val = 1
    for v in [i * 2.0 + 1.0 for i in range(idist.get_world_size())]:
        true_val *= v
    assert res == true_val, f"{res} vs {true_val}"

    if idist.get_world_size() > 1:
        with pytest.raises(TypeError, match=r"Unhandled input type"):
            idist.all_reduce("abc")

        with pytest.raises(ValueError, match=r"Unsupported reduction operation"):
            idist.all_reduce(10, op="ABC")

    t = torch.tensor([0, 1, 2])
    res = idist.all_reduce(t)
    assert res.device == t.device, f"{res.device} vs {t.device}"
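# Reduction-op sketch matching the assertions above: SUM is the default, and
# "MIN" / "MAX" / "PRODUCT" are selected via the `op` argument. The clones are
# a precaution: with the native backend the reduction may operate in-place.
import torch
import ignite.distributed as idist

t = torch.tensor(idist.get_rank() + 1.0)
total = idist.all_reduce(t.clone())              # SUM across all processes
largest = idist.all_reduce(t.clone(), op="MAX")  # elementwise maximum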
def __init__(
    self,
    output_transform: Callable = lambda x: x,
    average: bool = False,
    is_multilabel: bool = False,
    device: Union[str, torch.device] = torch.device("cpu"),
):
    if idist.get_world_size() > 1:
        if (not average) and is_multilabel:
            warnings.warn(
                "Precision/Recall metrics do not work in distributed setting when average=False "
                "and is_multilabel=True. Results are not reduced across computing devices. Computed result "
                "corresponds to the local rank's (single process) result.",
                RuntimeWarning,
            )

    self._average = average
    self.eps = 1e-20
    super(_BasePrecisionRecall, self).__init__(
        output_transform=output_transform, is_multilabel=is_multilabel, device=device
    )
def _test_distrib_integration(device, tol=1e-6):
    import numpy as np
    from ignite.engine import Engine

    rank = idist.get_rank()
    n_iters = 100
    s = 10
    offset = n_iters * s
    y_true = torch.arange(0, offset * idist.get_world_size(), dtype=torch.float).to(device)
    y_preds = (rank + 1) * torch.ones(offset, dtype=torch.float).to(device)

    def update(engine, i):
        return y_preds[i * s : (i + 1) * s], y_true[i * s + offset * rank : (i + 1) * s + offset * rank]

    def _test(metric_device):
        engine = Engine(update)
        m = RootMeanSquaredError(device=metric_device)
        m.attach(engine, "rmse")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=1)

        assert "rmse" in engine.state.metrics
        res = engine.state.metrics["rmse"]

        y_preds_full = []
        for i in range(idist.get_world_size()):
            y_preds_full.append((i + 1) * torch.ones(offset))
        y_preds_full = torch.stack(y_preds_full).to(device).flatten()

        true_res = np.sqrt(np.mean(np.square((y_true - y_preds_full).cpu().numpy())))

        assert pytest.approx(res, rel=tol) == true_res

    _test("cpu")
    if device.type != "xla":
        _test(idist.device())
def training(local_rank, config, **kwargs):
    import time

    time.sleep(idist.get_rank() * 0.1)

    print(idist.get_rank(), ": run with config:", config, "- kwargs:", kwargs, f"- backend={idist.backend()}")

    t = torch.tensor([idist.get_rank()], device=idist.device())
    t = idist.all_reduce(t)
    t = t.item()
    ws = idist.get_world_size()
    assert t == ws * (ws - 1) / 2, f"{t} vs {ws}"
    assert local_rank == idist.get_local_rank()

    # Test init method:
    if idist.model_name() == "native-dist":
        from ignite.distributed.utils import _model

        true_init_method = config.get("true_init_method", None)
        assert true_init_method is not None, true_init_method
        assert _model._init_method == true_init_method
def _test(metric_device):
    n_iters = 60
    s = 16
    offset = n_iters * s
    n_probabilities = 10
    y = torch.rand(offset * idist.get_world_size(), n_probabilities)

    def update(_, i):
        return y[i * s + rank * offset : (i + 1) * s + rank * offset, :]

    engine = Engine(update)
    m = InceptionScore(num_features=n_probabilities, feature_extractor=torch.nn.Identity(), device=metric_device)
    m.attach(engine, "InceptionScore")

    engine.run(data=list(range(n_iters)), max_epochs=1)

    assert "InceptionScore" in engine.state.metrics
    assert pytest.approx(calculate_inception_score(y)) == m.compute()
def __init__(
    self,
    output_transform: Callable = lambda x: x,
    device: Optional[Union[str, torch.device]] = None,
):
    self._output_transform = output_transform

    # Check device if distributed is initialized:
    if idist.get_world_size() > 1:

        # check if reset and update methods are decorated. Compute may not be decorated
        if not (hasattr(self.reset, "_decorated") and hasattr(self.update, "_decorated")):
            warnings.warn(
                "{} class does not support distributed setting. Computed result is not collected "
                "across all computing devices".format(self.__class__.__name__),
                RuntimeWarning,
            )
    self._device = device
    self._is_reduced = False
    self.reset()
def _test(data):
    if sampler_name is None:
        sampler = None
    elif sampler_name == "WeightedRandomSampler":
        sampler = WeightedRandomSampler(weights=torch.ones(100), num_samples=100)
    else:
        raise RuntimeError(f"Unknown sampler name: {sampler_name}")

    # Test auto_dataloader
    assert idist.get_world_size() == ws, f"{idist.get_world_size()} vs {ws}"

    shuffle = sampler is None if not isinstance(data, IterableDataset) else False
    dataloader = auto_dataloader(
        data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, shuffle=shuffle
    )

    assert isinstance(dataloader, dl_type)
    if hasattr(dataloader, "_loader"):
        dataloader = dataloader._loader

    if ws < batch_size:
        assert dataloader.batch_size == batch_size // ws
    else:
        assert dataloader.batch_size == batch_size

    if ws <= num_workers:
        assert dataloader.num_workers == (num_workers + nproc - 1) // nproc
    else:
        assert dataloader.num_workers == num_workers

    if isinstance(data, IterableDataset):
        sampler_type = _InfiniteConstantSampler
    elif ws > 1:
        sampler_type = DistributedSampler if sampler is None else DistributedProxySampler
    else:
        sampler_type = RandomSampler if sampler is None else type(sampler)

    assert isinstance(dataloader.sampler, sampler_type)

    if isinstance(dataloader, DataLoader):
        assert dataloader.pin_memory == ("cuda" in idist.device().type)
def _test(metric_device):
    engine = Engine(update)
    m = MeanPairwiseDistance(device=metric_device)
    m.attach(engine, "mpwd")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=1)

    assert "mpwd" in engine.state.metrics
    res = engine.state.metrics["mpwd"]

    true_res = []
    for i in range(n_iters * idist.get_world_size()):
        true_res.append(
            torch.pairwise_distance(
                y_true[i * s : (i + 1) * s, ...], y_preds[i * s : (i + 1) * s, ...], p=m._p, eps=m._eps
            )
            .cpu()
            .numpy()
        )
    true_res = np.array(true_res).ravel()
    true_res = true_res.mean()

    assert pytest.approx(res) == true_res
def __init__(self, logger: TrainsLogger = None, output_uri: str = None, dirname: str = None, *args, **kwargs):
    self._setup_check_trains(logger, output_uri)

    if not dirname:
        dirname = ""
        if idist.get_rank() == 0:
            dirname = tempfile.mkdtemp(
                prefix="ignite_checkpoints_{}".format(datetime.now().strftime("%Y_%m_%d_%H_%M_%S_"))
            )
        if idist.get_world_size() > 1:
            dirname = idist.all_gather(dirname)[0]

        warnings.warn("TrainsSaver created a temporary checkpoints directory: {}".format(dirname))
        idist.barrier()

    # Let's set non-atomic tmp dir saving behaviour
    if "atomic" not in kwargs:
        kwargs["atomic"] = False

    self._checkpoint_slots = defaultdict(list)

    super(TrainsSaver, self).__init__(dirname=dirname, *args, **kwargs)
def __init__(
    self,
    output_transform: Callable = lambda x: x,
    device: Union[str, torch.device] = torch.device("cpu"),
):
    self._output_transform = output_transform

    # Check device if distributed is initialized:
    if idist.get_world_size() > 1:

        # check if reset and update methods are decorated. Compute may not be decorated
        if not (hasattr(self.reset, "_decorated") and hasattr(self.update, "_decorated")):
            warnings.warn(
                f"{self.__class__.__name__} class does not support distributed setting. "
                "Computed result is not collected across all computing devices",
                RuntimeWarning,
            )

    # Some metrics have a large performance regression when run on XLA devices, so for now, we disallow it.
    if torch.device(device).type == "xla":
        raise ValueError("Cannot create metric on an XLA device. Use device='cpu' instead.")

    self._device = torch.device(device)
    self._is_reduced = False
    self.reset()
def test_auto_dataloader_warning_distributed_sampler(distributed_context_single_node_gloo):
    dataset = DummyDS()
    rank = idist.get_rank()
    world_size = idist.get_world_size()
    auto_dataloader(dataset, sampler=DistributedSampler(dataset, num_replicas=world_size, rank=rank))

    if world_size > 1:
        wrong_rank = (rank + 1) % world_size
        expected_warning = f"Found distributed sampler with rank={wrong_rank}, but process rank is {rank}"
        with pytest.warns(UserWarning, match=expected_warning):
            auto_dataloader(dataset, sampler=DistributedSampler(dataset, num_replicas=world_size, rank=wrong_rank))

    expected_warning = f"Found distributed sampler with num_replicas={world_size + 1}, but world size is {world_size}"
    with pytest.warns(UserWarning, match=expected_warning):
        auto_dataloader(dataset, sampler=DistributedSampler(dataset, num_replicas=world_size + 1, rank=rank))
def test_no_distrib(capsys):
    from ignite.distributed.utils import _model

    print("test_no_distrib : dist: ", dist.is_available())
    print("test_no_distrib : _model", type(_model))

    assert idist.backend() is None
    if torch.cuda.is_available():
        assert idist.device().type == "cuda"
    else:
        assert idist.device().type == "cpu"
    assert idist.get_rank() == 0
    assert idist.get_world_size() == 1
    assert idist.get_local_rank() == 0
    assert idist.model_name() == "serial"

    from ignite.distributed.utils import _model, _SerialModel

    _sanity_check()
    assert isinstance(_model, _SerialModel)

    idist.show_config()
    captured = capsys.readouterr()
    out = captured.err.split("\r")
    out = list(map(lambda x: x.strip(), out))
    out = list(filter(None, out))
    assert "ignite.distributed.utils INFO: distributed configuration: serial" in out[-1]
    assert "ignite.distributed.utils INFO: backend: None" in out[-1]
    if torch.cuda.is_available():
        assert "ignite.distributed.utils INFO: device: cuda" in out[-1]
    else:
        assert "ignite.distributed.utils INFO: device: cpu" in out[-1]
    assert "ignite.distributed.utils INFO: rank: 0" in out[-1]
    assert "ignite.distributed.utils INFO: local rank: 0" in out[-1]
    assert "ignite.distributed.utils INFO: world size: 1" in out[-1]
def _test_auto_dataloader(ws, nproc, batch_size, num_workers=1, sampler_name=None, dl_type=DataLoader):
    data = torch.rand(100, 3, 12, 12)

    if sampler_name is None:
        sampler = None
    elif sampler_name == "WeightedRandomSampler":
        sampler = WeightedRandomSampler(weights=torch.ones(100), num_samples=100)
    else:
        raise RuntimeError("Unknown sampler name: {}".format(sampler_name))

    # Test auto_dataloader
    assert idist.get_world_size() == ws
    dataloader = auto_dataloader(
        data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, shuffle=sampler is None
    )

    assert isinstance(dataloader, dl_type)
    if hasattr(dataloader, "_loader"):
        dataloader = dataloader._loader

    if ws < batch_size:
        assert dataloader.batch_size == batch_size // ws
    else:
        assert dataloader.batch_size == batch_size

    if ws <= num_workers:
        assert dataloader.num_workers == (num_workers + nproc - 1) // nproc
    else:
        assert dataloader.num_workers == num_workers

    if ws < 2:
        sampler_type = RandomSampler if sampler is None else type(sampler)
        assert isinstance(dataloader.sampler, sampler_type)
    else:
        sampler_type = DistributedSampler if sampler is None else DistributedProxySampler
        assert isinstance(dataloader.sampler, sampler_type)

    if isinstance(dataloader, DataLoader):
        assert dataloader.pin_memory == ("cuda" in idist.device().type)
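# Usage sketch of auto_dataloader consistent with the assertions above: with
# world size > 1 it injects a DistributedSampler (or wraps a user sampler in
# DistributedProxySampler) and divides batch_size and num_workers per process.
import torch
import ignite.distributed as idist

data = torch.rand(100, 3, 12, 12)
loader = idist.auto_dataloader(data, batch_size=32, num_workers=4, shuffle=True)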
def _test_distrib_config(local_rank, backend, ws, true_device, rank=None, true_init_method=None):
    assert idist.backend() == backend, f"{idist.backend()} vs {backend}"

    this_device = idist.device()
    assert isinstance(this_device, torch.device)
    if backend in ("nccl", "horovod") and "cuda" in this_device.type:
        true_device = torch.device(f"{true_device}:{local_rank}")
        assert this_device == true_device, f"{this_device} vs {true_device}"
    elif backend in ("gloo", "horovod"):
        assert this_device == torch.device(true_device)
    elif backend == "xla-tpu":
        assert true_device in this_device.type

    if rank is None:
        if idist.model_name() == "native-dist":
            rank = dist.get_rank()

    if rank is not None:
        assert idist.get_rank() == rank

    assert idist.get_world_size() == ws
    assert idist.get_local_rank() == local_rank

    assert idist.model_name() in ("native-dist", "xla-dist", "horovod-dist")

    _sanity_check()

    if idist.model_name() == "native-dist":
        from ignite.distributed.utils import _model

        if true_init_method is not None:
            assert _model._init_method == true_init_method
def __init__(
    self,
    logger: Optional[ClearMLLogger] = None,
    output_uri: Optional[str] = None,
    dirname: Optional[str] = None,
    *args: Any,
    **kwargs: Any,
):
    self._setup_check_clearml(logger, output_uri)

    if not dirname:
        dirname = ""
        if idist.get_rank() == 0:
            dirname = tempfile.mkdtemp(prefix=f"ignite_checkpoints_{datetime.now().strftime('%Y_%m_%d_%H_%M_%S_')}")
        if idist.get_world_size() > 1:
            dirname = idist.all_gather(dirname)[0]  # type: ignore[index, assignment]

        warnings.warn(f"ClearMLSaver created a temporary checkpoints directory: {dirname}")
        idist.barrier()

    # Let's set non-atomic tmp dir saving behaviour
    if "atomic" not in kwargs:
        kwargs["atomic"] = False

    self._checkpoint_slots = defaultdict(list)  # type: DefaultDict[Union[str, Tuple[str, str]], List[Any]]

    super(ClearMLSaver, self).__init__(dirname=dirname, *args, **kwargs)  # type: ignore[misc]
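# Hypothetical wiring of the saver above with Checkpoint (the project/task
# names are assumptions, and ClearMLSaver's import path has moved between
# ignite.contrib.handlers and ignite.handlers across versions).
from ignite.engine import Events
from ignite.handlers import Checkpoint

clearml_logger = ClearMLLogger(project_name="examples", task_name="train")
handler = Checkpoint(
    {"model": model},  # `model` is assumed to exist
    ClearMLSaver(clearml_logger),
    n_saved=1,
)
trainer.add_event_handler(Events.EPOCH_COMPLETED, handler)  # `trainer` is assumed to exist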
def log_basic_info(logger, config):
    logger.info(f"Train {config['model']} on CIFAR10")
    logger.info(f"- PyTorch version: {torch.__version__}")
    logger.info(f"- Ignite version: {ignite.__version__}")
    if torch.cuda.is_available():
        # explicitly import cudnn as
        # torch.backends.cudnn can not be pickled with hvd spawning procs
        from torch.backends import cudnn

        logger.info(f"- GPU Device: {torch.cuda.get_device_name(idist.get_local_rank())}")
        logger.info(f"- CUDA version: {torch.version.cuda}")
        logger.info(f"- CUDNN version: {cudnn.version()}")

    logger.info("\n")
    logger.info("Configuration:")
    for key, value in config.items():
        logger.info(f"\t{key}: {value}")
    logger.info("\n")

    if idist.get_world_size() > 1:
        logger.info("\nDistributed setting:")
        logger.info(f"\tbackend: {idist.backend()}")
        logger.info(f"\tworld size: {idist.get_world_size()}")
        logger.info("\n")
def _test_frequency_with_engine(workers=None, lower_bound_factor=0.8, every=1):
    if workers is None:
        workers = idist.get_world_size()

    artificial_time = 1.0 / workers  # seconds
    total_tokens = 400 // workers
    batch_size = 128 // workers

    estimated_wps = batch_size * workers / artificial_time

    def update_fn(engine, batch):
        time.sleep(artificial_time)
        return {"ntokens": len(batch)}

    engine = Engine(update_fn)
    wps_metric = Frequency(output_transform=lambda x: x["ntokens"])
    event = Events.ITERATION_COMPLETED(every=every)
    wps_metric.attach(engine, "wps", event_name=event)

    @engine.on(event)
    def assert_wps(e):
        wps = e.state.metrics["wps"]
        # Skip iterations 2, 3, 4 if backend is Horovod on CUDA,
        # wps is abnormally low for these iterations
        # otherwise, other values of wps are OK
        if idist.model_name() == "horovod-dist" and e.state.iteration in (2, 3, 4):
            return
        assert (
            estimated_wps * lower_bound_factor < wps <= estimated_wps
        ), f"{e.state.iteration}: {estimated_wps * lower_bound_factor} < {wps} < {estimated_wps}"

    data = [[i] * batch_size for i in range(0, total_tokens, batch_size)]
    max_epochs = 1 if idist.model_name() != "horovod-dist" else 2
    engine.run(data, max_epochs=max_epochs)
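# Attach sketch for Frequency as a tokens-per-second meter, following the test
# above (the import path is an assumption; Frequency has lived under
# ignite.contrib.metrics and later under ignite.metrics).
from ignite.engine import Events
from ignite.metrics import Frequency

wps_metric = Frequency(output_transform=lambda out: out["ntokens"])
wps_metric.attach(engine, "wps", event_name=Events.ITERATION_COMPLETED(every=10))  # `engine` is assumed to exist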
def setup_common_training_handlers(
    trainer: Engine,
    train_sampler: Optional[DistributedSampler] = None,
    to_save: Optional[Mapping] = None,
    save_every_iters: int = 1000,
    output_path: Optional[str] = None,
    lr_scheduler: Optional[Union[ParamScheduler, _LRScheduler]] = None,
    with_gpu_stats: bool = False,
    output_names: Optional[Iterable[str]] = None,
    with_pbars: bool = True,
    with_pbar_on_iters: bool = True,
    log_every_iters: int = 100,
    stop_on_nan: bool = True,
    clear_cuda_cache: bool = True,
    save_handler: Optional[Union[Callable, BaseSaveHandler]] = None,
    **kwargs: Any,
) -> None:
    """Helper method to setup trainer with common handlers (it also supports distributed configuration):

        - :class:`~ignite.handlers.TerminateOnNan`
        - handler to setup learning rate scheduling
        - :class:`~ignite.handlers.ModelCheckpoint`
        - :class:`~ignite.metrics.RunningAverage` on `update_function` output
        - Two progress bars on epochs and optionally on iterations

    Args:
        trainer (Engine): trainer engine. Output of trainer's `update_function` should be a dictionary
            or sequence or a single tensor.
        train_sampler (torch.utils.data.DistributedSampler, optional): Optional distributed sampler used to call
            `set_epoch` method on epoch started event.
        to_save (dict, optional): dictionary with objects to save in the checkpoint. This argument is passed to
            :class:`~ignite.handlers.Checkpoint` instance.
        save_every_iters (int, optional): saving interval. By default, `to_save` objects are stored
            each 1000 iterations.
        output_path (str, optional): output path to indicate where `to_save` objects are stored. Argument is
            mutually exclusive with ``save_handler``.
        lr_scheduler (ParamScheduler or subclass of `torch.optim.lr_scheduler._LRScheduler`): learning rate scheduler
            as native torch LRScheduler or ignite's parameter scheduler.
        with_gpu_stats (bool, optional): if True, :class:`~ignite.contrib.metrics.GpuInfo` is attached to the
            trainer. This requires the `pynvml` package to be installed.
        output_names (list/tuple, optional): list of names associated with `update_function` output dictionary.
        with_pbars (bool, optional): if True, two progress bars on epochs and optionally on iterations are attached.
            Default, True.
        with_pbar_on_iters (bool, optional): if True, a progress bar on iterations is attached to the trainer.
            Default, True.
        log_every_iters (int, optional): logging interval for :class:`~ignite.contrib.metrics.GpuInfo` and for
            epoch-wise progress bar. Default, 100.
        stop_on_nan (bool, optional): if True, :class:`~ignite.handlers.TerminateOnNan` handler is added to the
            trainer. Default, True.
        clear_cuda_cache (bool, optional): if True, `torch.cuda.empty_cache()` is called every end of epoch.
            Default, True.
        save_handler (callable or :class:`~ignite.handlers.checkpoint.BaseSaveHandler`, optional): Method or callable
            class to use to store ``to_save``. See :class:`~ignite.handlers.checkpoint.Checkpoint` for more details.
            Argument is mutually exclusive with ``output_path``.
        **kwargs: optional keyword args to be passed to construct :class:`~ignite.handlers.checkpoint.Checkpoint`.
    """
    if idist.get_world_size() > 1:
        _setup_common_distrib_training_handlers(
            trainer,
            train_sampler=train_sampler,
            to_save=to_save,
            save_every_iters=save_every_iters,
            output_path=output_path,
            lr_scheduler=lr_scheduler,
            with_gpu_stats=with_gpu_stats,
            output_names=output_names,
            with_pbars=with_pbars,
            with_pbar_on_iters=with_pbar_on_iters,
            log_every_iters=log_every_iters,
            stop_on_nan=stop_on_nan,
            clear_cuda_cache=clear_cuda_cache,
            save_handler=save_handler,
            **kwargs,
        )
    else:
        if train_sampler is not None and isinstance(train_sampler, DistributedSampler):
            warnings.warn(
                "Argument train_sampler is a distributed sampler,"
                " but either there is no distributed setting or world size is < 2. "
                "Train sampler argument will be ignored",
                UserWarning,
            )
        _setup_common_training_handlers(
            trainer,
            to_save=to_save,
            save_every_iters=save_every_iters,
            output_path=output_path,
            lr_scheduler=lr_scheduler,
            with_gpu_stats=with_gpu_stats,
            output_names=output_names,
            with_pbars=with_pbars,
            with_pbar_on_iters=with_pbar_on_iters,
            log_every_iters=log_every_iters,
            stop_on_nan=stop_on_nan,
            clear_cuda_cache=clear_cuda_cache,
            save_handler=save_handler,
            **kwargs,
        )
def _get_output_value(self) -> Union[torch.Tensor, float]:
    # we need to compute average instead of sum produced by @sync_all_reduce("src")
    output = cast(Union[torch.Tensor, float], self.src) / idist.get_world_size()
    return output
def _test(metric_device):
    metric_device = torch.device(metric_device)
    acc = Accuracy(is_multilabel=True, device=metric_device)

    torch.manual_seed(10 + rank)

    y_pred = torch.randint(0, 2, size=(4, 5, 8, 10), device=device).long()
    y = torch.randint(0, 2, size=(4, 5, 8, 10), device=device).long()
    acc.update((y_pred, y))

    assert (
        acc._num_correct.device == metric_device
    ), f"{type(acc._num_correct.device)}:{acc._num_correct.device} vs {type(metric_device)}:{metric_device}"

    # gather y_pred, y
    y_pred = idist.all_gather(y_pred)
    y = idist.all_gather(y)

    np_y_pred = to_numpy_multilabel(y_pred.cpu())  # (N, C, H, W, ...) -> (N * H * W ..., C)
    np_y = to_numpy_multilabel(y.cpu())  # (N, C, H, W, ...) -> (N * H * W ..., C)

    assert acc._type == "multilabel"
    n = acc._num_examples
    res = acc.compute()
    assert n * idist.get_world_size() == acc._num_examples
    assert isinstance(res, float)
    assert accuracy_score(np_y, np_y_pred) == pytest.approx(res)

    acc.reset()
    torch.manual_seed(10 + rank)
    y_pred = torch.randint(0, 2, size=(4, 7, 10, 8), device=device).long()
    y = torch.randint(0, 2, size=(4, 7, 10, 8), device=device).long()
    acc.update((y_pred, y))

    assert (
        acc._num_correct.device == metric_device
    ), f"{type(acc._num_correct.device)}:{acc._num_correct.device} vs {type(metric_device)}:{metric_device}"

    # gather y_pred, y
    y_pred = idist.all_gather(y_pred)
    y = idist.all_gather(y)

    np_y_pred = to_numpy_multilabel(y_pred.cpu())  # (N, C, H, W, ...) -> (N * H * W ..., C)
    np_y = to_numpy_multilabel(y.cpu())  # (N, C, H, W, ...) -> (N * H * W ..., C)

    assert acc._type == "multilabel"
    n = acc._num_examples
    res = acc.compute()
    assert n * idist.get_world_size() == acc._num_examples
    assert isinstance(res, float)
    assert accuracy_score(np_y, np_y_pred) == pytest.approx(res)

    # check that result is not changed
    res = acc.compute()
    assert n * idist.get_world_size() == acc._num_examples
    assert isinstance(res, float)
    assert accuracy_score(np_y, np_y_pred) == pytest.approx(res)

    # Batched Updates
    acc.reset()
    torch.manual_seed(10 + rank)
    y_pred = torch.randint(0, 2, size=(80, 5, 8, 10), device=device).long()
    y = torch.randint(0, 2, size=(80, 5, 8, 10), device=device).long()

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1

    for i in range(n_iters):
        idx = i * batch_size
        acc.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

    assert (
        acc._num_correct.device == metric_device
    ), f"{type(acc._num_correct.device)}:{acc._num_correct.device} vs {type(metric_device)}:{metric_device}"

    # gather y_pred, y
    y_pred = idist.all_gather(y_pred)
    y = idist.all_gather(y)

    np_y_pred = to_numpy_multilabel(y_pred.cpu())  # (N, C, L, ...) -> (N * L * ..., C)
    np_y = to_numpy_multilabel(y.cpu())  # (N, C, L, ...) -> (N * L ..., C)

    assert acc._type == "multilabel"
    n = acc._num_examples
    res = acc.compute()
    assert n * idist.get_world_size() == acc._num_examples
    assert isinstance(res, float)
    assert accuracy_score(np_y, np_y_pred) == pytest.approx(res)
def _test_distrib_integration_multilabel(device):
    from ignite.engine import Engine

    rank = idist.get_rank()
    torch.manual_seed(12)

    def _test(average, n_epochs):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)
        y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset : (i + 1) * s + rank * offset, ...],
                y_true[i * s + rank * offset : (i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        pr = Precision(average=average, is_multilabel=True)
        pr.attach(engine, "pr")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "pr" in engine.state.metrics
        res = engine.state.metrics["pr"]
        res2 = pr.compute()
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()
            res2 = res2.cpu().numpy()
            assert (res == res2).all()
        else:
            assert res == res2

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            true_res = precision_score(
                to_numpy_multilabel(y_true), to_numpy_multilabel(y_preds), average="samples" if average else None
            )

        assert pytest.approx(res) == true_res

    for _ in range(2):
        _test(average=True, n_epochs=1)
        _test(average=True, n_epochs=2)

    if idist.get_world_size() > 1:
        with pytest.warns(
            RuntimeWarning,
            match="Precision/Recall metrics do not work in distributed setting when "
            "average=False and is_multilabel=True",
        ):
            pr = Precision(average=False, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(4, 3, 6, 8))
        y = torch.randint(0, 2, size=(4, 3, 6, 8)).long()
        pr.update((y_pred, y))
        pr_compute1 = pr.compute()
        pr_compute2 = pr.compute()
        assert len(pr_compute1) == 4 * 6 * 8
        assert (pr_compute1 == pr_compute2).all()
def _test(metric_device):
    data = list(range(n_iters))
    np.random.seed(12)
    all_y_true_batch_values = np.random.randint(
        0, n_classes, size=(idist.get_world_size(), n_epochs * n_iters, batch_size)
    )
    all_y_pred_batch_values = np.random.rand(idist.get_world_size(), n_epochs * n_iters, batch_size, n_classes)

    y_true_batch_values = iter(all_y_true_batch_values[rank, ...])
    y_pred_batch_values = iter(all_y_pred_batch_values[rank, ...])

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    trainer = Engine(update_fn)
    alpha = 0.98

    acc_metric = RunningAverage(
        Accuracy(output_transform=lambda x: [x[0], x[1]], device=metric_device), alpha=alpha, epoch_bound=False
    )
    acc_metric.attach(trainer, "running_avg_accuracy")

    running_avg_acc = [None]
    true_acc_metric = Accuracy(device=metric_device)

    @trainer.on(Events.ITERATION_COMPLETED)
    def manual_running_avg_acc(engine):
        i = engine.state.iteration - 1
        true_acc_metric.reset()
        for j in range(idist.get_world_size()):
            output = (
                torch.from_numpy(all_y_pred_batch_values[j, i, :, :]),
                torch.from_numpy(all_y_true_batch_values[j, i, :]),
            )
            true_acc_metric.update(output)

        batch_acc = true_acc_metric._num_correct.item() * 1.0 / true_acc_metric._num_examples

        if running_avg_acc[0] is None:
            running_avg_acc[0] = batch_acc
        else:
            running_avg_acc[0] = running_avg_acc[0] * alpha + (1.0 - alpha) * batch_acc
        engine.state.running_avg_acc = running_avg_acc[0]

    @trainer.on(Events.ITERATION_COMPLETED)
    def assert_equal_running_avg_acc_values(engine):
        assert (
            engine.state.running_avg_acc == engine.state.metrics["running_avg_accuracy"]
        ), f"{engine.state.running_avg_acc} vs {engine.state.metrics['running_avg_accuracy']}"

    trainer.run(data, max_epochs=3)
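# Standalone sketch of the RunningAverage recursion verified above:
# v_t = alpha * v_{t-1} + (1 - alpha) * x_t, seeded with the first batch value.
from ignite.metrics import Accuracy, RunningAverage

acc = RunningAverage(
    Accuracy(output_transform=lambda out: (out[0], out[1])),
    alpha=0.98,
)
acc.attach(trainer, "running_avg_accuracy")  # `trainer` is assumed to exist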