import torch
import torch.nn as nn
import torch.optim as optim

import ignite.distributed as idist
from ignite.distributed.auto import auto_model, auto_optim


def _test_auto_model_optimizer(ws, device):
    # Test auto_model
    model = nn.Linear(10, 10)
    _test_auto_model(model, ws, device)

    model = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
    _test_auto_model(model, ws, device, sync_bn="cuda" in torch.device(device).type)
    if ws > 1:
        _test_auto_model(model, ws, device, find_unused_parameters=True)
        _test_auto_model(model, ws, device, find_unused_parameters=False)

    # Test auto_optim
    bnd = idist.backend()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    optimizer = auto_optim(optimizer)
    if idist.has_xla_support and "xla" in device:
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "wrapped_optimizer")
    elif idist.has_hvd_support and bnd in ("horovod",):
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "_allreduce_grad_async")
    else:
        assert isinstance(optimizer, optim.SGD) and not hasattr(optimizer, "wrapped_optimizer")

    if idist.has_hvd_support and bnd in ("horovod",):
        backward_passes_per_step = 2
        optimizer = optim.SGD(model.parameters(), lr=0.01)
        optimizer = auto_optim(optimizer, backward_passes_per_step=backward_passes_per_step)
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "backward_passes_per_step")
        assert optimizer.backward_passes_per_step == backward_passes_per_step
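# Note: the ``_test_auto_model`` helper called above is defined elsewhere in the
# test module and is not part of this excerpt. The sketch below is an assumption
# reconstructed from the calls above; the actual helper in the test suite may differ.
def _test_auto_model(model, ws, device, sync_bn=False, **kwargs):
    model = auto_model(model, sync_bn=sync_bn, **kwargs)
    bnd = idist.backend()
    if ws > 1 and device in ("cuda", "cpu"):
        if idist.has_native_dist_support and bnd in ("nccl", "gloo"):
            # Native distributed backends are expected to wrap the model in DDP.
            assert isinstance(model, nn.parallel.DistributedDataParallel)
        elif idist.has_hvd_support and bnd in ("horovod",):
            # Horovod broadcasts parameters in place, so the model stays a plain nn.Module.
            assert isinstance(model, nn.Module)
    elif device != "cpu" and torch.cuda.is_available() and torch.cuda.device_count() > 1:
        assert isinstance(model, nn.parallel.DataParallel)
    else:
        assert isinstance(model, nn.Module)
    # Parameters should have been moved to the requested device.
    assert all(p.device.type == torch.device(device).type for p in model.parameters())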
def _test_auto_model_optimizer(ws, device):
    # Test auto_model
    model = nn.Linear(10, 10)
    model = auto_model(model)
    bnd = idist.backend()
    if ws > 1 and device in ("cuda", "cpu"):
        if idist.has_native_dist_support and bnd in ("nccl", "gloo"):
            assert isinstance(model, nn.parallel.DistributedDataParallel)
        elif idist.has_hvd_support and bnd in ("horovod",):
            assert isinstance(model, nn.Module)
    elif device != "cpu" and torch.cuda.is_available() and torch.cuda.device_count() > 1:
        assert isinstance(model, nn.parallel.DataParallel)
    else:
        assert isinstance(model, nn.Module)

    assert all([p.device.type == device for p in model.parameters()]), "{} vs {}".format(
        [p.device.type for p in model.parameters()], device
    )

    # Test auto_optim
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    optimizer = auto_optim(optimizer)
    if idist.has_xla_support and "xla" in device:
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "wrapped_optimizer")
    elif idist.has_hvd_support and bnd in ("horovod",):
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "_allreduce_grad_async")
    else:
        assert isinstance(optimizer, optim.SGD) and not hasattr(optimizer, "wrapped_optimizer")
def _test_auto_model_optimizer(ws, device):
    # Test auto_model
    model = nn.Linear(10, 10)
    model = auto_model(model)
    if ws > 1:
        assert isinstance(model, nn.parallel.DistributedDataParallel)
    elif device != "cpu" and torch.cuda.is_available() and torch.cuda.device_count() > 1:
        assert isinstance(model, nn.parallel.DataParallel)
    else:
        assert isinstance(model, nn.Module)

    assert all([p.device.type == device for p in model.parameters()]), "{} vs {}".format(
        [p.device.type for p in model.parameters()], device
    )

    # Test auto_optim
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    optimizer = auto_optim(optimizer)
    if "xla" in device:
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "wrapped_optimizer")
    else:
        assert isinstance(optimizer, optim.SGD) and not hasattr(optimizer, "wrapped_optimizer")
def _test_auto_model_optimizer(ws, device):
    # Test auto_model
    model = nn.Linear(10, 10)
    _test_auto_model(model, ws, device)

    model = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
    _test_auto_model(model, ws, device, sync_bn="cuda" in device)

    # Test auto_optim
    bnd = idist.backend()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    optimizer = auto_optim(optimizer)
    if idist.has_xla_support and "xla" in device:
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "wrapped_optimizer")
    elif idist.has_hvd_support and bnd in ("horovod",):
        assert isinstance(optimizer, optim.SGD) and hasattr(optimizer, "_allreduce_grad_async")
    else:
        assert isinstance(optimizer, optim.SGD) and not hasattr(optimizer, "wrapped_optimizer")
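# Usage sketch (an assumption, not part of the original test suite): the checks
# above expect a process group to be initialised so that ``idist.backend()``
# reports the active backend. One way to drive ``_test_auto_model_optimizer``
# with a world size of 2 on CPU is ``idist.spawn`` with the "gloo" backend.
def _worker(local_rank, ws, device):
    # Inside the spawned "gloo" group, auto_model is expected to return a
    # DistributedDataParallel-wrapped module and auto_optim a plain SGD optimizer.
    _test_auto_model_optimizer(ws, device)


if __name__ == "__main__":
    world_size = 2
    idist.spawn("gloo", _worker, args=(world_size, "cpu"), nproc_per_node=world_size)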