def test_get_nvidia_gpu_stats(tmpdir):
    """Test GPU get_device_stats with PyTorch < 1.8.0 (stats are queried from nvidia-smi)."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    gpu_accel = GPUAccelerator(
        training_type_plugin=DataParallelPlugin(parallel_devices=[current_device]),
        precision_plugin=PrecisionPlugin(),
    )
    gpu_stats = gpu_accel.get_device_stats(current_device)
    fields = ["utilization.gpu", "memory.used", "memory.free", "utilization.memory"]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
def test_get_torch_gpu_stats(tmpdir):
    """Test GPU get_device_stats with PyTorch >= 1.8.0 (stats come from torch.cuda.memory_stats)."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    gpu_accel = GPUAccelerator(
        training_type_plugin=DataParallelPlugin(parallel_devices=[current_device]),
        precision_plugin=PrecisionPlugin(),
    )
    gpu_stats = gpu_accel.get_device_stats(current_device)
    fields = ["allocated_bytes.all.freed", "inactive_split.all.peak", "reserved_bytes.large_pool.peak"]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
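# For context, a minimal sketch of the version dispatch the two tests above
# exercise: torch.cuda.memory_stats only exists from PyTorch 1.8, so older
# versions fall back to querying nvidia-smi. This is an illustrative
# stand-in, not the library's actual implementation; the function name and
# the exact nvidia-smi invocation are assumptions.
import subprocess

import torch

_TORCH_GE_1_8 = tuple(int(p) for p in torch.__version__.split(".")[:2]) >= (1, 8)


def get_device_stats_sketch(device: torch.device) -> dict:
    if _TORCH_GE_1_8:
        # returns allocator counters such as "allocated_bytes.all.freed"
        return torch.cuda.memory_stats(device)
    fields = ["utilization.gpu", "memory.used", "memory.free", "utilization.memory"]
    output = subprocess.run(
        [
            "nvidia-smi",
            f"--query-gpu={','.join(fields)}",
            "--format=csv,nounits,noheader",
            f"--id={device.index}",
        ],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    return dict(zip(fields, (float(v) for v in output.strip().split(", "))))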
def test_gpu_availability():
    assert GPUAccelerator.is_available()
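# In a full test suite the availability test would be guarded so it only runs
# on machines that actually have a GPU. A sketch using a plain pytest marker
# (the guarded test name is made up for illustration):
import pytest
import torch


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires a CUDA GPU")
def test_gpu_availability_guarded():
    assert GPUAccelerator.is_available()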
def test_auto_device_count(device_count_mock):
    # `device_count_mock` is expected to patch each backend's device discovery
    # so the counts below are deterministic regardless of the test machine.
    assert CPUAccelerator.auto_device_count() == 1
    assert GPUAccelerator.auto_device_count() == 2
    assert TPUAccelerator.auto_device_count() == 8
    assert IPUAccelerator.auto_device_count() == 4
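# One way `device_count_mock` above can be provided is by patching the device
# discovery function with unittest.mock. The patch target below is an
# assumption for illustration; a real suite may patch several backends at once.
from unittest import mock


@mock.patch("torch.cuda.device_count", return_value=2)
def test_gpu_auto_device_count_sketch(device_count_mock):
    assert GPUAccelerator.auto_device_count() == 2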
def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
    return optimizer


dataset = MNIST(os.getcwd(), download=False, transform=transforms.ToTensor())
train_loader = DataLoader(dataset)

# init model
autoencoder = LitAutoEncoder()

# build a GPU accelerator that trains with native mixed precision and DDP,
# using LSF's scheduler environment for cluster coordination
parallel_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
acc = GPUAccelerator(
    precision_plugin=NativeMixedPrecisionPlugin(),
    training_type_plugin=DDPPlugin(
        parallel_devices=parallel_devices,
        cluster_environment=LSFEnvironment(),
    ),
)

# most basic trainer, uses good defaults (auto-tensorboard, checkpoints, logs, and more)
targs = {
    "max_epochs": 1,
    "num_nodes": 2,
    "accumulate_grad_batches": 1,
    "gpus": 6,
    "accelerator": acc,
    "limit_train_batches": 10,
    "limit_val_batches": 5,
    "log_every_n_steps": 1,
}
# trainer = pl.Trainer(gpus=8) (if you have GPUs)
trainer = pl.Trainer(**targs)
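# Finally, launch training. Under LSF the script is typically submitted so
# that one process is started per GPU (e.g. through jsrun), and LSFEnvironment
# reads the scheduler's environment variables to derive ranks; the exact
# submission command depends on the cluster.
trainer.fit(autoencoder, train_loader)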