Example 1
def test_get_nvidia_gpu_stats(tmpdir):
    """Test GPU get_device_stats with Pytorch < 1.8.0."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    gpu_stats = GPUAccelerator().get_device_stats(current_device)
    fields = [
        "utilization.gpu", "memory.used", "memory.free", "utilization.memory"
    ]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
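The substrings asserted above are nvidia-smi query fields, which is what get_device_stats relies on for these older PyTorch versions. A minimal sketch (assumed usage, not part of the source tests) for inspecting the returned dict on a CUDA machine:

import torch
from pytorch_lightning.accelerators import GPUAccelerator

device = torch.device(f"cuda:{torch.cuda.current_device()}")
for name, value in GPUAccelerator().get_device_stats(device).items():
    print(name, value)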
Example 2
def test_get_torch_gpu_stats(tmpdir):
    """Test GPU get_device_stats with PyTorch >= 1.8.0."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    gpu_stats = GPUAccelerator().get_device_stats(current_device)
    fields = [
        "allocated_bytes.all.freed", "inactive_split.all.peak",
        "reserved_bytes.large_pool.peak"
    ]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
Example 3
def test_get_nvidia_gpu_stats(tmpdir):
    """Test GPU get_device_stats with Pytorch < 1.8.0."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    GPUAccel = GPUAccelerator(
        training_type_plugin=DataParallelPlugin(parallel_devices=[current_device]),
        precision_plugin=PrecisionPlugin(),
    )
    gpu_stats = GPUAccel.get_device_stats(current_device)
    fields = [
        "utilization.gpu", "memory.used", "memory.free", "utilization.memory"
    ]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
Example 4
def test_get_torch_gpu_stats(tmpdir):
    """Test GPU get_device_stats with Pytorch >= 1.8.0."""
    current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    GPUAccel = GPUAccelerator(
        training_type_plugin=DataParallelPlugin(parallel_devices=[current_device]),
        precision_plugin=PrecisionPlugin(),
    )
    gpu_stats = GPUAccel.get_device_stats(current_device)
    fields = [
        "allocated_bytes.all.freed", "inactive_split.all.peak",
        "reserved_bytes.large_pool.peak"
    ]

    for f in fields:
        assert any(f in h for h in gpu_stats.keys())
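On PyTorch >= 1.8, get_device_stats draws its keys from torch.cuda.memory_stats(), which is where the field names above come from. A quick way to list them all (a sketch; assumes a CUDA-capable machine):

import torch
print(sorted(torch.cuda.memory_stats(torch.cuda.current_device()).keys()))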
Example 5
def test_gpu_availability():
    assert GPUAccelerator.is_available()
Example 6
def test_auto_device_count(device_count_mock):
    assert CPUAccelerator.auto_device_count() == 1
    assert GPUAccelerator.auto_device_count() == 2
    assert TPUAccelerator.auto_device_count() == 8
    assert IPUAccelerator.auto_device_count() == 4
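The counts above are mocked via the device_count_mock fixture; on a real machine the same static methods reflect the hardware. A minimal sketch (assumed usage, not from the source tests) of choosing an accelerator from what is actually available:

from pytorch_lightning.accelerators import CPUAccelerator, GPUAccelerator

# fall back to CPU when no GPU backend is available
accelerator_cls = GPUAccelerator if GPUAccelerator.is_available() else CPUAccelerator
print(accelerator_cls.auto_device_count())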
Example 7
# imports assumed by this snippet (the LitAutoEncoder LightningModule that owns
# configure_optimizers is truncated; only its optimizer hook is shown below)
import os

import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST

from pytorch_lightning.accelerators import GPUAccelerator
from pytorch_lightning.plugins import DDPPlugin, NativeMixedPrecisionPlugin
from pytorch_lightning.plugins.environments import LSFEnvironment


    def configure_optimizers(self):
        # plain Adam over all module parameters
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer


# MNIST must already be on disk (download=False); compute nodes on an LSF
# cluster typically have no internet access
dataset = MNIST(os.getcwd(), download=False, transform=transforms.ToTensor())
train_loader = DataLoader(dataset)

# init model
autoencoder = LitAutoEncoder()

# build the accelerator explicitly: native AMP for precision, DDP across all
# local GPUs, and an LSF cluster environment for multi-node scheduling

parallel_devices = [torch.device(i) for i in range(torch.cuda.device_count())]
acc = GPUAccelerator(
    precision_plugin=NativeMixedPrecisionPlugin(),
    training_type_plugin=DDPPlugin(
        parallel_devices=parallel_devices,
        cluster_environment=LSFEnvironment(),
    ),
)

# trainer arguments: 2 nodes with 6 GPUs each; the batch limits keep this a short smoke run
targs = {
    'max_epochs': 1,
    'num_nodes': 2,
    'accumulate_grad_batches': 1,
    'gpus': 6,
    'accelerator': acc,
    'limit_train_batches': 10,
    'limit_val_batches': 5,
    'log_every_n_steps': 1
}

trainer = pl.Trainer(**targs)
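
# the source snippet stops after constructing the Trainer; the usual next step
# (assumed here, not shown in the original) is to launch training with the
# model and loader defined above
trainer.fit(autoencoder, train_loader)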