```python
import os
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import DistributedDataParallel as DDP

# Initialize the default process group (single-process setup so the example runs standalone)
os.environ.setdefault("MASTER_ADDR", "localhost")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group(backend="gloo", rank=0, world_size=1)

# Create model
model = nn.Sequential(nn.Linear(100, 10), nn.ReLU())

# Wrap model with DDP
model = DDP(model)

# Define optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Forward pass
input_data = torch.randn(10, 100)
output_data = model(input_data)

# Backward pass (DDP synchronizes gradients across processes here)
optimizer.zero_grad()
output_data.backward(torch.ones_like(output_data))

# Update weights
optimizer.step()
```
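The snippet above initializes a single-process group only so it can run on its own; in practice, DDP jobs launch one process per GPU or machine. As a minimal sketch of that launch pattern (the `train_worker` function name and the `gloo` backend are illustrative assumptions, not part of the example above), each worker can be started with `torch.multiprocessing.spawn`:

```python
import os
import torch.distributed as dist
import torch.multiprocessing as mp

def train_worker(rank, world_size):
    # Hypothetical per-process entry point: every spawned process joins the same group under its own rank
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group(backend="gloo", rank=rank, world_size=world_size)
    # ... build the model, wrap it in DDP, and run the training loop as shown above ...
    dist.destroy_process_group()

if __name__ == "__main__":
    world_size = 2  # one process per device is typical
    mp.spawn(train_worker, args=(world_size,), nprocs=world_size, join=True)
```

The same script can also be launched with `torchrun`, which sets the rank and world size environment variables for each process.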
```python
import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def custom_comm_hook(state, bucket):
    # Average the bucket's flattened gradients across processes with an async all-reduce
    tensor = bucket.buffer()
    tensor.div_(dist.get_world_size())
    fut = dist.all_reduce(tensor, async_op=True).get_future()
    # Return a Future that resolves to the reduced gradient tensor
    return fut.then(lambda f: f.value()[0])

# Create model
model = nn.Sequential(nn.Linear(100, 10), nn.ReLU())

# Wrap model with DDP and tune its constructor arguments
model = DDP(model,
            broadcast_buffers=False,
            find_unused_parameters=True,
            bucket_cap_mb=1)

# Register the custom communication hook
model.register_comm_hook(state=None, hook=custom_comm_hook)

# Forward pass
input_data = torch.randn(10, 100)
output_data = model(input_data)
```

In this example, we define a custom communication hook and register it on the `DistributedDataParallel` model with `register_comm_hook`. During the backward pass, the hook receives each gradient bucket and returns a future for the reduced tensor, so you can replace or augment DDP's default all-reduce (for example, with gradient compression). We also set a few other constructor arguments: `broadcast_buffers=False` disables buffer broadcasting, `find_unused_parameters=True` enables detection of parameters that receive no gradient, and `bucket_cap_mb=1` shrinks the gradient buckets used for communication.

Overall, `torch.nn.parallel.DistributedDataParallel` is a powerful tool for parallelizing the training of PyTorch models. It distributes work across multiple processes, GPUs, or machines, enabling faster training and better scalability.
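If you do not need a fully custom hook, PyTorch also ships ready-made hooks that plug into the same `register_comm_hook` API. As a brief sketch (assuming the process group has already been initialized as in the first example), the built-in fp16 compression hook halves the gradient communication volume:

```python
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks

# Assumes dist.init_process_group(...) has already been called
model = DDP(nn.Sequential(nn.Linear(100, 10), nn.ReLU()))

# Cast gradients to float16 before the all-reduce, then restore the original dtype afterwards
model.register_comm_hook(state=None, hook=default_hooks.fp16_compress_hook)
```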