```python
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.distributed as dist
import apex.parallel as parallel

# Define the neural network module
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc1 = nn.Linear(10, 100)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(100, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Initialize the process group (one process per GPU) and pin this process to its GPU
dist.init_process_group(backend='nccl', init_method='env://')
torch.cuda.set_device(int(os.environ.get('LOCAL_RANK', 0)))

# Initialize the model and optimizer
net = MyNet().cuda()
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Wrap the model with the distributed data parallel module
net = parallel.DistributedDataParallel(net)

# Create a fake data set
data = torch.randn(100, 10).cuda()

# Loop over some number of epochs
for epoch in range(10):
    # Zero the gradients and compute the loss
    optimizer.zero_grad()
    output = net(data)
    loss = output.mean()

    # Compute the gradients and update the weights
    loss.backward()
    optimizer.step()

    # Print the loss for this epoch
    print('Epoch {}/{}: Loss = {:.4f}'.format(epoch + 1, 10, loss.item()))
```

In this example, we define a simple neural network module (`MyNet`), move it to the GPU, and wrap it with `apex.parallel.DistributedDataParallel`. We then create a fake data set and loop over 10 epochs, computing the loss and updating the weights each time. Note that we did not need to manually handle any device synchronization or communication between processes - the gradients are averaged across GPUs by `DistributedDataParallel` behind the scenes when `loss.backward()` is called.
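Since every process runs its own copy of this script, a real training job would also shard the data so that each GPU sees a different slice. Below is a minimal sketch of how that could look with PyTorch's `DistributedSampler`, reusing `net` and `optimizer` from the example above; the dataset and batch size here are placeholders, not part of the original example:

```python
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

# Placeholder dataset standing in for real training data
dataset = TensorDataset(torch.randn(1000, 10))

# DistributedSampler partitions the indices across the processes in the group,
# so each GPU trains on a disjoint shard of the data
sampler = DistributedSampler(dataset)
loader = DataLoader(dataset, batch_size=32, sampler=sampler)

for epoch in range(10):
    sampler.set_epoch(epoch)  # reshuffle the shards differently each epoch
    for (batch,) in loader:
        optimizer.zero_grad()
        loss = net(batch.cuda()).mean()
        loss.backward()
        optimizer.step()
```

To start one process per GPU, the script would typically be launched with something like `torchrun --nproc_per_node=4 train.py` (or `python -m torch.distributed.launch --nproc_per_node=4 train.py` on older PyTorch releases), where `train.py` is whatever the script above is saved as.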