def testAllreduceWithEightGPUs(self):
    """Exercise the 8-GPU allreduce, provided the topology supports it.

    Allreduce8 needs two fully peer-connected cliques: GPUs 0-3 and
    GPUs 4-7. If either clique is not fully connected, the test is
    skipped with a message rather than failed.
    """
    access = workspace.GetGpuPeerAccessPattern()
    cliques_ready = (
        access.shape[0] >= 8
        and np.all(access[:4, :4])
        and np.all(access[4:, 4:])
    )
    if not cliques_ready:
        print('Skipping allreduce with 8 gpus. Not peer access ready.')
        return
    self.RunningAllreduceWithGPUs(list(range(8)), muji.Allreduce8)
def testAllreduceWithFourGPUsAndTwoGroups(self):
    """Exercise the 4-GPU, two-group allreduce when the topology allows.

    Allreduce4Group2 only needs peer access within each pair
    (GPUs 0-1 and GPUs 2-3), not across the pairs; skip otherwise.
    """
    access = workspace.GetGpuPeerAccessPattern()
    pairs_ready = (
        access.shape[0] >= 4
        and np.all(access[:2, :2])
        and np.all(access[2:4, 2:4])
    )
    if pairs_ready:
        self.RunningAllreduceWithGPUs([0, 1, 2, 3], muji.Allreduce4Group2)
    else:
        print(
            'Skipping allreduce with 4 gpus and 2 groups. Not peer access ready.'
        )
def Allreduce(net, blobs, reduced_affix="_reduced", gpu_indices=None):
    """Dispatch an Allreduce to the implementation matching the GPU topology.

    Inspects the GPU peer-access matrix and picks the most specialized
    Allreduce variant that the number of blobs and the topology support,
    falling back to AllreduceFallback when nothing matches. CPUs and AMD
    GPUs are not supported because GetGpuPeerAccessPattern is called to
    get gpu peer access pattern.

    Args:
        net: the net to add the allreduce operators to.
        blobs: the per-GPU blobs to reduce.
        reduced_affix: suffix appended to the reduced blob names.
        gpu_indices: GPU ids hosting each blob; defaults to 0..len(blobs)-1.

    Raises:
        RuntimeError: if gpu_indices and blobs have different lengths.
    """
    if gpu_indices is None:
        gpu_indices = list(range(len(blobs)))
    if len(gpu_indices) != len(blobs):
        raise RuntimeError(
            "gpu_indices length and blobs length mismatch: %d vs %d" %
            (len(gpu_indices), len(blobs)))
    access = workspace.GetGpuPeerAccessPattern()
    count = len(blobs)
    if count == 2 and access.shape[0] >= 2 and np.all(access[:2, :2]):
        return Allreduce2(net, blobs, reduced_affix, gpu_indices)
    if count == 4 and access.shape[0] >= 4:
        # Full 4-way peer access is preferred; the two-group variant only
        # requires connectivity within GPU pairs (0-1) and (2-3).
        if np.all(access[:4, :4]):
            return Allreduce4(net, blobs, reduced_affix, gpu_indices)
        if np.all(access[:2, :2]) and np.all(access[2:4, 2:4]):
            return Allreduce4Group2(net, blobs, reduced_affix, gpu_indices)
    if count == 8 and access.shape[0] >= 8 and np.all(access[:8, :8]):
        return Allreduce8(net, blobs, reduced_affix, gpu_indices)
    return AllreduceFallback(net, blobs, reduced_affix, gpu_indices)
def testGetGpuPeerAccessPattern(self):
    """The peer-access pattern is a square 2-D ndarray sized by GPU count."""
    access = workspace.GetGpuPeerAccessPattern()
    self.assertEqual(type(access), np.ndarray)
    self.assertEqual(access.ndim, 2)
    # Square matrix: one row and one column per visible GPU device.
    self.assertEqual(access.shape[0], access.shape[1])
    self.assertEqual(access.shape[0], workspace.NumGpuDevices())
def testAllreduceWithTwoGPUs(self):
    """Exercise the 2-GPU allreduce when GPUs 0 and 1 have peer access."""
    access = workspace.GetGpuPeerAccessPattern()
    if access.shape[0] < 2 or not np.all(access[:2, :2]):
        print('Skipping allreduce with 2 gpus. Not peer access ready.')
        return
    self.RunningAllreduceWithGPUs([0, 1], muji.Allreduce2)