def test_all_gather(self, device, dtype):
    """all_gather should leave the concatenation of every GPU's input on
    each GPU; both list and tuple collections must be accepted."""
    host_chunks = [torch.zeros(128).uniform_().to(dtype=dtype) for _ in range(nGPUs)]
    expected = torch.cat(host_chunks, 0)

    # Run once with lists and once with tuples — the collective must
    # accept either collection type.
    for wrap in (list, tuple):
        gathered_in = wrap(host_chunks[i].cuda(i) for i in range(nGPUs))
        gathered_out = wrap(
            torch.zeros(128 * nGPUs, device=i, dtype=dtype) for i in range(nGPUs)
        )
        nccl.all_gather(gathered_in, gathered_out)
        for out in gathered_out:
            self.assertEqual(out, expected)
def test_all_gather(self):
    """all_gather should leave the concatenation of every GPU's input on
    each GPU (float32, list collections only)."""
    # NOTE(review): another `test_all_gather` definition exists in this file;
    # if both live in the same class, the later definition shadows the
    # earlier one — consider renaming or removing one of them.
    #
    # Fixed: the legacy `torch.FloatTensor` / `torch.cuda.FloatTensor`
    # constructors are deprecated; use factory functions with explicit
    # dtype/device instead (same float32 behavior).
    cpu_inputs = [torch.empty(128).uniform_() for _ in range(nGPUs)]
    expected = torch.cat(cpu_inputs, 0)
    inputs = [cpu_inputs[i].cuda(i) for i in range(nGPUs)]
    outputs = [
        torch.zeros(128 * nGPUs, dtype=torch.float32, device=i)
        for i in range(nGPUs)
    ]
    nccl.all_gather(inputs, outputs)
    for tensor in outputs:
        self.assertEqual(tensor, expected)
def test_collective_errors(self, device):
    """Every collective must raise TypeError when handed a bare tensor
    instead of a collection of tensors."""
    t = torch.rand(10).cuda(0)
    msg = "Inputs should be a collection of tensors"

    # Single-argument collectives.
    for op in (nccl.all_reduce, nccl.reduce, nccl.broadcast):
        with self.assertRaisesRegex(TypeError, msg):
            op(t)

    # Input/output-pair collectives.
    for op in (nccl.all_gather, nccl.reduce_scatter):
        with self.assertRaisesRegex(TypeError, msg):
            op(t, t)