Example #1
    def test_broadcast(self):
        # Source data: a CPU tensor filled with uniform random values,
        # copied to GPU 0 to act as the broadcast root.
        expected = torch.FloatTensor(128).uniform_()
        tensors = [expected.cuda()]
        # Uninitialized destination buffers, one per remaining GPU.
        for device in range(1, torch.cuda.device_count()):
            with torch.cuda.device(device):
                tensors.append(torch.cuda.FloatTensor(128))

        # Broadcast the first tensor in the list to all the others.
        nccl.broadcast(tensors)
        for i in range(torch.cuda.device_count()):
            self.assertEqual(tensors[i], expected)
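This and the following test snippets are methods of a CUDA test case and omit their module-level setup. A minimal sketch of the imports they rely on (module path as in PyTorch's own test suite; an assumption, since the listing does not show it):

import torch
import torch.cuda.nccl as nccl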
Example #2
def broadcast(tensor, devices):
    "Broadcasts a tensor to a number of GPUs, returning one copy per device"
    # Fast path: use NCCL when it can handle the tensor and the device
    # list contains no duplicates; `tensor` must already live on devices[0].
    if nccl.is_available([tensor]) and len(set(devices)) == len(devices):
        tensors = [tensor]
        for device in devices[1:]:
            with torch.cuda.device(device):
                # Allocate an uninitialized buffer of the same type and
                # size on each remaining device.
                tensors.append(type(tensor)(tensor.size()))
        nccl.broadcast(tensors)
        return tuple(tensors)

    # Fallback: one asynchronous copy per device. `async` became a reserved
    # keyword in Python 3.7; PyTorch renamed the argument to `non_blocking`.
    # TODO: copy to a pinned buffer first (if copy is from CPU)
    return tuple(tensor.cuda(gpu, non_blocking=True) for gpu in devices)
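A minimal usage sketch for this helper, assuming at least two visible CUDA devices and the legacy tensor-type API the function relies on (the assert is illustrative only):

import torch

if torch.cuda.device_count() >= 2:
    # The source tensor must live on the first device in the list
    # for the NCCL fast path to apply.
    src = torch.cuda.FloatTensor(128).uniform_()  # allocated on GPU 0
    copies = broadcast(src, [0, 1])               # one copy per listed GPU
    assert torch.equal(copies[1].cpu(), src.cpu())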
Example #3
    def test_broadcast(self, device, dtype):
        # Source data on GPU 0; note that the loop variable below shadows
        # the `device` parameter with an integer device index.
        expected = torch.zeros(128).uniform_().to(dtype=dtype)
        tensors = [expected.cuda()]
        for device in range(1, torch.cuda.device_count()):
            tensors.append(torch.zeros(128, dtype=dtype, device=device))

        nccl.broadcast(tensors)
        for i in range(torch.cuda.device_count()):
            self.assertEqual(tensors[i], expected)

        # nccl.broadcast should accept a tuple as well as a list.
        tensors = [expected.cuda()]
        for device in range(1, torch.cuda.device_count()):
            tensors.append(torch.zeros(128, dtype=dtype, device=device))

        nccl.broadcast(tuple(tensors))
        for i in range(torch.cuda.device_count()):
            self.assertEqual(tensors[i], expected)
Example #4
    def test_collective_errors(self, device):
        # Each collective expects a collection (list or tuple) of tensors,
        # one per device; passing a bare tensor must raise a TypeError.
        t = torch.rand(10).cuda(0)
        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.all_reduce(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.reduce(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.broadcast(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.all_gather(t, t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.reduce_scatter(t, t)
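For contrast, a short sketch of calls that satisfy the collection requirement, assuming at least two visible CUDA devices (buffer sizes follow the all-gather convention that each output holds every input concatenated):

import torch
import torch.cuda.nccl as nccl

if torch.cuda.device_count() >= 2:
    # One input tensor per device.
    inputs = [torch.rand(10, device=f"cuda:{d}") for d in range(2)]
    nccl.all_reduce(inputs)  # in-place elementwise sum across devices

    outputs = [torch.empty(20, device=f"cuda:{d}") for d in range(2)]
    nccl.all_gather(inputs, outputs)  # each output = both inputs concatenated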