Ejemplo n.º 1
0
    def test_all_gather(self, device, dtype):
        """Check ``nccl.all_gather`` with both list and tuple arguments.

        One 128-element tensor of the requested ``dtype`` is created per GPU
        (``nGPUs`` of them). After the gather, every output buffer is expected
        to equal the concatenation of all inputs. ``device`` is not used in
        the body (presumably supplied by the test parametrization).
        """
        cpu_inputs = [
            torch.zeros(128).uniform_().to(dtype=dtype) for _ in range(nGPUs)
        ]
        expected = torch.cat(cpu_inputs, 0)

        def make_buffers():
            # Fresh per-GPU inputs and zeroed outputs, so the second run
            # cannot pass merely by reusing results from the first one.
            gpu_inputs = [
                chunk.cuda(gpu) for gpu, chunk in enumerate(cpu_inputs)
            ]
            gpu_outputs = [
                torch.zeros(128 * nGPUs, device=gpu, dtype=dtype)
                for gpu in range(nGPUs)
            ]
            return gpu_inputs, gpu_outputs

        # Collections passed as lists.
        inputs, outputs = make_buffers()
        nccl.all_gather(inputs, outputs)
        for gathered in outputs:
            self.assertEqual(gathered, expected)

        # Collections passed as tuples.
        inputs, outputs = make_buffers()
        nccl.all_gather(tuple(inputs), tuple(outputs))
        for gathered in outputs:
            self.assertEqual(gathered, expected)
Ejemplo n.º 2
0
    def test_all_gather(self):
        """Gather one 128-element float tensor per GPU and verify the result.

        Every output buffer must equal the concatenation of all inputs.
        """
        host_chunks = [torch.FloatTensor(128).uniform_() for _ in range(nGPUs)]
        expected = torch.cat(host_chunks, 0)

        # Place the i-th chunk on GPU i.
        inputs = [chunk.cuda(gpu) for gpu, chunk in enumerate(host_chunks)]

        # One output buffer per GPU, large enough to hold every chunk.
        outputs = []
        for gpu in range(nGPUs):
            outputs.append(torch.cuda.FloatTensor(128 * nGPUs, device=gpu))

        nccl.all_gather(inputs, outputs)

        for gathered in outputs:
            self.assertEqual(gathered, expected)
Ejemplo n.º 3
0
    def test_all_gather(self):
        """Verify that ``nccl.all_gather`` fills each output with all inputs.

        A 128-element float tensor is created for each of the ``nGPUs``
        devices; after the gather each per-device output is compared with
        the concatenation of those tensors.
        """
        gpu_ids = range(nGPUs)
        cpu_tensors = [torch.FloatTensor(128).uniform_() for _ in gpu_ids]
        expected = torch.cat(cpu_tensors, 0)

        inputs = [cpu_tensors[gpu].cuda(gpu) for gpu in gpu_ids]
        total_len = 128 * nGPUs
        outputs = [
            torch.cuda.FloatTensor(total_len, device=gpu) for gpu in gpu_ids
        ]
        nccl.all_gather(inputs, outputs)

        for result in outputs:
            self.assertEqual(result, expected)
Ejemplo n.º 4
0
    def test_collective_errors(self, device):
        """Check that NCCL collectives reject a bare tensor argument.

        Each collective is called with a single tensor where a collection is
        expected, and must raise ``TypeError`` with the matching message.
        ``device`` is not used in the body (presumably supplied by the test
        parametrization).
        """
        t = torch.rand(10).cuda(0)

        # Lambdas defer both the attribute lookup and the call until inside
        # the assertRaisesRegex context, one collective at a time.
        bad_calls = (
            lambda: nccl.all_reduce(t),
            lambda: nccl.reduce(t),
            lambda: nccl.broadcast(t),
            lambda: nccl.all_gather(t, t),
            lambda: nccl.reduce_scatter(t, t),
        )
        for bad_call in bad_calls:
            with self.assertRaisesRegex(
                    TypeError, "Inputs should be a collection of tensors"):
                bad_call()