def test_broadcast_checks(self):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)

    t1 = torch.zeros([1], dtype=torch.float32)
    t2 = torch.zeros([1], dtype=torch.float64)
    t3 = torch.zeros([2], dtype=torch.float32)

    with self.assertRaisesRegex(ValueError, "unexpected rank"):
        opts = c10d.BroadcastOptions()
        opts.rootRank = -1
        opts.rootTensor = 0
        pg.broadcast([t1], opts)

def _test_alltoall_base_equal_split_helper(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)
    group = list(range(0, self.world_size))
    rank = self.rank
    size = len(group)
    in_tensor = fn(torch.ones([size, size]) * rank)
    expected_tensor = torch.cat([torch.ones([1, size]) * i for i in group])
    out_tensor = fn(torch.ones([size, size]) * -1)
    # Empty split lists request an equal split of dim 0 across all ranks
    in_splits = []
    out_splits = []
    work = pg.alltoall_base(out_tensor, in_tensor, out_splits, in_splits)
    work.wait()
    self.assertEqual(out_tensor.cpu(), expected_tensor.cpu())

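# Hypothetical driver (assumption, mirroring the usual c10d test pattern):
# helpers that take `fn` are invoked with a clone lambda for CPU tensors and,
# where a device backend is available, with a device-transfer lambda instead.
def test_alltoall_base_equal_split(self):
    self._test_alltoall_base_equal_split_helper(lambda t: t.clone())
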
def _test_allgather_basics(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)

    # Run with 1 input tensor per rank
    for n in range(self.world_size):
        input = [fn(torch.tensor([n * self.rank]))]
        output = [[fn(torch.tensor([-1])) for _ in range(self.world_size)]]
        expected_output = [[
            torch.tensor([i * n]) for i in range(self.world_size)
        ]]
        work = pg.allgather(output, input)
        work.wait()
        self.assertEqual(expected_output, output)

def _test_broadcast_stress(self, inputs):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)
    work_handles = [
        pg.broadcast(inputs[i], root=(i % self.world_size))
        for i in range(len(inputs))
    ]
    for i, work_handle in enumerate(work_handles):
        work_handle.wait()
        self.assertEqual(
            torch.tensor([(i * self.world_size) + (i % self.world_size)]),
            inputs[i],
            msg=("Mismatch in iteration %d" % i),
        )

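# Hypothetical driver (assumption; the input count of 1000 is illustrative):
# the assertion above only holds if rank r's i-th tensor starts out as
# i * world_size + r, so that the value broadcast from root (i % world_size)
# is i * world_size + (i % world_size) on every rank.
def test_broadcast_stress(self):
    inputs = [
        torch.tensor([i * self.world_size + self.rank]) for i in range(1000)
    ]
    self._test_broadcast_stress(inputs)
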
def _test_allreduce_basics(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)

    # Single input tests
    tests = simple_reduce_tests(self.rank, self.world_size)
    for (op, input, output) in tests:
        opts = c10d.AllreduceOptions()
        opts.reduceOp = op
        tensor = fn(input)
        work = pg.allreduce([tensor], opts)
        work.wait()
        # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095
        self.assertEqualIgnoreType(output, tensor)

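# Sketch (assumption): `simple_reduce_tests` is not part of this excerpt. In
# the upstream c10d test suite it is a module-level helper that yields
# (op, per-rank input, expected result) triples along these lines:
#
#   import math
#
#   def simple_reduce_tests(rank, world_size):
#       return [
#           (c10d.ReduceOp.SUM, torch.tensor([rank + 1.0]),
#            torch.tensor([float(world_size * (world_size + 1) / 2)])),
#           (c10d.ReduceOp.PRODUCT, torch.tensor([rank + 1.0]),
#            torch.tensor([float(math.factorial(world_size))])),
#           (c10d.ReduceOp.MIN, torch.tensor([rank + 1.0]),
#            torch.tensor([1.0])),
#           (c10d.ReduceOp.MAX, torch.tensor([rank + 1.0]),
#            torch.tensor([float(world_size)])),
#       ]
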
def _test_alltoall_base_unequal_split_helper(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)
    group = list(range(0, self.world_size))
    rank = self.rank
    size = len(group)
    # Rank r sends i + 1 rows to rank i and receives r + 1 rows from each rank
    in_splits = [i + 1 for i in group]
    out_splits = [rank + 1 for _ in group]
    # Apply fn so device variants of this helper actually exercise the device
    # (the original body dropped the fn wrapper)
    in_tensor = fn(torch.ones([sum(in_splits), size]) * rank)
    out_tensor = fn(torch.ones([(rank + 1) * size, size]))
    expected_tensor = torch.cat(
        [torch.ones([rank + 1, size]) * i for i in group])
    work = pg.alltoall_base(out_tensor, in_tensor, out_splits, in_splits)
    work.wait()
    self.assertEqual(out_tensor.cpu(), expected_tensor.cpu())

def _test_all_to_all_helper(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)
    group = list(range(0, self.world_size))
    rank = self.rank
    size = len(group)
    in_splits = [i + 1 for i in group]
    # Apply fn so device variants of this helper actually exercise the device
    # (the original body dropped the fn wrapper)
    in_tensors = [
        fn(torch.ones([in_splits[i], size]) * rank)
        for i, _ in enumerate(group)
    ]
    out_tensors = [fn(torch.ones([(rank + 1), size])) for _ in group]
    expected_tensors = [torch.ones([rank + 1, size]) * i for i in group]
    work = pg.alltoall(out_tensors, in_tensors)
    work.wait()
    for t1, t2 in zip(out_tensors, expected_tensors):
        self.assertEqual(t1.cpu(), t2)

def _test_reduce_basics(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)
    for (op, input, output) in simple_reduce_tests(self.rank, self.world_size):
        for root in range(self.world_size):
            opts = c10d.ReduceOptions()
            opts.reduceOp = op
            opts.rootRank = root
            tmp = fn(input)
            work = pg.reduce([tmp], opts)
            work.wait()
            # Only the root rank holds the reduced result
            if root == self.rank:
                self.assertEqual(output, tmp)

def _test_broadcast_basics(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)

    def broadcast(xs, rootRank, rootTensor):
        opts = c10d.BroadcastOptions()
        opts.rootRank = rootRank
        opts.rootTensor = rootTensor
        work = pg.broadcast(xs, opts)
        work.wait()

    # Every rank is root once
    for i in range(self.world_size):
        # Run with 1 input tensor
        x = fn(torch.tensor([self.rank]))
        broadcast([x], i, 0)
        # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095
        self.assertEqualIgnoreType(torch.tensor([i]), x)

    x = fn(torch.tensor([self.rank + 1.0]))
    work = pg.broadcast(x, root=0)
    work.wait()
    self.assertEqual(torch.tensor([1.0]), x)

def _test_gather_basics(self, fn):
    store = c10d.FileStore(self.file_name, self.world_size)
    pg = c10d.ProcessGroupCCL(store, self.rank, self.world_size)

    # Preallocate tensors for input/output
    input = [fn(torch.tensor([self.rank]))]
    outputs = [fn(torch.tensor([-1])) for _ in range(self.world_size)]

    # Take turns being the gather root and accumulate work items
    work = []
    for i in range(self.world_size):
        opts = c10d.GatherOptions()
        opts.rootRank = i
        if i == self.rank:
            work.append(pg.gather([outputs], input, opts))
        else:
            work.append(pg.gather([], input, opts))

    # Wait for work to complete
    expected = [torch.tensor([rank]) for rank in range(self.world_size)]
    for i in range(self.world_size):
        work[i].wait()
        if i == self.rank:
            self.assertEqual(expected, outputs)
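
# Note (assumption, not shown in this excerpt): self.rank, self.world_size and
# self.file_name are conventionally supplied by torch's MultiProcessTestCase
# harness, which spawns one process per rank and points every rank at the same
# FileStore file; the enclosing class name here is hypothetical:
#
#   class ProcessGroupCCLTest(MultiProcessTestCase):
#       def setUp(self):
#           super().setUp()
#           self._spawn_processes()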