Example #1
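A test of the coalesced broadcast path over a Gloo process group: the master rank seeds five chunks of a CUDA tensor, the other ranks start from zeros, and after the call every non-master rank must hold the master's values. The method belongs to a multiprocess test case that supplies self.port, self.is_master, self.rank, and self.world_size; torch and the c10d bindings are assumed to be imported at module level.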
    def test_dist_broadcast_coalesced(self):
        # Set up process group.
        store = c10d.TCPStore('localhost', self.port, self.is_master)
        options = c10d.ProcessGroupGloo.Options()
        options.devices = [
            c10d.ProcessGroupGloo.create_tcp_device(interface="lo")
        ]
        process_group = c10d.ProcessGroupGloo(store, self.rank,
                                              self.world_size, options)

        device = torch.device('cuda')

        target = torch.arange(10, dtype=torch.float64, device=device).chunk(5)

        if self.is_master:
            # All processes should have these tensors in the end.
            tensors = target
        else:
            # Non-master processes start with empty tensors and should be
            # filled with the tensors from the master.
            tensors = torch.zeros(10, device=device).chunk(5)

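        # A deliberately small buffer_size (10 bytes, less than one float64
        # chunk) forces the helper to split the broadcast into several buckets.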
        c10d._dist_broadcast_coalesced(tensors,
                                       buffer_size=10,
                                       process_group=process_group)

        if not self.is_master:
            self.assertEqual(tensors, target)
Example #2
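The Python-side helper the test above exercises: it fills in the default process group and a default bucket size, then dispatches to the private dist._dist_broadcast_coalesced binding.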
import torch.distributed as dist

# BROADCAST_BUCKET_SIZE is a module-level constant (maximum bucket size in
# bytes) defined alongside this helper.
def _dist_broadcast_coalesced(tensors, buffer_size=None, process_group=None):
    if process_group is None:
        # Fall back to the group created by init_process_group().
        process_group = dist.distributed_c10d._get_default_group()

    if buffer_size is None:
        buffer_size = BROADCAST_BUCKET_SIZE

    # Broadcast from the master rank, coalescing the tensors into buckets
    # of at most `buffer_size` bytes.
    dist._dist_broadcast_coalesced(process_group, tensors, buffer_size, False)
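
A minimal sketch of how this helper might be driven end to end, assuming an older PyTorch build that still exposes the private dist._dist_broadcast_coalesced binding and that the helper above is in scope. The Gloo backend, the rendezvous endpoint, and the two-process layout here are illustrative choices, not taken from the examples:

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def worker(rank, world_size):
    # Hypothetical rendezvous endpoint; any init_method would do.
    dist.init_process_group('gloo', init_method='tcp://127.0.0.1:29500',
                            rank=rank, world_size=world_size)
    if rank == 0:
        tensors = list(torch.arange(10, dtype=torch.float64).chunk(5))
    else:
        tensors = list(torch.zeros(10, dtype=torch.float64).chunk(5))
    # A tiny buffer_size forces several buckets, mirroring the test above.
    _dist_broadcast_coalesced(tensors, buffer_size=10)
    # Every rank should now hold rank 0's values.

if __name__ == '__main__':
    mp.spawn(worker, args=(2,), nprocs=2)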
Example #3
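The same call bound as an instance method, for classes that carry their own process_group attribute; callers pass only the tensors and a bucket size.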
def _dist_broadcast_coalesced(self, tensors, buffer_size):
    # Thin wrapper: same broadcast, using this object's process group.
    dist._dist_broadcast_coalesced(self.process_group, tensors,
                                   buffer_size, False)