Example #1
 def all_gather(self,
                tensor: torch.Tensor,
                group: Optional[Any] = None,
                sync_grads: bool = False) -> torch.Tensor:
     """Perform a all_gather on all processes """
     return all_gather_ddp_if_available(tensor,
                                        group=group,
                                        sync_grads=sync_grads)
Example #2
 def broadcast(self, obj: object, src: int) -> object:
     # Serialize the object into raw bytes, move the bytes across processes as a
     # tensor, and deserialize the result back into a Python object.
     buffer = io.BytesIO()
     torch.save(obj, buffer)
     data = bytearray(buffer.getbuffer())
     data_tensor = torch.tensor(data).to(self.root_device, dtype=torch.float)
     data = all_gather_ddp_if_available(data_tensor)
     # Turn the gathered bytes back into a buffer and deserialize the object.
     buffer = io.BytesIO(data.cpu().byte().numpy())
     obj = torch.load(buffer)
     return obj
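The pattern above is a generic serialize/deserialize round trip: torch.save the object into an in-memory buffer, view the bytes as a tensor that collectives can move, then load the object back from the gathered bytes. The sketch below is a minimal, single-process illustration of that round trip; the collective step is only indicated by a comment so the snippet runs standalone, and the function name is hypothetical.

import io
import torch

def serialize_roundtrip(obj):
    # torch.save the object into an in-memory buffer and view its bytes as a tensor,
    # mirroring the broadcast() example above.
    buffer = io.BytesIO()
    torch.save(obj, buffer)
    data_tensor = torch.tensor(bytearray(buffer.getbuffer()), dtype=torch.uint8)

    # ... in the real method the tensor would be gathered across ranks here ...

    # Convert the bytes back into a buffer and deserialize the object.
    restored = torch.load(io.BytesIO(data_tensor.numpy().tobytes()))
    return restored

print(serialize_roundtrip({"epoch": 3, "val_loss": 0.25}))  # {'epoch': 3, 'val_loss': 0.25}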
Example #3
 def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor:
     """
     Gather a tensor from several distributed processes.
     Args:
         tensor: tensor of shape (batch, ...)
         group: the process group to gather results from. Defaults to all processes (world)
         sync_grads: flag that allows users to synchronize gradients for the all_gather operation
     Return:
         A tensor of shape (world_size, batch, ...)
     """
     return all_gather_ddp_if_available(tensor, group=group, sync_grads=sync_grads)
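The shape contract in the docstring can be checked with plain torch.distributed. The following standalone sketch is an illustration under assumed settings (gloo backend, a single process, hypothetical port) rather than a use of the Lightning helper: a (batch, ...) tensor becomes a (world_size, batch, ...) tensor after gathering and stacking.

import os
import torch
import torch.distributed as dist

# Illustrate the (batch, ...) -> (world_size, batch, ...) contract described above.
# gloo with world_size=1 runs on any machine; the final stack mirrors the
# (world_size, batch, ...) output documented for the helper.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29501")
dist.init_process_group("gloo", rank=0, world_size=1)

local = torch.randn(4, 3)                                   # (batch, ...)
gathered = [torch.zeros_like(local) for _ in range(dist.get_world_size())]
dist.all_gather(gathered, local)
stacked = torch.stack(gathered)                             # (world_size, batch, ...)
assert stacked.shape == (dist.get_world_size(), 4, 3)

dist.destroy_process_group()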