def _test_broadcast_helper(self, group, group_id, rank, cuda=False, rank_to_GPU=None):
    for ttype, value, requires_cuda in [
        ("torch.FloatTensor", -1e-10, False),
        ("torch.DoubleTensor", -1e-100, False),
        ("torch.HalfTensor", -0.1, True),
        ("torch.CharTensor", -2, False),
        ("torch.ByteTensor", 129, False),
        ("torch.IntTensor", -1e5, False),
        ("torch.LongTensor", -1e15, False),
    ]:
        if requires_cuda and not cuda:
            continue
        for src in group:
            expected_tensor = _build_tensor(src + 1, value).type(ttype)
            if cuda:
                expected_tensor = expected_tensor.cuda(rank_to_GPU[rank][0])
            if rank == src:
                # The source rank broadcasts the expected values.
                dist.broadcast(expected_tensor, src, group_id)
            else:
                # Receiving ranks start from a sentinel fill of -1 and must
                # end up holding exactly the broadcast contents.
                tensor = _build_tensor(src + 1, -1).type(ttype)
                if cuda:
                    tensor = tensor.cuda(rank_to_GPU[rank][0])
                dist.broadcast(tensor, src, group_id)
                self.assertEqual(tensor.size(), expected_tensor.size())
                self.assertEqual(tensor.ne(expected_tensor).max(), 0)
    self._barrier()
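# A minimal sketch of the `_build_tensor` helper the test above assumes
# (an assumption, modeled on PyTorch's distributed test utilities): a rank-3
# cube of side `size`, filled with `value`.
import torch

def _build_tensor(size, value=None):
    if value is None:
        value = size
    return torch.FloatTensor(size, size, size).fill_(value)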
def init_param(model, src, group):
    # Broadcast each parameter from rank `src` so every rank in `group`
    # starts from identical weights.
    for param in model.parameters():
        dist.broadcast(param.data, src=src, group=group)
def _dist_broadcast_coalesced(self, tensors, buffer_size):
    """
    Broadcast a sequence of tensors to the default group from rank 0.
    Small tensors are first coalesced into a buffer to reduce the number
    of broadcasts.

    Arguments:
        tensors (sequence): tensors to broadcast. Each tensor needs to be
            on the same GPU.
        buffer_size (int): maximum size of the buffer for coalescing.
    """
    # Use a distinct name for the chunk so it does not shadow `tensors`.
    for chunk in _take_tensors(tensors, buffer_size):
        # Flatten the chunk into one contiguous buffer, broadcast it once,
        # then copy the synced values back into the original tensors.
        flat_tensors = _flatten_dense_tensors(chunk)
        dist.broadcast(flat_tensors, 0)
        for tensor, synced in zip(chunk, _unflatten_dense_tensors(flat_tensors, chunk)):
            tensor.copy_(synced)
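# Hedged free-function variant of the coalescing pattern above, runnable once
# a process group is initialized. The function name and default buffer size
# are assumptions; the private helpers do live in torch._utils.
import torch.distributed as dist
from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
                          _take_tensors)

def broadcast_coalesced(tensors, buffer_size=1024 * 1024, src=0):
    # Pack small tensors into flat buffers of at most `buffer_size` bytes,
    # broadcast each buffer once, then scatter the values back.
    for chunk in _take_tensors(tensors, buffer_size):
        flat = _flatten_dense_tensors(chunk)
        dist.broadcast(flat, src)
        for tensor, synced in zip(chunk, _unflatten_dense_tensors(flat, chunk)):
            tensor.copy_(synced)

# Example call site (assumed): sync a module's parameters from rank 0 with
# one broadcast per ~1 MB buffer instead of one per parameter.
# broadcast_coalesced([p.data for p in module.parameters()])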
def _test_barrier_helper(self, group, group_id, rank):
    WAIT_TIME = 0.3  # seconds

    for dest in group:
        expected_time = torch.DoubleTensor(1).fill_(0.0)
        if dest == rank:
            # The destination rank announces the earliest time at which the
            # other ranks may pass the barrier, then sleeps past it.
            expected_time.fill_(time.time() + WAIT_TIME)
            dist.broadcast(expected_time, dest, group_id)
            time.sleep(WAIT_TIME + 0.1)  # sleep a little bit longer
            dist.barrier(group_id)
        else:
            dist.broadcast(expected_time, dest, group_id)
            dist.barrier(group_id)
            # If the barrier actually blocked, no rank can get here before
            # the broadcast timestamp.
            self.assertGreaterEqual(time.time(), expected_time[0])
    self._barrier()
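# Hedged standalone sketch of the timing idea above (process-group setup and
# the 0.3 s wait are assumptions): rank 0 announces a release time, sleeps
# past it, and only then enters the barrier, so no other rank can leave the
# barrier before that time.
import time
import torch
import torch.distributed as dist

def check_barrier_blocks(group, wait_time=0.3):
    release = torch.DoubleTensor(1).fill_(time.time() + wait_time)
    dist.broadcast(release, 0, group)
    if dist.get_rank() == 0:
        time.sleep(wait_time + 0.1)
    dist.barrier(group)
    assert time.time() >= release[0]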
def run():
    # Rank-0 "server" loop of a parameter-server setup: broadcast the current
    # model to all ranks, then average the tensors reduced back from the
    # (size - 1) workers.
    modell = model.CNN()
    # modell = model.AlexNet()
    size = dist.get_world_size()
    rank = dist.get_rank()
    group = dist.new_group(list(range(size)))
    while True:
        for param in modell.parameters():
            # Point-to-point sends replaced by a single broadcast:
            # for dst in range(1, size):
            #     dist.send(param.data, dst=dst)
            dist.broadcast(param.data, src=0, group=group)
        for param in modell.parameters():
            # Rank 0 contributes zeros, so the SUM at dst=0 is the sum of the
            # (size - 1) worker tensors; divide to get their average.
            tensor_temp = torch.zeros_like(param.data)
            dist.reduce(tensor_temp, dst=0, op=dist.reduce_op.SUM, group=group)
            param.data = tensor_temp / (size - 1)
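# Hedged sketch of the matching worker-side loop the server above implies
# (entirely an assumption; the real worker code is not shown in this snippet):
# receive the broadcast parameters, run a local training step, then reduce
# the updated parameters back to rank 0.
def worker_run(modell, group):
    while True:
        for param in modell.parameters():
            dist.broadcast(param.data, src=0, group=group)
        # ... local training step on this worker's data would go here ...
        for param in modell.parameters():
            # Workers contribute their updated tensors; only rank 0 receives
            # the sum.
            dist.reduce(param.data, dst=0, op=dist.reduce_op.SUM, group=group)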
def init_param(model, src, group):
    for param in model.parameters():
        dist.broadcast(param.data, src=src, group=group)
def get_new_model(model, group):
    # Pull the latest parameters from rank 0 into the local model copy.
    for param in model.parameters():
        dist.broadcast(param.data, src=0, group=group)
    return model
def sync_parameters(self):
    # Broadcast the wrapped module's parameters from rank 0 so all replicas
    # start in sync.
    for param in self.module.parameters():
        dist.broadcast(param.data, 0)
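# A self-contained hedged sketch showing the common pattern above end to end:
# initialize the default process group, then broadcast a model's parameters
# from rank 0. (The backend, init method, and toy model are assumptions.)
import torch.distributed as dist
import torch.nn as nn

def main():
    dist.init_process_group(
        backend="gloo",
        init_method="env://",  # expects MASTER_ADDR/MASTER_PORT, RANK, WORLD_SIZE
    )
    model = nn.Linear(4, 2)
    for param in model.parameters():
        dist.broadcast(param.data, 0)  # every rank now holds rank 0's weights

if __name__ == "__main__":
    main()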