Esempi in Python per scatter, esempi in Python per torch.distributed.scatter

Esempio n. 1

0

Mostra file

File: Flex_demo.py Progetto: Steamgjk/Hove

def bp_recv_proc(conv_wid, conv_wn, fc_wid, fc_wn, wid, wn, pred_wid, succ_wid,
                 comm_rank, world_sz, bs, subbs, pd, input_shp, output_shp,
                 bp_tail_list, shared_cnters, global_step, sta_lidx, end_lidx):
    """Receive backward-pass tensors into ``bp_tail_list`` in an endless loop.

    ``shared_cnters`` slot layout: fp_send:0; fp_recv:1; bp_send:2; bp_recv:3.
    Slot 3 is incremented after every completed receive; once it reaches
    ``int(bs / subbs)`` the loop idles until the slot drops below that
    threshold again (nothing in this function resets it).

    The worker with ``wid == wn - 1`` receives nothing: it sets slot 3 to the
    threshold and returns.  Every other worker never returns.
    """
    recvs_per_batch = int(bs / subbs)
    # Only the scatter group is used here; the other two groups are ignored.
    _, _, scatter_grp = init_processes(comm_rank, world_sz)
    print("bp_recv_proc comm_rank=", comm_rank)

    if wid == wn - 1:
        shared_cnters[3] = recvs_per_batch
        return

    sender_rank = succ_wid * 4 + 2
    while True:
        if shared_cnters[3] >= recvs_per_batch:
            # Quota reached: poll until the counter is lowered elsewhere.
            time.sleep(0.001)
            continue

        if wid == 2:
            # Worker 2 uses a point-to-point receive.
            dist.recv(tensor=bp_tail_list[shared_cnters[3]], src=sender_rank)
        elif wid in (0, 1):
            # Workers 0 and 1 are non-root members of the scatter.
            dist.scatter(tensor=bp_tail_list[shared_cnters[3]],
                         scatter_list=[],
                         src=sender_rank,
                         group=scatter_grp,
                         async_op=False)
        shared_cnters[3] += 1

Esempio n. 2

0

Mostra file

File: part2a.py Progetto: PhanTask/CS744-Big-Data-System

def train_model(model, train_loader, optimizer, criterion, epoch, rank):
    """
    Train for one epoch, averaging gradients across ranks with gather/scatter.

    model (torch.nn.module): The model created to train
    train_loader (pytorch data loader): Training data loader
    optimizer (optimizer.*): A instance of some sort of optimizer, usually SGD
    criterion (nn.CrossEntropyLoss) : Loss function used to train the network
    epoch (int): Current epoch number
    rank (int): Rank of this process; rank 0 collects, averages and
        redistributes the gradients

    Relies on a module-level ``device`` that is not defined in this function.
    """

    group = dist.new_group([0, 1, 2, 3])
    world_size = dist.get_world_size(group=group)

    # remember to exit the train loop at end of the epoch
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        train_loss = criterion(output, target)
        train_loss.backward()
        for p in model.parameters():
            if rank == 0:
                # The destination rank must supply one receive buffer per
                # group member; without it nothing can be collected.
                gathered = [p.grad.clone() for _ in range(world_size)]
                dist.gather(p.grad, gather_list=gathered, dst=0,
                            group=group, async_op=False)
                mean_grad = sum(gathered) / world_size
                # Every rank receives the same averaged gradient.
                dist.scatter(p.grad, scatter_list=[mean_grad] * world_size,
                             src=0, group=group, async_op=False)
            else:
                dist.gather(p.grad, dst=0, group=group, async_op=False)
                dist.scatter(p.grad, src=0, group=group, async_op=False)
        optimizer.step()
        if batch_idx % 20 == 0:
            print(batch_idx, "loss: ", train_loss.item())
            now = datetime.now()
        if batch_idx == 10:
            # `now` was last set at the end of batch 0 (0 % 20 == 0).
            # NOTE(review): the divisor 9 is kept from the original; the
            # measured span looks like 10 batches -- confirm intent.
            later = datetime.now()
            print("average time: ", (later - now).total_seconds() / 9)

Esempio n. 3

0

Mostra file

    def _test_scatter_helper(self, group, group_id, rank):
        """Scatter once from every rank in ``group`` and check what arrives.

        For each source rank, the source sends tensor ``i`` (built with
        ``_build_tensor(src + 1, i)``) to member ``i``; every rank then
        asserts that its buffer equals ``_build_tensor(src + 1, rank)``.
        """
        for src_rank in group:
            size = src_rank + 1
            recv_buf = _build_tensor(size, -1)
            want = _build_tensor(size, rank)
            if rank == src_rank:
                chunks = [_build_tensor(size, member) for member in group]
            else:
                # Non-source ranks contribute nothing.
                chunks = []
            dist.scatter(recv_buf, src=src_rank, scatter_list=chunks,
                         group=group_id)
            self.assertEqual(recv_buf, want)

        self._barrier()

Esempio n. 4

0

Mostra file

File: distributed_retriever.py Progetto: kmeng01/custom-transformers

 def _scattered(self,
                scatter_list,
                target_shape,
                target_type=torch.float32):
     """Receive this process's slice of a scatter rooted at rank 0.

     Allocates an uninitialised tensor of ``target_shape``/``target_type``,
     lets ``dist.scatter`` fill it over ``self.process_group`` and returns it.
     ``scatter_list`` is forwarded unchanged.
     """
     received = torch.empty(target_shape, dtype=target_type)
     dist.scatter(
         received,
         scatter_list=scatter_list,
         src=0,
         group=self.process_group,
     )
     return received

Esempio n. 5

0

Mostra file

File: functional.py Progetto: tongxin/pytorch

 def forward(ctx, src, group, *tensors):
     """Scatter ``tensors`` from rank ``src`` and return the local piece.

     All tensors must share one size.  The result is a new tensor shaped like
     ``tensors[0]``; only the source rank passes the list to ``dist.scatter``,
     every other rank passes ``None``.  ``src`` and ``group`` are stored on
     ``ctx``.
     """
     ctx.src, ctx.group = src, group
     # Equivalent to comparing every size against the first one.
     assert len({tuple(t.size()) for t in tensors}) <= 1
     output = torch.zeros_like(tensors[0])
     payload = list(tensors) if dist.get_rank(group=group) == src else None
     dist.scatter(output, payload, src, group=group)
     return output

Esempio n. 6

0

Mostra file

def _read_train_arrays(dataset):
    """Return ``(data, label)`` arrays of the train group, closing the file."""
    with tables.open_file(dataset) as h5file:
        train = h5file.root.train
        return train.data[:], train.label[:]


def distribute_samples(nodes, rank, dataset, eta, epochs):
    """
    The master node (rank 0) randomly chooses and transmits samples indices to each device for training.
    Upon reception of their assigned samples, the nodes create their training dataset

    Args:
        nodes: process group used for the scatter.
        rank: rank of the calling process; 0 is the master.
        dataset: path of the HDF5 file, opened with ``tables.open_file``.
        eta: fraction of each worker's samples drawn from its main class.
        epochs: number of sample indices sent to each worker.

    Returns:
        Tensor concatenating the selected inputs and labels along dim 1.  On
        the master it covers the samples of both workers; on a worker, only
        its own.

    The HDF5 file is opened once per call and closed before returning.
    """

    if rank == 0:
        train_data, train_labels = _read_train_arrays(dataset)

        n_samples = train_data.shape[0]  # Total number of samples
        print(n_samples)
        n_samples_train_per_class = int(n_samples / 2 * 0.9)  # There are 2 classes and 10% of the dataset is kept for testing

        # Class id per sample: sum the labels over the last axis, then take
        # the arg-max over the (new) last axis.  Computed once for both classes.
        class_ids = torch.max(torch.sum(torch.FloatTensor(train_labels), dim=-1), dim=-1).indices
        indices_0 = np.asarray(class_ids == 0).nonzero()[0][:n_samples_train_per_class]
        indices_1 = np.asarray(class_ids == 1).nonzero()[0][:n_samples_train_per_class]

        assert len(indices_0) == len(indices_1)
        n_main_class = math.floor(epochs * eta)
        n_secondary_class = epochs - n_main_class
        assert (n_main_class + n_secondary_class) == epochs

        # Randomly select samples for each worker
        indices_worker_0 = np.hstack((np.random.choice(indices_0, [n_main_class], replace=False),
                                      np.random.choice(indices_1, [n_secondary_class], replace=False)))
        np.random.shuffle(indices_worker_0)
        # Worker 1 draws only from what worker 0 did not take (order kept).
        remaining_indices_0 = indices_0[~np.isin(indices_0, indices_worker_0)]
        remaining_indices_1 = indices_1[~np.isin(indices_1, indices_worker_0)]
        indices_worker_1 = np.hstack((np.random.choice(remaining_indices_0, [n_secondary_class], replace=False),
                                      np.random.choice(remaining_indices_1, [n_main_class], replace=False)))
        np.random.shuffle(indices_worker_1)

        assert len(indices_worker_0) == len(indices_worker_1)

        # Send samples to the workers; slot 0 (the master's own) is a dummy.
        indices = [torch.zeros([epochs], dtype=torch.int), torch.IntTensor(indices_worker_0), torch.IntTensor(indices_worker_1)]
        indices_local = torch.zeros([epochs], dtype=torch.int)
        dist.scatter(tensor=indices_local, src=0, scatter_list=indices, group=nodes)

        # Save samples sent to the workers at master to evaluate train loss and accuracy later
        indices_local = torch.IntTensor(np.hstack((indices_worker_0, indices_worker_1)))

    else:
        indices_local = torch.zeros([epochs], dtype=torch.int)
        dist.scatter(tensor=indices_local, src=0, scatter_list=[], group=nodes)

        # An all-zero buffer means nothing was received.
        assert torch.sum(indices_local) != 0

        train_data, train_labels = _read_train_arrays(dataset)

    local_input = train_data[indices_local]
    local_output = train_labels[indices_local]
    local_teaching_signal = torch.cat((torch.FloatTensor(local_input), torch.FloatTensor(local_output)), dim=1)

    return local_teaching_signal

Esempio n. 7

0

Mostra file

 def scatter(self, scatter_list, src, size=None):
     """Scatters a list of tensors to all parties.

     On the source rank the returned tensor is this rank's own entry of
     ``scatter_list``.  On any other rank a fresh ``torch.long`` tensor is
     allocated (shape ``size``, or the shape of the local ``scatter_list``
     entry when ``size`` is None) and filled by the scatter.
     """
     assert dist.is_initialized(), "initialize the communicator first"
     if src == self.get_rank():
         result = scatter_list[self.get_rank()]
         dist.scatter(result, scatter_list, src, group=self.main_group)
         return result

     shape = scatter_list[self.get_rank()].size() if size is None else size
     result = torch.empty(size=shape, dtype=torch.long)
     dist.scatter(result, [], src, group=self.main_group)
     return result

Esempio n. 8

0

Mostra file

File: functions.py Progetto: snehashischatterjee1997/diffdist

 def backward(ctx, *grads):
     """Scatter the incoming gradients from ``ctx.dst`` back to every rank.

     Each rank receives its piece into a zero tensor shaped like the saved
     input.  The ``ctx.dst`` rank returns that tensor, three ``None`` values
     and then ``grads`` unchanged; other ranks return the tensor followed by
     four ``None`` values.
     """
     (saved_input,) = ctx.saved_tensors
     grad_input = torch.zeros_like(saved_input)

     if dist.get_rank(ctx.group) != ctx.dst:
         dist.scatter(grad_input, [], src=ctx.dst, group=ctx.group)
         return grad_input, None, None, None, None

     dist.scatter(grad_input, list(grads), src=ctx.dst, group=ctx.group)
     return (grad_input, None, None, None, *grads)

Esempio n. 9

0

Mostra file

File: multiThreadExample.py Progetto: chenziku/NMT-MCTS

def run(rank, numProcesses, group):
    """Run ``rank`` rounds of gather-to-0 followed by scatter-from-0.

    Each round sends a one-element tensor holding ``rank`` to rank 0, then
    receives a one-element tensor from rank 0 and prints it.  With
    ``rank == 0`` the loop body never runs.
    """
    payload = rank * torch.ones(1)

    for _ in range(rank):
        # Non-destination side of the gather: no gather_list is supplied.
        dist.gather(tensor=payload, gather_list=None, dst=0, group=group)

        outputTens = torch.ones(1)
        dist.scatter(tensor=outputTens, scatter_list=None, src=0, group=group)
        print('Rank ', rank, ' has data ', outputTens)

Esempio n. 10

0

Mostra file

File: main.py Progetto: harsh-rawat/CS744-Distributed-Data-Parallel

def average_gradients(model, rank):
    """Replace every parameter gradient with its mean across all ranks.

    Rank 0 gathers each gradient from every process, averages them and
    scatters the mean back; the other ranks only take part in the two
    collectives.  Uses the default process group.

    Args:
        model: module whose ``parameters()`` all carry a populated ``.grad``.
        rank: rank of the calling process.
    """
    world_size = dist.get_world_size()
    for p in model.parameters():
        if rank == 0:
            # empty_like keeps the gradient's dtype and device; a plain
            # torch.empty(size) would always be default-dtype on the CPU.
            inputs = [torch.empty_like(p.grad) for _ in range(world_size)]
            dist.gather(p.grad, inputs)
            avg_grad = torch.mean(torch.stack(inputs), dim=0)
            # The same tensor is sent to every rank.
            dist.scatter(p.grad, [avg_grad] * world_size)
        else:
            dist.gather(p.grad)
            dist.scatter(p.grad)

Esempio n. 11

0

Mostra file

File: distributed_operations.py Progetto: tanthml/sagemaker-pytorch-container

def _scatter(rank, rows, columns):
    """Scatter zero tensors from rank 0 and assert every rank receives zeros.

    Each rank starts from ``_get_tensor(rank, rows, columns)``; after the
    scatter that tensor must equal ``_get_zeros_tensor(rows, columns)``.
    """
    source = 0
    tensor = _get_tensor(rank, rows, columns)

    if rank != source:
        logger.debug('Rank: {},\nTensor BEFORE scatter: {}\n'.format(rank, tensor))
        dist.scatter(tensor=tensor, src=source)
    else:
        tensors_list = _get_zeros_tensors_list(rows, columns)
        logger.debug('Rank: {},\nTensor BEFORE scatter: {}. tensors_list: {}'.format(
            rank, tensor, tensors_list))
        # src is left at its default here.
        dist.scatter(tensor=tensor, scatter_list=tensors_list)

    logger.debug('Rank: {},\nTensor AFTER scatter: {}\n'.format(rank, tensor))

    expected = _get_zeros_tensor(rows, columns)
    assert torch.equal(tensor, expected), \
        'Rank {}: Tensor should be all zeroes after scatter.'.format(rank)

Esempio n. 12

0

Mostra file

def run(number, scatter_list, sr):
    """Pad the tensors, scatter them from rank 0 and gather the results back.

    Every entry of ``scatter_list`` is zero-padded to the longest length and
    has ``sr`` appended; the list is modified in place.  The padded tensors
    are scattered over a new group of ranks ``0..number-1`` and a list of
    result buffers is gathered at rank 0.

    Args:
        number: size of the process group to create.
        scatter_list: list of 1-D tensors, one per group member
            (presumably ``len(scatter_list) == number`` -- verify at caller).
        sr: 1-D tensor appended to every padded entry.

    Returns:
        The gather buffers: one ``(frames_size, 2048)`` zero-initialised
        tensor per entry of ``scatter_list``, overwritten by the gather.
    """
    ranks = list(range(number))
    lengths = []
    for clip in scatter_list:
        lengths = lengths + list(clip.size())
    maxlen = max(lengths)
    for idx, length in enumerate(lengths):
        padding = torch.zeros(maxlen - length)
        scatter_list[idx] = torch.cat((scatter_list[idx], padding, sr), 0)

    win_length = 2048
    group = dist.new_group(ranks)
    src = dst = 0

    # The receive buffer must have the shape of one scattered entry.
    recv_buf = torch.zeros_like(scatter_list[src])
    # A blocking scatter returns None once finished, so there is no work
    # handle to poll afterwards.
    dist.scatter(recv_buf, scatter_list=scatter_list, src=src, group=group)

    frames_size = librosa.util.frame(scatter_list[0]).shape[0]
    gather_list = [torch.zeros(frames_size, win_length) for _ in scatter_list]
    # gather() needs this rank's own contribution; a zero placeholder is sent.
    # NOTE(review): the other ranks must send tensors of this same shape and
    # dtype -- confirm against the worker side.
    own_part = torch.zeros(frames_size, win_length)
    dist.gather(own_part, gather_list=gather_list, dst=dst, group=group)
    return gather_list

Esempio n. 13

0

Mostra file

File: extend_distributed.py Progetto: Luo-Liang/dlrm

 def forward(ctx, a2a_info, *inputs):
     """Start one asynchronous scatter per rank and return the output buffers.

     ``inputs`` are concatenated along dim 1 and split along dim 0 into the
     chunks this rank sends.  For every rank ``i`` a receive buffer of shape
     ``[local_batch_num, table_split_lengths[i] * emb_dim]`` is allocated and
     a scatter rooted at ``i`` is launched with ``async_op=True``.  The
     pending requests, the buffers and ``a2a_info`` are stored on the
     module-level ``myreq``; the buffers are returned as a tuple without
     waiting for the requests.
     """
     global myreq
     batch_splits = (a2a_info.global_batch_partition_slices
                     or a2a_info.local_batch_num)
     table_splits = (a2a_info.global_table_wise_parition_slices
                     or [a2a_info.local_table_num] * my_size)

     merged = torch.cat(inputs, dim=1)
     send_chunks = list(merged.split(batch_splits, dim=0))

     recv_buffers, pending = [], []
     for peer in range(my_size):
         buf_shape = [a2a_info.local_batch_num,
                      table_splits[peer] * a2a_info.emb_dim]
         buf = merged.new_empty(buf_shape)
         # Only the root of each scatter provides data.
         outgoing = send_chunks if peer == my_rank else []
         pending.append(dist.scatter(buf, outgoing, src=peer, async_op=True))
         recv_buffers.append(buf)

     myreq.req = pending
     myreq.tensor = tuple(recv_buffers)
     myreq.a2a_info = a2a_info
     ctx.a2a_info = a2a_info
     return myreq.tensor

Esempio n. 14

0

Mostra file

File: functions.py Progetto: snehashischatterjee1997/diffdist

 def forward(ctx,
             tensor,
             src,
             group=dist.group.WORLD,
             inplace=True,
             *scatter_list):
     """Scatter ``scatter_list`` from ``src`` and return the received tensor.

     With ``inplace`` the result is written into ``tensor`` itself, otherwise
     into a zero tensor of the same shape.  The source rank saves
     ``scatter_list`` for the backward pass; other ranks send an empty list.
     """
     ctx.src, ctx.group = src, group
     out = tensor if inplace else torch.zeros_like(tensor)

     if dist.get_rank(group) != src:
         dist.scatter(out, [], src=src, group=group)
         return out

     ctx.save_for_backward(*scatter_list)
     dist.scatter(out, list(scatter_list), src=src, group=group)
     return out

Esempio n. 15

0

Mostra file

File: functional.py Progetto: btxuyenHCMUS/MTCNN-GPU

 def forward(ctx, group, *tensors):
     """All-to-all exchange of ``tensors``; returns the received tensors.

     One output tensor is allocated per rank, shaped like the matching input.
     On the Gloo backend the exchange is emulated with one scatter per rank
     (the original comment notes that async send/recv have issues); other
     backends use ``dist.all_to_all``.

     Returns:
         Tuple with one received tensor per rank of ``group``.
     """
     ctx.group = group
     world_size = dist.get_world_size(group=group)
     out_tensor_list = [torch.empty_like(tensors[i]) for i in range(world_size)]
     my_rank = dist.get_rank(group=group)
     # Backend names are strings: compare by value, not by identity.
     if dist.get_backend(group=group) == dist.Backend.GLOO:
         for i in range(world_size):
             # Only the root of each scatter provides the data.
             to_send = list(tensors) if i == my_rank else None
             dist.scatter(out_tensor_list[i], to_send, i, group=group)
     else:
         dist.all_to_all(out_tensor_list, list(tensors), group=group)
     return tuple(out_tensor_list)

Esempio n. 16

0

Mostra file

File: manager.py Progetto: omarfoq/communication-in-cross-silo-fl

    def communicate(self):
        """Gather parameters, mix them, then scatter parameters back.

        Phase 1: for each parameter position of ``self.gather_list[-1].net``,
        gather into the matching parameter of every network in
        ``self.gather_list`` (destination rank ``world_size - 1``).
        Phase 2: call ``self.mix()`` and, when ``(round_idx - 1)`` is a
        multiple of ``log_freq``, ``self.write_logs()``.
        Phase 3: scatter the matching parameters of the networks in
        ``self.scatter_list`` from rank ``world_size - 1``.
        """
        for ii, param in enumerate(self.gather_list[-1].net.parameters()):
            recv_buffers = []
            for idx in range(self.world_size):
                peer_params = list(self.gather_list[idx].net.parameters())
                recv_buffers.append(peer_params[ii].data)

            dist.gather(tensor=param.data,
                        gather_list=recv_buffers,
                        dst=self.world_size - 1)

        self.mix()

        if not (self.round_idx - 1) % self.log_freq:
            self.write_logs()

        for ii, param in enumerate(self.scatter_list[-1].net.parameters()):
            send_buffers = []
            for idx in range(self.world_size):
                peer_params = list(self.scatter_list[idx].net.parameters())
                send_buffers.append(peer_params[ii].data)

            dist.scatter(tensor=param.data,
                         scatter_list=send_buffers,
                         src=self.world_size - 1)

Esempio n. 17

0

Mostra file

File: distributed_communicator.py Progetto: vishalbelsare/CrypTen

 def scatter(self, scatter_list, src, size=None, device=None):
     """Scatters a list of tensors to all parties.

     The source rank sends the ``.data`` of every entry and returns its own
     entry.  Other ranks allocate a ``torch.long`` buffer; ``size`` and
     ``device`` default to those of the local ``scatter_list`` entry, and a
     failed device lookup is ignored so ``device`` stays None.
     """
     assert dist.is_initialized(), "initialize the communicator first"

     if src == self.get_rank():
         payload = [item.data for item in scatter_list]
         result = payload[self.get_rank()]
         dist.scatter(result.data, payload, src, group=self.main_group)
         return result

     if size is None:
         size = scatter_list[self.get_rank()].size()
     if device is None:
         # Best effort: the local entry may not expose a device.
         try:
             device = scatter_list[self.get_rank()].device
         except Exception:
             pass
     result = torch.empty(size=size, dtype=torch.long, device=device)
     dist.scatter(result.data, [], src, group=self.main_group)
     return result

Esempio n. 18

0

Mostra file

File: functional.py Progetto: tongxin/pytorch

 def forward(ctx, group, out_tensor_list, *tensors):
     """All-to-all exchange of ``tensors`` into caller-provided buffers.

     The sizes of the first ``world_size`` inputs are recorded on ``ctx``.
     Inputs are made contiguous before sending.  On the Gloo backend the
     exchange is emulated with one scatter per rank (the original comment
     notes that async send/recv have issues); other backends use
     ``dist.all_to_all``.

     Returns:
         ``out_tensor_list`` as a tuple, filled with the received tensors.
     """
     ctx.group = group
     world_size = dist.get_world_size(group=group)
     ctx.input_tensor_size_list = [tensors[i].size() for i in range(world_size)]
     my_rank = dist.get_rank(group=group)
     tensors = tuple(t.contiguous() for t in tensors)
     # Backend names are strings: compare by value, not by identity.
     if dist.get_backend(group=group) == dist.Backend.GLOO:
         for i in range(world_size):
             # Only the root of each scatter provides the data.
             to_send = list(tensors) if i == my_rank else None
             dist.scatter(out_tensor_list[i], to_send, i, group=group)
     else:
         dist.all_to_all(
             out_tensor_list,
             list(tensors),
             group=group,
         )
     return tuple(out_tensor_list)

Esempio n. 19

0

Mostra file

def main_func(numProcesses, group, src_tensor):
    """Rank-0 side of a gather/scatter demo; exits after one round.

    Gathers a 15-element tensor from every process, prints what ranks 1 and 2
    sent, then scatters the three columns of a random 5x3 matrix (one 5-element
    vector per rank).  After a 5 second pause the process exits with status 1,
    so the ``while`` body runs only once.  ``src_tensor`` is unused.
    """
    while True:
        t = torch.zeros(15)  # THE FINAL ELEMENT IS LENGTH WHEN NOT PADDED
        gather_t = [torch.ones_like(t) for _ in range(numProcesses)]

        # every process in group sends tensor to this gather_t list
        dist.gather(tensor=t, gather_list=gather_t, dst=0, group=group)

        print('GATHERED DATA')
        print(gather_t[1][:15])
        print(gather_t[2][:15])

        to_scatter = torch.rand((5, 3))

        outputTens = torch.rand((5))

        print(to_scatter)
        # Split with torch instead of numpy so the pieces are guaranteed to
        # stay tensors: three (5, 1) column views.
        columns = torch.chunk(to_scatter, 3, dim=1)

        # this is vital to make sure memory isn't shared among these vectors
        to_scatter = [torch.clone(col).squeeze() for col in columns]

        print('just before scattering: ')
        dist.scatter(tensor=outputTens,
                     scatter_list=to_scatter,
                     src=0,
                     group=group)

        time.sleep(5)
        exit(1)

Esempio n. 20

0

Mostra file

File: distributed_utils.py Progetto: yeshwanthv5/snn

def distribute_samples(nodes, rank, dataset, eta, num_samples):
    """
    The master node (rank 0) randomly chooses and transmits samples indices to each device for training.
    Upon reception of their assigned samples, the nodes create their training dataset

    Args:
        nodes: process group used for the scatter.
        rank: rank of the calling process; 0 is the master.
        dataset: object exposing the labels as ``dataset.root.label``.
        eta: fraction of each worker's samples drawn from its main class.
        num_samples: number of sample indices sent to each worker.

    Returns:
        An int tensor of sample indices: on a worker, the ``num_samples``
        indices it received; on the master, both workers' indices
        concatenated.
    """

    if rank == 0:
        # Class id per sample: sum the labels over the last axis, then take
        # the arg-max over the (new) last axis.  Computed once for both classes.
        class_ids = torch.max(torch.sum(torch.FloatTensor(dataset.root.label[:]), dim=-1), dim=-1).indices
        indices_0 = np.asarray(class_ids == 0).nonzero()[0]
        indices_1 = np.asarray(class_ids == 1).nonzero()[0]

        assert len(indices_0) == len(indices_1)
        n_main_class = math.floor(num_samples * eta)
        n_secondary_class = num_samples - n_main_class
        assert (n_main_class + n_secondary_class) == num_samples

        # Randomly select samples for each worker
        indices_worker_0 = np.hstack((np.random.choice(indices_0, [n_main_class], replace=False),
                                      np.random.choice(indices_1, [n_secondary_class], replace=False)))
        np.random.shuffle(indices_worker_0)
        # Worker 1 draws only from what worker 0 did not take (order kept).
        remaining_indices_0 = indices_0[~np.isin(indices_0, indices_worker_0)]
        remaining_indices_1 = indices_1[~np.isin(indices_1, indices_worker_0)]
        indices_worker_1 = np.hstack((np.random.choice(remaining_indices_0, [n_secondary_class], replace=False),
                                      np.random.choice(remaining_indices_1, [n_main_class], replace=False)))
        np.random.shuffle(indices_worker_1)

        assert len(indices_worker_0) == len(indices_worker_1)

        # Send samples to the workers; slot 0 (the master's own) is a dummy.
        indices = [torch.zeros([num_samples], dtype=torch.int), torch.IntTensor(indices_worker_0), torch.IntTensor(indices_worker_1)]
        indices_local = torch.zeros([num_samples], dtype=torch.int)
        dist.scatter(tensor=indices_local, src=0, scatter_list=indices, group=nodes)

        # Save samples sent to the workers at master to evaluate train loss and accuracy later
        indices_local = torch.IntTensor(np.hstack((indices_worker_0, indices_worker_1)))

    else:
        indices_local = torch.zeros([num_samples], dtype=torch.int)
        dist.scatter(tensor=indices_local, src=0, scatter_list=[], group=nodes)
        # An all-zero buffer means nothing was received.
        assert torch.sum(indices_local) != 0

    return indices_local

Esempio n. 21

0

Mostra file

def run(rank, numProcesses, group, trg_tensor):
    """Worker loop: endlessly gather a random vector to rank 0, then receive.

    Every iteration sends a fresh 15-element random tensor through
    ``dist.gather`` (destination rank 0) and receives a 5-element tensor
    through ``dist.scatter`` (source rank 0).  The loop has no exit
    condition.  ``numProcesses`` and ``trg_tensor`` are unused.
    """
    print('gathering rank: ', rank)

    while True:
        outgoing = torch.rand((15))
        print('Gathering rank: ', rank)
        print('rank: {}, sending to gather: {}'.format(rank, outgoing))
        # Non-destination side of the gather: no gather_list is supplied.
        dist.gather(tensor=outgoing, gather_list=None, dst=0, group=group)
        print('Finished gather: ', rank)

        incoming = torch.rand(5)
        dist.scatter(tensor=incoming, scatter_list=None, src=0, group=group)
        print('scatter rank: {}, given: {}'.format(rank, incoming))

Esempio n. 22

0

Mostra file

File: utils.py Progetto: yuk12/dgl

def alltoall_cpu(rank, world_size, output_tensor_list, input_tensor_list):
    """All-to-all exchange built from one scatter per rank.

    The inputs are moved to the CPU first.  Rank ``i`` then acts as the
    source of the ``i``-th scatter, and each process receives that piece into
    ``output_tensor_list[i]``.

    Parameters
    ----------
    rank : int
        The rank of current worker
    world_size : int
        Number of scatter rounds, one per rank
    output_tensor_list : List of tensor
        The received tensors
    input_tensor_list : List of tensor
        The tensors to exchange
    """
    cpu = th.device('cpu')
    cpu_inputs = [item.to(cpu) for item in input_tensor_list]
    for src_rank in range(world_size):
        # Only the root of the current scatter provides data.
        payload = cpu_inputs if src_rank == rank else []
        dist.scatter(output_tensor_list[src_rank], payload, src=src_rank)

Esempio n. 23

0

Mostra file

File: Flex_demo.py Progetto: Steamgjk/Hove

def bp_send_proc(conv_wid, conv_wn, fc_wid, fc_wn, wid, wn, pred_wid, succ_wid,
                 comm_rank, world_sz, bs, subbs, pd, input_shp, output_shp,
                 bp_head_list, shared_cnters, global_step, sta_lidx, end_lidx):
    """Send backward-pass tensors from ``bp_head_list`` in an endless loop.

    ``shared_cnters`` slot layout: fp_send:0; fp_recv:1; bp_send:2; bp_recv:3.
    An entry is sent whenever the local sent-count is below slot 2.  After
    ``int(bs / subbs)`` sends the local count and slot 2 are both reset to
    zero.

    Workers 0 and 1 send nothing: they zero slot 2 and return.  Every other
    worker never returns.
    """
    sends_per_batch = int(bs / subbs)
    # Only the scatter group is used here; the other two groups are ignored.
    _, _, scatter_grp = init_processes(comm_rank, world_sz)
    print("bp_send_proc comm_rank=", comm_rank)

    if wid in (0, 1):
        shared_cnters[2] = 0
        return

    sent = 0
    recv_rank = pred_wid * 4 + 3
    root_rank = 2 * 4 + 2  # hard-coded source rank of the scatter
    while True:
        if not (sent < shared_cnters[2]):
            time.sleep(0.001)
        else:
            # hard code
            if wid == 3:
                dist.send(tensor=bp_head_list[sent], dst=recv_rank)
            elif wid == 2:
                halves = list(bp_head_list[sent].chunk(chunks=2, dim=0))
                # The first half doubles as the receive buffer and is appended
                # as a third list entry.
                own_slot = halves[0]
                halves.append(own_slot)
                dist.scatter(tensor=own_slot,
                             scatter_list=halves,
                             src=root_rank,
                             group=scatter_grp,
                             async_op=False)
            sent += 1

        # Checked on every pass, including the sleeping ones.
        if sent == sends_per_batch:
            sent = 0
            shared_cnters[2].zero_()

Esempio n. 24

0

Mostra file

File: comm.py Progetto: wx-b/scrl

def scatter(data, src=0, group=None):
    """
    Run scatter on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        src (int): source rank from which to scatter
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        data_scattered: the object scattered from src.  The source rank, and
        any single-process run, gets ``data`` back unchanged.

    NOTE(review): every rank sizes its receive buffer from the serialization
    of its *own* ``data``, so this appears to require that all ranks
    serialize to the same number of bytes as the source -- confirm at callers.
    """
    if get_world_size() == 1:
        return data
    if group is None:
        group = _get_global_gloo_group()
        assert dist.get_world_size(group) == dist.get_world_size()
    group_size = dist.get_world_size(group=group)
    if group_size == 1:
        return data

    rank = dist.get_rank(group=group)
    input_tensor = _serialize_to_tensor(data, group)

    # receiving Tensor from the source ranks
    output_tensor = torch.empty((input_tensor.numel(),),
                                dtype=torch.uint8,
                                device=input_tensor.device)
    if rank == src:
        # One entry per member of *this* group, all sharing the same bytes.
        dist.scatter(tensor=output_tensor,
                     scatter_list=[input_tensor] * group_size,
                     src=src, group=group)
        return data

    dist.scatter(output_tensor, [], src=src, group=group)
    # tobytes() replaces ndarray.tostring(), which NumPy 2.0 removed.
    buffer = output_tensor.cpu().numpy().tobytes()
    # SECURITY: pickle.loads executes arbitrary code; only safe when every
    # peer in the group is trusted.
    data_scattered = pickle.loads(buffer)
    return data_scattered

Esempio n. 25

0

Mostra file

def average_gradients(model):
    """Replace every parameter gradient with its mean across all ranks.

    Uses gather + scatter on the default process group: rank 0 collects each
    gradient from every process, averages them and sends the mean back.
    Whether this process is rank 0 is read from the module-level
    ``args.rank``.

    An equivalent all_reduce formulation would be::

        dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
        param.grad.data /= size
    """
    world_size = dist.get_world_size()
    is_root = args.rank == 0
    for param in model.parameters():
        grad = param.grad.data

        gather_list = None
        if is_root:
            # One *distinct* buffer per rank; `[t] * n` would alias a single
            # tensor and every received gradient would overwrite the last.
            gather_list = [torch.zeros_like(grad) for _ in range(world_size)]
        dist.gather(tensor=grad, dst=0, gather_list=gather_list)

        scatter_list = None
        if is_root:
            mean_grad = torch.stack(gather_list).mean(dim=0)
            # Every rank receives the same averaged gradient.
            scatter_list = [mean_grad] * world_size
        dist.scatter(tensor=grad, src=0, scatter_list=scatter_list)
Esempio n. 26

0

Mostra file

File: deep_gradient_compression.py Progetto: lusinlu/deep_gradient_compression

    def transfer_gradients(self, grad_update_conv):
        """Send the averaged sparse gradients from rank 0 to every node.

        :parameter
        grad_update_conv : indexable of tensors, final averaged sparse gradients
        :return
        upd_grads : list with one received gradient per entry of
            ``self.shapes``, moved to CUDA device ``self.device_id``
        """
        upd_grads = []

        for idx in range(len(self.shapes)):
            updated = torch.zeros(self.shapes[idx])

            if self.rank == 0:
                # Every node gets its own CPU copy of the same gradient.
                outgoing = [grad_update_conv[idx].to('cpu')
                            for _ in range(self.size)]
            else:
                outgoing = []
            dist.scatter(tensor=updated, src=0, scatter_list=outgoing)

            upd_grads.append(updated.cuda(self.device_id))

        return upd_grads

Esempio n. 27

0

Mostra file

File: runMultiThread.py Progetto: chenziku/NMT-MCTS

def run(rank, numProcesses, group, maxlen, main_params, trg_tensor):
    """Translate one sentence with MCTS, save the result, then keep serving.

    The translation result ``[bleu, output_states, mcts_probs, actions]`` is
    written as JSON to ``<CODEPATH>MCTSFiles/rank<rank>.json``.  Afterwards
    the process loops forever, taking part in a gather to rank 0 and a
    scatter from rank 0 on each pass.  ``numProcesses`` is unused.
    """
    translator = MCTS(tgt_tensor=trg_tensor, group=group, rankInGroup=rank,
                      max_len=maxlen, main_params=main_params)

    # `actions` lists the actions matching the probabilities in `mcts_probs`.
    bleu, output_states, mcts_probs, actions = translator.translate_sentence()

    out_path = globalsFile.CODEPATH+'MCTSFiles/rank'+str(rank)+'.json'
    with open(out_path, 'w') as f:
        json.dump([bleu, output_states, mcts_probs, actions], f)

    print('rank: ',rank, ' is done NOW WAITING FOR REST')

    while True:
        # Keep answering the collectives until the main process exits.
        # NOTE(review): zeros times BLANK_WORD_ID is still all zeros for any
        # finite id -- ones(...) may have been intended; confirm.
        filler = torch.zeros(maxlen+1)*globalsFile.BLANK_WORD_ID
        dist.gather(tensor=filler, gather_list=None, dst=0, group=group)
        reply = torch.ones(2*main_params.num_children + 1).double()
        dist.scatter(tensor=reply, scatter_list=None, src=0, group=group)

Esempio n. 28

0

Mostra file

def run():
    """Receive a signal from rank 0, bin its features and gather the result.

    A 1000-element tensor is received via scatter and passed to ``mysimpl``,
    which yields ``(features, num_frames, freqs)``.  Each feature is indexed
    as ``(amplitude, frame number, frequency)``.  For every frame a
    2048-entry vector is built: each feature goes to the bin whose ``freqs``
    value is nearest (one bin lower if the feature frequency is below that
    value), and each touched bin holds the mean amplitude of its features.
    The stacked vectors are sent to rank 0 via gather.
    """
    src = dst = 0
    mytensor = torch.zeros(1000)
    dist.scatter(mytensor, src=src)

    # processing
    features, num_frames, freqs = mysimpl(mytensor)

    per_frame = {frame: [] for frame in range(num_frames + 1)}
    for feat in features:
        per_frame[int(feat[1])].append((feat[0], feat[2]))

    rows = []
    for frame in range(num_frames + 1):
        freq_bins = np.zeros(2048)
        amps_by_bin = {}
        for amp, freq in per_frame[frame]:
            bin_idx = np.abs(freqs - freq).argmin()
            if freq < freqs[bin_idx]:
                bin_idx -= 1
            amps_by_bin.setdefault(bin_idx, []).append(amp)
        for bin_idx, amps in amps_by_bin.items():
            freq_bins[bin_idx] += np.mean(np.array(amps))
        rows.append(freq_bins)

    frame_freq_bins = torch.from_numpy(np.array(rows))
    dist.gather(frame_freq_bins, dst=dst)
    return

Esempio n. 29

0

Mostra file

def distribute_samples(nodes, rank, args):
    """
    Rank 0 draws sample indices for two workers and scatters them; every
    other rank receives its indices and records its own label subset.

    Worker 0 is given samples of the first half of ``args.labels`` and
    worker 1 samples of the second half, ``num_samples_per_class`` draws per
    label with replacement.

    Returns an int tensor of indices: the received ones on a worker, both
    workers' indices concatenated on rank 0.
    """
    if rank != 0:
        half = int(len(args.labels)/2)
        args.local_labels = args.labels[half * (rank - 1): half * rank]
        indices_local = torch.zeros([args.num_samples_train], dtype=torch.int)
        dist.scatter(tensor=indices_local, src=0, scatter_list=[], group=nodes)
        return indices_local

    half = int(len(args.labels)/2)
    indices_worker_0 = np.zeros([args.num_samples_train])
    indices_worker_1 = np.zeros([args.num_samples_train])
    num_samples_per_class = int(args.num_samples_train / (len(args.labels)/2))

    # Fill each worker's array one label-sized slice at a time.
    assignments = ((indices_worker_0, args.labels[:half]),
                   (indices_worker_1, args.labels[half:]))
    for target, label_subset in assignments:
        for i, label in enumerate(label_subset):
            start = i * num_samples_per_class
            candidates = misc.find_indices_for_labels(args.dataset.root.train, [label])
            target[start: start + num_samples_per_class] = \
                np.random.choice(candidates, [num_samples_per_class], replace=True)

    random.shuffle(indices_worker_0)
    random.shuffle(indices_worker_1)

    # Send samples to the workers; the master's own slot is its zero buffer.
    indices_local = torch.zeros([args.num_samples_train], dtype=torch.int)
    outgoing = [indices_local,
                torch.IntTensor(indices_worker_0),
                torch.IntTensor(indices_worker_1)]
    dist.scatter(tensor=indices_local, src=0, scatter_list=outgoing, group=nodes)

    # Keep everything that was sent, to evaluate train loss and accuracy later.
    return torch.IntTensor(np.hstack((indices_worker_0, indices_worker_1)))

Esempio n. 30

0

Mostra file

    def transfer_gradients(self, grad_update_conv):
        """
        Send the averaged sparse gradients from rank 0 to every node.

        :parameter
        grad_update_conv : encoded gradients, decoded first with
            ``self.converter.str_to_gradient``
        :return
        upd_grads : list with one received gradient (on the CPU) per entry of
            ``self.shapes``
        """
        decoded = self.converter.str_to_gradient(grad_update_conv)
        upd_grads = []

        for idx in range(len(self.shapes)):
            updated = torch.zeros(self.shapes[idx])

            if self.rank == 0:
                # Every node gets its own CPU copy of the same gradient.
                outgoing = [decoded[idx].to('cpu') for _ in range(self.size)]
            else:
                outgoing = []
            dist.scatter(tensor=updated, src=0, scatter_list=outgoing)

            upd_grads.append(updated.to('cpu'))
        return upd_grads

Esempio n. 31

0

Mostra file

File: benchmark.py Progetto: Jsmilemsj/pytorch

    for bytes in [2**n for n in range(MIN_BYTES, MAX_BYTES)]:
        tensor = torch.ByteTensor(bytes).fill_(42)
        for num_tensors in [10**n for n in range(MIN_NUM_TENSORS, MAX_NUM_TENSORS)]:
            for i in range(0, num_tensors):
                dist.all_reduce(tensor)
dist.barrier()

# Scatter benchmark.  For every payload size 2**n (n in
# [MIN_BYTES, MAX_BYTES)) and every repetition count 10**n (n in
# [MIN_NUM_TENSORS, MAX_NUM_TENSORS)), rank 0 times `num_tensors` scatter
# calls and prints the statistics; the other ranks only take part in the
# same number of scatters.
if rank == 0:
    print_header("scatter")
    for bytes in [2**n for n in range(MIN_BYTES, MAX_BYTES)]:
        # Byte tensor with `bytes` elements, each set to 42.
        tensor = torch.ByteTensor(bytes).fill_(42)
        # The same tensor object is sent to every rank.
        tensors = [tensor for n in range(0, dist.get_world_size())]
        for num_tensors in [10**n for n in range(MIN_NUM_TENSORS, MAX_NUM_TENSORS)]:
            start = timer()
            for i in range(0, num_tensors):
                dist.scatter(tensor, scatter_list=tensors)
            end = timer()
            print_stats(bytes, num_tensors, end - start)
    print()
else:
    # Receiving side: identical loop structure, no timing.
    for bytes in [2**n for n in range(MIN_BYTES, MAX_BYTES)]:
        tensor = torch.ByteTensor(bytes).fill_(42)
        for num_tensors in [10**n for n in range(MIN_NUM_TENSORS, MAX_NUM_TENSORS)]:
            for i in range(0, num_tensors):
                dist.scatter(tensor, src=0)
# All ranks synchronise before the next benchmark section.
dist.barrier()

if rank == 0:
    print_header("gather")
    for bytes in [2**n for n in range(MIN_BYTES, MAX_BYTES)]:
        tensor = torch.ByteTensor(bytes).fill_(42)