Code example #1
    def forward(self, inputs, im_info, gt_boxes, num_boxes, Ms, Ns):

        #tensors,_=scatter_kwargs([inputs,im_info,gt_boxes,num_boxes], {}, self.device_ids)

        # Split every input tensor into one chunk per GPU in self.device_ids.
        inputs_multi = comm.scatter(inputs, self.device_ids)
        im_info = comm.scatter(im_info, self.device_ids)
        gt_boxes = comm.scatter(gt_boxes, self.device_ids)
        num_boxes = comm.scatter(num_boxes, self.device_ids)
        #im_info, gt_boxes, num_boxes

        # Run one module replica per device on its chunk of the inputs.
        tensors = parallel_apply(self.modules, [(v, ) for v in inputs_multi],
                                 devices=self.device_ids)
        out = []

        for i, tensor in enumerate(tensors):
            with torch.cuda.device(tensor.get_device()):
                # Fold dims 1 and 2 into a single channel dimension and crop
                # the spatial extent to Ms x Ns before wrapping in a Variable.
                tensors[i] = tensors[i].view(
                    tensors[i].size(0),
                    tensors[i].size(1) * tensors[i].size(2),
                    tensors[i].size(3), tensors[i].size(4))
                tensors[i] = tensors[i][:, :, :Ms, :Ns]
                tensors[i] = tensors[i].contiguous()
                tensors[i] = Variable(tensors[i])
                # Keep each device's output together with its metadata.
                out.append([
                    tensors[i], im_info[i].cuda(), gt_boxes[i].cuda(),
                    num_boxes[i].cuda()
                ])

        return out  #tensors,im_info, gt_boxes, num_boxes
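
The pattern above (scatter the inputs with comm.scatter, run one module replica per device with parallel_apply, then post-process each chunk on its own device) can be reproduced outside of any project-specific code. The following is a minimal sketch of that flow, not taken from the project above; the model, shapes, and device list are placeholder choices, and it assumes at least two visible CUDA devices.

import torch
import torch.nn as nn
from torch.cuda import comm
from torch.nn.parallel import replicate, parallel_apply

assert torch.cuda.device_count() >= 2, "sketch assumes two GPUs"
device_ids = [0, 1]

model = nn.Linear(8, 4).cuda(device_ids[0])   # placeholder module
inputs = torch.randn(6, 8, device="cuda:0")   # placeholder batch

# 1. Split the batch along dim 0, one chunk per device.
chunks = comm.scatter(inputs, device_ids)

# 2. Copy the module to every device and run each copy on its chunk.
replicas = replicate(model, device_ids)
outputs = parallel_apply(replicas, [(c,) for c in chunks], devices=device_ids)

# 3. Concatenate the per-device results back on the first device.
result = comm.gather(outputs, destination=device_ids[0])
print(result.shape)  # torch.Size([6, 4])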
Code example #2
    def forward(self, inputs):
        # Scatter the batch across the GPUs, one chunk per module replica.
        inputs_multi = comm.scatter(inputs, self.device_ids)
        tensors = parallel_apply(self.modules, [(v, ) for v in inputs_multi],
                                 devices=self.device_ids)
        out = []

        for i, tensor in enumerate(tensors):
            with torch.cuda.device(tensor.get_device()):
                tensors[i] = torch.autograd.Variable(tensors[i])
                out.append([tensors[i]])

        return out
Code example #3
File: test_cuda.py  Project: ohnabe/C-CORE
 def _test_scatter(self, input, chunk_sizes=None, dim=0):
     if torch.cuda.device_count() < 2:
         raise unittest.SkipTest("only one GPU detected")
     # Scatter across GPUs 0 and 1 and compare each chunk against the
     # corresponding slice of the original tensor.
     result = comm.scatter(input, (0, 1), chunk_sizes, dim)
     self.assertEqual(len(result), 2)
     if chunk_sizes is None:
         # Default behaviour: an even split along `dim`.
         chunk_sizes = tuple(repeat(input.size(dim) // 2, 2))
     chunk_start = 0
     for i, r in enumerate(result):
         chunk_end = chunk_start + chunk_sizes[i]
         index = [slice(None, None), slice(None, None)]
         index[dim] = slice(chunk_start, chunk_end)
         self.assertEqual(r, input[tuple(index)], 0)
         chunk_start = chunk_end
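
The test above pins down the contract the other examples rely on: comm.scatter returns one chunk per device, splitting evenly along dim unless an explicit chunk_sizes sequence is given, in which case chunk i of the stated size goes to device i. A stand-alone sketch of those two cases (illustrative values, assuming two visible GPUs):

import torch
from torch.cuda import comm

if torch.cuda.device_count() >= 2:
    x = torch.arange(12.).view(6, 2)

    # Even split along dim 0: 3 rows to cuda:0 and 3 rows to cuda:1.
    even = comm.scatter(x, (0, 1))
    print([c.shape for c in even])    # [torch.Size([3, 2]), torch.Size([3, 2])]

    # Explicit chunk sizes: 2 rows to cuda:0, 4 rows to cuda:1.
    uneven = comm.scatter(x, (0, 1), chunk_sizes=(2, 4))
    print([c.shape for c in uneven])  # [torch.Size([2, 2]), torch.Size([4, 2])]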
Code example #4
File: _functions.py  Project: athiwatp/pytorch
 def forward(self, input):
     self.input_device = input.get_device() if input.is_cuda else -1
     streams = None
     if self.input_device == -1:
         # Perform CPU to GPU copies in a background stream
         streams = [_get_stream(device) for device in self.target_gpus]
     outputs = comm.scatter(input, self.target_gpus, self.chunk_sizes, self.dim, streams)
     # Synchronize with the copy stream
     if streams is not None:
         for i, output in enumerate(outputs):
             with torch.cuda.device(self.target_gpus[i]):
                 main_stream = torch.cuda.current_stream()
                 main_stream.wait_stream(streams[i])
                 output.record_stream(main_stream)
     return outputs
Code example #5
 def forward(ctx, target_gpus, chunk_sizes, dim, input):
     target_gpus = list(map(lambda x: _get_device_index(x, True), target_gpus))
     ctx.dim = dim
     ctx.input_device = input.get_device() if input.is_cuda else -1
     streams = None
     if ctx.input_device == -1:
         # Perform CPU to GPU copies in a background stream
         streams = [_get_stream(device) for device in target_gpus]
     outputs = comm.scatter(input, target_gpus, chunk_sizes, ctx.dim, streams)
     # Synchronize with the copy stream
     if streams is not None:
         for i, output in enumerate(outputs):
             with torch.cuda.device(target_gpus[i]):
                 main_stream = torch.cuda.current_stream()
                 main_stream.wait_stream(streams[i])
                 output.record_stream(main_stream)
     return outputs
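
The two examples above wrap comm.scatter in an autograd Function (an older self-based form and the newer static ctx-based form of the same logic): when the source tensor is still on the CPU, each host-to-device copy is issued on a background stream, and the target GPU's current stream is made to wait on that copy before the chunk is used. In application code this path is normally reached through the public helper torch.nn.parallel.scatter rather than by calling the Function directly; a minimal sketch, assuming at least two visible GPUs:

import torch
from torch.nn.parallel import scatter

if torch.cuda.device_count() >= 2:
    cpu_input = torch.randn(8, 3)        # CPU tensor: exercises the background-stream copy path
    chunks = scatter(cpu_input, [0, 1])  # one chunk per listed GPU
    print([(c.device, c.shape) for c in chunks])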
Code example #6
def get_onehot_label(labels,
                     num_gpus,
                     num_classes,
                     model_parallel=False,
                     class_split=None):
    # Get one-hot labels
    labels = labels.view(-1, 1)
    labels_onehot = torch.zeros(len(labels), num_classes).cuda()
    labels_onehot.scatter_(1, labels, 1)

    if not model_parallel:
        return labels_onehot
    else:
        # Model parallelism: split the class (column) dimension across GPUs,
        # giving GPU i the slice of width class_split[i].
        label_tuple = comm.scatter(labels_onehot,
                                   range(num_gpus),
                                   class_split,
                                   dim=1)
        return label_tuple
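
Note that the snippet above uses two different scatters: Tensor.scatter_ fills the one-hot matrix in place, while comm.scatter then splits that matrix column-wise so each GPU only holds its own share of the classes under model parallelism. A small, self-contained sketch of the column split (the class counts per GPU are made-up values, and two visible GPUs are assumed):

import torch
from torch.cuda import comm

if torch.cuda.device_count() >= 2:
    num_classes = 10
    class_split = [6, 4]                        # hypothetical classes handled per GPU
    labels = torch.tensor([1, 7, 3]).cuda()

    onehot = torch.zeros(len(labels), num_classes).cuda()
    onehot.scatter_(1, labels.view(-1, 1), 1)   # in-place one-hot fill (Tensor.scatter_)

    # Cross-device split of the class (column) dimension.
    parts = comm.scatter(onehot, range(2), class_split, dim=1)
    print([p.shape for p in parts])             # [torch.Size([3, 6]), torch.Size([3, 4])]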
Code example #7
 def forward(self, input):
     self.input_device = input.get_device() if input.is_cuda else -1
     # Split the input across self.target_gpus; chunk_sizes may be None
     # for an even split along self.dim.
     return comm.scatter(input, self.target_gpus, self.chunk_sizes,
                         self.dim)
Code example #8
File: _functions.py  Project: athiwatp/pytorch
 def backward(self, grad_output):
     # Route the gradient back to the GPUs the forward inputs came from,
     # using the per-input sizes recorded in forward.
     return comm.scatter(grad_output, self.input_gpus, self.input_sizes,
                         self.dim)