Example #1
0
    def test_record_stream_cuda(self, cuda_sleep):
        """Verifies that ``record_stream`` keeps a freed block reserved until
        the recorded stream has finished its queued work, preventing premature
        reuse by the CUDA caching allocator.
        """
        # This test detects unexpected block reallocation. For reliable test,
        # the stream to allocate tensors is isolated. The allocator will not
        # reuse free blocks which were allocated from another stream.
        stream_alloc = new_stream(torch.device('cuda'))
        with torch.cuda.stream(stream_alloc):
            x = torch.rand(1, device=torch.device('cuda'))

        # Record 'x' on a second stream, then queue ~0.5s of work there so
        # that stream is still busy when 'x' is deleted below.
        stream = new_stream(torch.device('cuda'))
        record_stream(x, stream)
        with use_stream(stream):
            cuda_sleep(0.5)

        # 'x' is deleted at Python's perspective. But the block of 'x' is still
        # required for 'stream'. 'y' shouldn't be allocated to the block.
        data_ptr = x.data_ptr()
        del x
        stream_alloc.synchronize()
        with torch.cuda.stream(stream_alloc):
            y = torch.rand(1, device=torch.device('cuda'))
        assert y.data_ptr() != data_ptr

        # Pause Python until 'stream' finishes tasks queued. Now the block of
        # 'x' is free to be reallocated.
        wait_stream(CPUStream, stream)
        with torch.cuda.stream(stream_alloc):
            z = torch.rand(1, device=torch.device('cuda'))
        assert z.data_ptr() == data_ptr
Example #2
0
    def test_record_stream_shifted_view(self, cuda_sleep):
        """Regression test: recording a *shifted view* (data pointer offset
        from the storage base) must still protect the whole underlying block.
        """
        # Issue: https://github.com/pytorch/pytorch/issues/27366
        stream_alloc = new_stream(torch.device('cuda'))
        with torch.cuda.stream(stream_alloc):
            x = torch.rand(2, device=torch.device('cuda'))

        # 'y' shares storage with 'x' but its data pointer is shifted past
        # the start of the block.
        y = x[1:]
        assert y.data_ptr() > x.data_ptr()

        # Keep 'stream' busy, then record only the shifted view on it.
        stream = new_stream(torch.device('cuda'))
        with use_stream(stream):
            cuda_sleep(0.5)
        record_stream(y, stream)

        # Track the base pointer of the storage, then drop both references.
        data_ptr = x.data_ptr()
        del x, y

        # While 'stream' is still busy, the block must not be reused even
        # though only the shifted view was recorded.
        stream_alloc.synchronize()
        with torch.cuda.stream(stream_alloc):
            z = torch.rand(2, device=torch.device('cuda'))
        assert z.data_ptr() != data_ptr
Example #3
0
    def _ensure_copy_streams(self) -> List[List[AbstractStream]]:
        """Lazily builds and caches the CUDA streams used for copies.

        Even though PyTorch maintains its own pool of pre-allocated CUDA
        streams, caching them here may reduce GPU memory fragmentation
        when the number of micro-batches is small.

        """
        # Populate the cache in place on first use only.
        if not self._copy_streams:
            self._copy_streams.extend(
                [new_stream(device) for _ in range(self.chunks)]
                for device in self.devices
            )

        return self._copy_streams
Example #4
0
    def __init__(
        self,
        batches: List[Batch],
        partitions: List[nn.Sequential],
        devices: Optional[List[torch.device]] = None,
        checkpoint_stop: int = 0,
    ) -> None:
        """Stores the pipeline inputs and prepares one copy stream per
        micro-batch on every device.
        """
        self.batches = batches
        self.partitions = partitions

        # Without explicit devices, place every partition on the CPU.
        if devices is None:
            devices = [torch.device('cpu')] * len(partitions)
        self.devices = devices

        # NOTE(sublee): We don't need to manage a pool of CUDA streams because
        # PyTorch already manages it.
        # See https://github.com/pytorch/pytorch/pull/9938
        self.copy_streams = [
            [new_stream(device) for _ in batches]
            for device in devices
        ]

        self.checkpoint_stop = checkpoint_stop
Example #5
0
def test_copy_wait_cuda_cuda(cuda_sleep):
    # Copy between two distinct streams on the same CUDA device:
    # the current stream feeds a freshly created one.
    device = torch.device('cuda')
    _test_copy_wait(current_stream(device), new_stream(device), cuda_sleep)
Example #6
0
 def test_wait_stream_cpu_cuda(self, cuda_sleep):
     # The CPU stream waits on a freshly created CUDA stream.
     self._test_wait_stream(CPUStream, new_stream(torch.device('cuda')), cuda_sleep)
Example #7
0
 def test_use_stream_cuda(self):
     # Inside the context manager, the fresh stream must be current.
     device = torch.device('cuda')
     fresh = new_stream(device)
     with use_stream(fresh):
         assert current_stream(device) == fresh
Example #8
0
 def test_new_stream_cuda(self):
     # On a CUDA device, a real torch stream is returned, and it is
     # distinct from the default stream.
     fresh = new_stream(torch.device('cuda'))
     assert isinstance(fresh, torch.cuda.Stream)
     assert fresh != torch.cuda.default_stream()
Example #9
0
 def test_new_stream_cpu(self):
     # On CPU there is only one stream: the CPUStream singleton.
     assert new_stream(torch.device('cpu')) is CPUStream
Example #10
0
 def test_wait_stream_cuda_cuda(self, cuda_sleep):
     # The current CUDA stream waits on a freshly created CUDA stream.
     device = torch.device('cuda')
     self._test_wait_stream(current_stream(device), new_stream(device), cuda_sleep)