Example #1
 def test_cupy_array_async1(self):
     x = cuda.to_gpu(self.x)
     if not self.c_contiguous:
         x = cuda.cupy.asfortranarray(x)
     y = cuda.to_cpu(x, stream=cuda.Stream(null=True))
     self.assertIsInstance(y, numpy.ndarray)
     cuda.cupy.testing.assert_array_equal(self.x, y)
Example #2
    def __call__(self, batch, device=None, padding=None):
        """Concatenate data and transfer them to GPU asynchronously.

        See also :func:`chainer.dataset.concat_examples`.

        Args:
            batch (list): A list of examples.
            device (int): Device ID to which each array is sent.
            padding: Scalar value for extra elements.

        Returns:
            Array, a tuple of arrays, or a dictionary of arrays.
            The type depends on the type of each example in the batch.
        """
        if len(batch) == 0:
            raise ValueError('batch is empty')
        first_elem = batch[0]

        if len(self._conveyor) == 0:
            self._device = device  # device is set at first call
            if device is not None and device >= 0 and self._stream is None:
                with cuda.get_device_from_id(device):
                    self._stream = cuda.Stream(non_blocking=True)
        if device is not self._device:
            raise ValueError('device is different')

        with cuda.get_device_from_id(device):
            if isinstance(first_elem, tuple):
                result = []
                if not isinstance(padding, tuple):
                    padding = [padding] * len(first_elem)

                for i in six.moves.range(len(first_elem)):
                    self._conveyor[i].put(
                        _concat_arrays([example[i] for example in batch],
                                       padding[i]))

                for i in six.moves.range(len(first_elem)):
                    result.append(self._conveyor[i].get())

                return tuple(result)

            elif isinstance(first_elem, dict):
                result = {}
                if not isinstance(padding, dict):
                    padding = {key: padding for key in first_elem}

                for key in first_elem:
                    self._conveyor[key].put(
                        _concat_arrays([example[key] for example in batch],
                                       padding[key]))

                for key in first_elem:
                    result[key] = self._conveyor[key].get()

                return result

            else:
                return to_device(device, _concat_arrays(batch, padding))
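
The converter in Example #2 is a drop-in replacement for chainer.dataset.concat_examples that overlaps host-to-device copies with computation. A minimal usage sketch, assuming the class is Chainer's ConcatWithAsyncTransfer from chainer.dataset.convert and that GPU device 0 is available; the toy batch below is hypothetical:

import numpy as np
from chainer.dataset.convert import ConcatWithAsyncTransfer

# Hypothetical toy batch: each example is an (input, label) tuple.
batch = [(np.random.rand(3).astype(np.float32), np.int32(i % 2))
         for i in range(8)]

converter = ConcatWithAsyncTransfer()
# The first call fixes the target device and lazily creates the
# non-blocking stream used for asynchronous transfers.
xs, ts = converter(batch, device=0)

In a training loop the converter is normally handed to the updater (e.g. as the converter argument of StandardUpdater) rather than called by hand.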
Example #3
 def test_cupy_array_async1(self):
     x = cuda.to_gpu(self.x)
     if not self.c_contiguous:
         x = cuda.cupy.asfortranarray(x)
     y = cuda.to_gpu(x, stream=cuda.Stream())
     self.assertIsInstance(y, cuda.ndarray)
     self.assertIs(x, y)  # Do not copy
     cuda.cupy.testing.assert_array_equal(x, y)
Example #4
 def test_cupy_array_async2(self):
     x = cuda.to_gpu(self.x, device=0)
     with x.device:
         if not self.c_contiguous:
             x = cuda.cupy.asfortranarray(x)
     y = cuda.to_gpu(x, device=1, stream=cuda.Stream(null=True))
     self.assertIsInstance(y, cuda.ndarray)
     self.assertIsNot(x, y)  # Do copy
     cuda.cupy.testing.assert_array_equal(x, y)
Example #5
    def forward(self, inputs):
        x = inputs[0].copy()  # make sure data is aligned
        xp = cuda.get_array_module(x)
        alphabet_size = x.shape[2]
        label_lengths = np.asarray([len(l.flatten()) for l in self.labels],
                                   dtype=np.intc)
        seq_lengths = np.asarray(self.seq_lengths, dtype=np.intc)
        ws_size = np.zeros(1, dtype=np.intc)

        if xp is np:
            warp_ctc.ctc_get_workspace_size_cpu(label_lengths.ctypes.data,
                                                seq_lengths.ctypes.data,
                                                alphabet_size, x.shape[1],
                                                ws_size.ctypes.data)
            self.gradients = np.zeros_like(x)
            ws = np.empty(ws_size // 4, dtype=np.float32)
            loss = np.zeros(len(self.seq_lengths), dtype=np.float32)
            labels = np.concatenate([l.flatten() for l in self.labels])

            warp_ctc.ctc_compute_ctc_loss_cpu(
                x.ctypes.data, self.gradients.ctypes.data, labels.ctypes.data,
                label_lengths.ctypes.data, seq_lengths.ctypes.data,
                alphabet_size, x.shape[1], loss.ctypes.data, ws.ctypes.data, 1)
        else:
            stream = cuda.Stream(null=True)
            warp_ctc.ctc_get_workspace_size_gpu(label_lengths.ctypes.data,
                                                seq_lengths.ctypes.data,
                                                alphabet_size, x.shape[1],
                                                ws_size.ctypes.data,
                                                stream.ptr)
            self.gradients = cuda.cupy.zeros_like(x)
            ws = cuda.cupy.empty(ws_size // 4, dtype=np.float32)
            loss = np.zeros(len(self.seq_lengths), dtype=np.float32)
            labels = np.concatenate([l.flatten() for l in self.labels])

            def _ctc():
                warp_ctc.ctc_compute_ctc_loss_gpu(
                    x.data.ptr, self.gradients.data.ptr, labels.ctypes.data,
                    label_lengths.ctypes.data, seq_lengths.ctypes.data,
                    alphabet_size, x.shape[1], loss.ctypes.data, ws.data.ptr,
                    stream.ptr)

            try:
                _ctc()
            except Exception as e:
                # The first attempt may fail if the memory pool is
                # fragmented; free cached blocks and retry once.
                cuda.memory_pool.free_all_free()
                try:
                    _ctc()
                except Exception:
                    raise e

        if np.any(np.isnan(loss)):
            raise ValueError('CTC loss is NaN')

        score = xp.full((1, ), xp.mean(loss), dtype=np.float32)
        return score,
Example #6
 def test_cupy_array_async3(self):
     with cuda.Device(0):
         x = cuda.to_gpu(self.x)
         if not self.c_contiguous:
             x = cuda.cupy.asfortranarray(x)
     with cuda.Device(1):
         y = cuda.to_gpu(x, stream=cuda.Stream(null=True))
     self.assertIsInstance(y, cuda.ndarray)
     self.assertIsNot(x, y)  # Do copy
     cuda.cupy.testing.assert_array_equal(x, y)
Example #7
    def test_get_and_add_callback(self):
        N = 100
        cupy_arrays = [testing.shaped_random((2, 3)) for _ in range(N)]

        stream = cuda.Stream(null=True)
        out = []
        for i in range(N):
            numpy_array = cupy_arrays[i].get(stream=stream)
            # add_callback invokes the callable with (stream, error_status,
            # user_data); user_data here is the (index, array) tuple.
            stream.add_callback(lambda _, __, t: out.append(t[0]),
                                (i, numpy_array))

        # Callbacks run in enqueue order, so the collected indices
        # must come back as 0, 1, ..., N-1.
        stream.synchronize()
        self.assertEqual(out, list(range(N)))
Example #8
 def __call__(self, batch, device=None):
     assert len(batch) != 0, 'batch is empty'
     first_elem = batch[0]
     if len(self._conveyor) == 0:
         self._device = device
         if device is not None and device >= 0 and self._stream is None:
             with cuda.get_device_from_id(device):
                 self._stream = cuda.Stream(non_blocking=True)
     assert device is self._device, 'device is different'
     with cuda.get_device_from_id(device):
         if isinstance(first_elem, tuple):
             I, J = len(first_elem), len(batch)
             result = [[] for _ in range(I)]
             # i * J + j is unique for every (element, example) pair
             # (the original j * J + i can collide when I > J), so each
             # transfer gets its own conveyor.
             for i in range(I):
                 for j in range(J):
                     self._conveyor[i * J + j].put(batch[j][i])
             for i in range(I):
                 for j in range(J):
                     result[i].append(self._conveyor[i * J + j].get())
             return tuple(result)
     assert False, 'Not supported'
Example #9
 def test_numpy_array_async(self):
     y = cuda.to_cpu(self.x, stream=cuda.Stream())
     self.assertIsInstance(y, numpy.ndarray)
     self.assertIs(self.x, y)  # Do not copy
Example #10
 def test_numpy_array_async3(self):
     with cuda.Device(1):
         y = cuda.to_gpu(self.x, stream=cuda.Stream(null=True))
     self.assertIsInstance(y, cuda.ndarray)
     cuda.cupy.testing.assert_array_equal(self.x, y)
     self.assertEqual(int(y.device), 1)
Example #11
 def test_numpy_array_async(self):
     y = cuda.to_gpu(self.x, stream=cuda.Stream(null=True))
     self.assertIsInstance(y, cuda.ndarray)
     cuda.cupy.testing.assert_array_equal(self.x, y)