def test_large_seq2col_gpu_against_cpu(nW):
    """GPU seq2col must agree with the CPU reference on a large batch."""
    gpu_ops = CupyOps()
    cpu_ops = NumpyOps()
    # Batch sized to span multiple CUDA grids:
    # threads per block * blocks * 2.
    n_rows = 128 * 128 * 2
    X_cpu = (
        cpu_ops.xp.random.randn(n_rows * 2).astype("float32").reshape(-1, 2)
    )
    X_gpu = gpu_ops.asarray2f(X_cpu)
    # A mix of sequence lengths so window boundaries get exercised.
    seq_lens = cpu_ops.asarray1i([1, 4, 2, 1] * (n_rows // 8))
    seq_lens_gpu = gpu_ops.asarray1i(seq_lens)
    expected = cpu_ops.seq2col(X_cpu, nW=nW, lengths=seq_lens)
    actual = gpu_ops.seq2col(X_gpu, nW=nW, lengths=seq_lens_gpu)
    assert_allclose(expected, actual.get())
def get_dropout_mask(self, shape: Shape, drop: Optional[float]) -> FloatsXd:
    """Return an all-ones (pass-through) mask; refuse real dropout.

    Override for prediction-time ops: ``drop`` of None or <= 0 yields a
    no-op mask of ones, any positive rate raises.
    """
    if drop is None or drop <= 0:
        # No dropout requested: a mask of ones leaves activations unchanged.
        return self.xp.ones(shape, dtype="f")
    else:
        raise ValueError(
            "During prediction, dropout should not be applied")


# Shared fixtures for the data-matching tests below.
array1d = OPS.xp.asarray([1, 2, 3], dtype="f")
array1dint = OPS.xp.asarray([1, 2, 3], dtype="i")
array2d = OPS.xp.asarray(
    [[4, 2, 3, 4], [1, 5, 3, 1], [9, 8, 5, 7]], dtype="f")
array2dint = OPS.xp.asarray([[1, 2, 3], [4, 5, 6]], dtype="i")
array3d = OPS.xp.zeros((3, 3, 3), dtype="f")
ragged = Ragged(array2d, OPS.xp.asarray([2, 1], dtype="i"))
padded = Padded(
    array3d, array1d, OPS.asarray1i([1, 2, 3, 4]), OPS.asarray1i([1, 2, 3, 4]))
width = array2d.shape[1]
# Rows sized by the max value in array2dint — presumably an embedding/vector
# table indexed by those ints; confirm against the tests that use it.
vectors = numpy.zeros((array2dint.max(), 1), dtype="f")


def assert_data_match(Y, out_data):
    """Assert Y has the same container type and dimensionality as out_data."""
    assert type(Y) == type(out_data)
    if isinstance(out_data, OPS.xp.ndarray):
        assert isinstance(Y, OPS.xp.ndarray)
        assert out_data.ndim == Y.ndim
    elif isinstance(out_data, Ragged):
        assert isinstance(Y, Ragged)
        assert out_data.data.ndim == Y.data.ndim
        assert out_data.lengths.ndim == Y.lengths.ndim
    # NOTE(review): the Padded branch's body continues beyond this chunk of
    # the file — truncated in this view, left as-is.
    elif isinstance(out_data, Padded):