Example #1
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated CUDA kernel for ``out = a*b``.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` launches the built
            function through ``prepared_async_call`` on
            ``queue.cuda_stream_comp``.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate
        src = generate_mm(arr,
                          dtype=a.dtype,
                          platform='cuda',
                          alpha=alpha,
                          beta=beta)

        # Build
        fun = self._build_kernel('gimmik_mm', src, 'iPiPi')

        # Determine the grid/block (grid is derived from b.ncol)
        block = (128, 1, 1)
        grid = get_grid_for_block(block, b.ncol)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                fun.prepared_async_call(grid, block, queue.cuda_stream_comp,
                                        b.ncol, b, b.leaddim, out, out.leaddim)

        return MulKernel()
Example #2
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated OpenCL kernel for ``out = a*b``.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``Kernel`` constructed with ``mats=[b, out]`` whose
            ``run(queue)`` calls ``exec_async`` on ``queue.cmd_q`` with a
            global size of ``(b.ncol,)``.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate
        src = generate_mm(arr, dtype=a.dtype, platform='opencl',
                          alpha=alpha, beta=beta)

        # Build; the arguments are bound once here rather than per run
        fun = self._build_kernel('gimmik_mm', src,
                                 [np.int32] + [np.intp, np.int32]*2)
        fun.set_args(b.ncol, b, b.leaddim, out, out.leaddim)

        class MulKernel(Kernel):
            def run(self, queue):
                fun.exec_async(queue.cmd_q, (b.ncol,), None)

        return MulKernel(mats=[b, out])
Example #3
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated CUDA kernel for ``out = a*b``.

        ``a`` is fetched once; its non-zero count and its number of
        distinct absolute values decide whether GiMMiK is used.
        ``alpha`` and ``beta`` are forwarded unchanged to ``generate_mm``.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag, or if it
                has more than 28 distinct absolute values while more than
                15% of its entries are non-zero.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` calls ``exec_async``
            on ``queue.stream_comp``.
        """
        # Reject shape combinations that cannot form out = a*b
        shapes_ok = (a.nrow == out.nrow and a.ncol == b.nrow and
                     b.ncol == out.ncol)
        if not shapes_ok:
            raise ValueError('Incompatible matrices for out = a*b')

        # The operator matrix must carry the 'const' tag
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Pull the matrix contents and gather its statistics
        amat = a.get()
        n_nonzero = np.count_nonzero(amat)
        n_unique = len(np.unique(np.abs(amat)))

        # Unsuitable only when there are many distinct magnitudes *and*
        # the fraction of non-zeros is high
        if n_unique > 28 and n_nonzero / amat.size > 0.15:
            raise NotSuitableError('Matrix is inappropriate for GiMMiK')

        # Emit source for this particular matrix
        src = generate_mm(amat, dtype=a.dtype, platform='cuda',
                          alpha=alpha, beta=beta)

        # Compile with argument types (int32, intp, int32, intp, int32)
        argtypes = [np.int32, np.intp]*2 + [np.int32]
        fun = self._build_kernel('gimmik_mm', src, argtypes)

        # Launch geometry; the grid is derived from b.ncol
        block = (128, 1, 1)
        grid = get_grid_for_block(block, b.ncol)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                fun.exec_async(grid, block, queue.stream_comp, b.ncol, b,
                               b.leaddim, out, out.leaddim)

        return MulKernel()
Example #4
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated CUDA kernel for ``out = a*b``.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``"const"`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` launches the built
            function through ``prepared_async_call`` on
            ``queue.cuda_stream_comp``.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError("Incompatible matrices for out = a*b")

        # Check that A is constant
        if "const" not in a.tags:
            raise NotSuitableError("GiMMiK requires a constant a matrix")

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError("Matrix too dense for GiMMiK")

        # Generate
        src = generate_mm(
            arr, dtype=a.dtype, platform="cuda", alpha=alpha, beta=beta
        )

        # Build
        fun = self._build_kernel("gimmik_mm", src, "iPiPi")

        # Determine the grid/block (grid is derived from b.ncol)
        block = (128, 1, 1)
        grid = get_grid_for_block(block, b.ncol)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                fun.prepared_async_call(
                    grid, block, queue.cuda_stream_comp,
                    b.ncol, b, b.leaddim, out, out.leaddim
                )

        return MulKernel()
Example #5
0
File: gimmik.py Project: pv101/PyFR
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated OpenCL kernel for ``out = a*b``.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` invokes the built
            function on ``queue.cl_queue_comp`` with a global size of
            ``(b.ncol,)``.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate
        src = generate_mm(arr, dtype=a.dtype, platform='opencl',
                          alpha=alpha, beta=beta)

        # Build
        fun = self._build_kernel('gimmik_mm', src,
                                 [np.int32] + [np.intp, np.int32]*2)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                # The .data attributes of b and out are passed here, not
                # the matrix objects themselves
                fun(queue.cl_queue_comp, (b.ncol,), None, b.ncol,
                    b.data, b.leaddim, out.data, out.leaddim)

        return MulKernel()
Example #6
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated ``c-omp`` kernel for ``out = a*b``.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` calls the built
            ``gimmik_mm`` function directly; ``queue`` is not used.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate the GiMMiK kernel
        src = generate_mm(arr,
                          dtype=a.dtype,
                          platform='c-omp',
                          alpha=alpha,
                          beta=beta)
        gimmik_mm = self._build_kernel('gimmik_mm', src,
                                       [np.int32] + [np.intp, np.int32] * 2)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                gimmik_mm(b.ncol, b, b.leaddim, out, out.leaddim)

        return MulKernel()
Example #7
0
def get_mm_kernel(mat, alpha=1., beta=0., tol=1e-15):
    """Return the compiled ``gimmik_mm`` CUDA kernel for *mat*.

    *mat* is first passed through ``filter_tol`` with *tol*. The result is
    handed to ``generate_mm`` (platform ``'cuda'``, dtype taken from *mat*,
    *alpha* and *beta* forwarded unchanged), the emitted source is
    compiled with ``compiler.SourceModule`` and the ``gimmik_mm`` entry
    point is looked up via ``get_kernel`` with the signature ``'iPiPi'``.
    """
    filtered = filter_tol(mat, tol=tol)
    source = generate_mm(filtered, dtype=mat.dtype, platform='cuda',
                         alpha=alpha, beta=beta)
    module = compiler.SourceModule(source)
    return get_kernel(module, "gimmik_mm", 'iPiPi')
Example #8
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated C kernel, run via a batch wrapper.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged. The
        compiled ``gimmik_mm`` function is not called directly: its
        address is handed to a ``batch_gemm`` kernel rendered from the
        ``'batch-gemm'`` template.
        NOTE(review): presumably the generated kernel computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``ComputeKernel`` whose ``run(queue)`` calls ``batch_gemm``;
            ``queue`` is not used.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate the GiMMiK kernel
        src = generate_mm(arr,
                          dtype=a.dtype,
                          platform='c',
                          alpha=alpha,
                          beta=beta)
        gimmik_mm = self._build_kernel('gimmik_mm', src,
                                       [np.int32] + [np.intp, np.int32] * 2)

        # Address of the compiled function, passed to the wrapper below
        gimmik_ptr = cast(gimmik_mm, c_void_p).value

        tplargs = {'lib': 'gimmik'}

        # Render our parallel wrapper kernel
        src = self.backend.lookup.get_template('batch-gemm').render(**tplargs)

        # Argument types for par_gimmik
        argt = [np.intp] + [np.int32] * 2 + [np.intp, np.int32] * 2

        # Build
        batch_gemm = self._build_kernel('batch_gemm', src, argt)

        class MulKernel(ComputeKernel):
            def run(self, queue):
                batch_gemm(gimmik_ptr, b.leaddim, b.nblocks, b, b.blocksz, out,
                           out.blocksz)

        return MulKernel()
Example #9
0
    def mul(self, a, b, out, alpha=1.0, beta=0.0):
        """Create a GiMMiK-generated C function, run via a batch wrapper.

        The contents of ``a`` are fetched once with ``a.get()``, checked
        for sparsity and then passed to ``generate_mm`` together with
        ``alpha`` and ``beta``, which are forwarded unchanged. The
        compiled ``gimmik_mm`` function is not called directly: its
        address is bound as the first argument of a ``batch_gemm`` kernel
        rendered from the ``'batch-gemm'`` template.
        NOTE(review): presumably the generated function computes
        ``out = alpha*a*b + beta*out`` -- confirm against GiMMiK.

        Raises:
            ValueError: if the dimensions of ``a``, ``b`` and ``out`` are
                not compatible.
            NotSuitableError: if ``a`` lacks the ``'const'`` tag or has
                more than ``self.max_nnz`` non-zero entries.

        Returns:
            A ``Kernel`` constructed with ``mats=[b, out]`` and
            ``misc=[gimmik_mm]`` whose ``run(queue)`` calls
            ``batch_gemm()`` with its pre-bound arguments.
        """
        # Ensure the matrices are compatible
        if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
            raise ValueError('Incompatible matrices for out = a*b')

        # Check that A is constant
        if 'const' not in a.tags:
            raise NotSuitableError('GiMMiK requires a constant a matrix')

        # Fetch A a single time; the same array feeds both the sparsity
        # check and code generation, so a (possibly costly) second
        # a.get() is avoided
        arr = a.get()

        # Check that A is reasonably sparse
        if np.count_nonzero(arr) > self.max_nnz:
            raise NotSuitableError('Matrix too dense for GiMMiK')

        # Generate and compile the GiMMiK function
        src = generate_mm(arr,
                          dtype=a.dtype,
                          platform='c',
                          alpha=alpha,
                          beta=beta)
        gimmik_mm = self._build_function('gimmik_mm', src, 'iPiPi')

        # Address of the compiled function, passed to the wrapper below
        gimmik_ptr = cast(gimmik_mm, c_void_p).value

        # Render our parallel wrapper kernel
        src = self.backend.lookup.get_template('batch-gemm').render(
            lib='gimmik')

        # Build; the arguments are bound once here rather than per run
        batch_gemm = self._build_kernel('batch_gemm', src, 'PiiPiPi')
        batch_gemm.set_args(gimmik_ptr, b.leaddim, b.nblocks, b, b.blocksz,
                            out, out.blocksz)

        class MulKernel(Kernel):
            def run(self, queue):
                batch_gemm()

        # NOTE(review): gimmik_mm is handed over via misc, presumably so
        # the function behind gimmik_ptr stays alive -- confirm
        return MulKernel(mats=[b, out], misc=[gimmik_mm])