Example #1
0
def parse_c_arg(c_arg):
    """Parse one C argument declaration into an argument descriptor.

    *c_arg* is a string such as ``"const float *x"`` or ``"unsigned int n"``.
    The ``const`` and ``volatile`` qualifiers are ignored.  A declarator with
    leading ``*`` or trailing ``[...]`` yields a ``VectorArg``; anything else
    yields a ``ScalarArg``.

    Returns ``arg_class(dtype, name)`` where *dtype* is the numpy type that
    corresponds to the C type and *name* is the declared identifier.

    Raises ``ValueError`` if the declarator cannot be parsed or the type name
    is not recognized.
    """
    import re

    # Strip qualifiers as whole words only, so that identifiers which merely
    # contain "const"/"volatile" (e.g. "reconstruct") are left intact.
    c_arg = re.sub(r"\b(?:const|volatile)\b", "", c_arg)

    # process and remove declarator
    decl_re = re.compile(r"(\**)\s*([_a-zA-Z0-9]+)(\s*\[[ 0-9]*\])*\s*$")
    decl_match = decl_re.search(c_arg)

    if decl_match is None:
        raise ValueError("couldn't parse C declarator '%s'" % c_arg)

    name = decl_match.group(2)

    # Pointer stars (group 1) or array brackets (group 3) mean "vector".
    if decl_match.group(1) or decl_match.group(3) is not None:
        arg_class = VectorArg
    else:
        arg_class = ScalarArg

    # Whatever precedes the declarator is the type; normalize its whitespace.
    tp = c_arg[:decl_match.start()]
    tp = " ".join(tp.split())

    from pycuda.characterize import platform_bits
    from sys import platform

    if tp == "float":
        dtype = np.float32
    elif tp == "double":
        dtype = np.float64
    elif tp == "pycuda::complex<float>":
        dtype = np.complex64
    elif tp == "pycuda::complex<double>":
        dtype = np.complex128
    elif tp in ["int", "signed int"]:
        dtype = np.int32
    elif tp in ["unsigned", "unsigned int"]:
        dtype = np.uint32
    elif tp in ["long", "long int"]:
        # "long" is only treated as 64 bits wide on 64-bit non-Windows
        # platforms.
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.int64
        else:
            dtype = np.int32
    elif tp in ["unsigned long", "unsigned long int", "long unsigned int"]:
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.uint64
        else:
            dtype = np.uint32
    elif tp in ["short", "short int"]:
        dtype = np.int16
    elif tp in ["unsigned short", "unsigned short int", "short unsigned int"]:
        dtype = np.uint16
    elif tp in ["char", "signed char"]:
        dtype = np.int8
    elif tp in ["unsigned char"]:
        dtype = np.uint8
    elif tp in ["bool"]:
        # np.bool_ exists in every NumPy release; the np.bool alias does not.
        dtype = np.bool_
    else:
        import pycuda.gpuarray as gpuarray
        try:
            # Vector types (float4, int2, ...) still have to be wrapped in
            # arg_class below, so only look the dtype up here.
            dtype = gpuarray.vec._c_name_to_dtype[tp]
        except KeyError:
            raise ValueError("unknown type '%s'" % tp)

    return arg_class(dtype, name)
Example #2
0
def parse_c_arg(c_arg):
    """Turn a C argument declaration into a ``ScalarArg`` or ``VectorArg``.

    *c_arg* is a declaration such as ``"float *out"`` or ``"long n"``.
    ``const`` and ``volatile`` qualifiers are dropped before parsing.  If the
    declarator carries pointer stars or array brackets the result is built
    with ``VectorArg``, otherwise with ``ScalarArg``.

    Returns ``arg_class(dtype, name)``.

    Raises ``ValueError`` when the declarator does not match the expected
    pattern or when the type name is unknown.
    """
    import re

    # Remove qualifiers as whole words; a plain substring replace would also
    # eat "const" out of identifiers such as "constants".
    c_arg = re.sub(r"\b(?:const|volatile)\b", "", c_arg)

    # process and remove declarator
    decl_re = re.compile(r"(\**)\s*([_a-zA-Z0-9]+)(\s*\[[ 0-9]*\])*\s*$")
    decl_match = decl_re.search(c_arg)

    if decl_match is None:
        raise ValueError("couldn't parse C declarator '%s'" % c_arg)

    name = decl_match.group(2)

    if decl_match.group(1) or decl_match.group(3) is not None:
        arg_class = VectorArg
    else:
        arg_class = ScalarArg

    # The type is everything before the declarator, with whitespace collapsed.
    tp = " ".join(c_arg[:decl_match.start()].split())

    from pycuda.characterize import platform_bits
    from sys import platform

    # Types whose width does not depend on the platform.
    fixed_types = {
        "float": np.float32,
        "double": np.float64,
        "pycuda::complex<float>": np.complex64,
        "pycuda::complex<double>": np.complex128,
        "int": np.int32,
        "signed int": np.int32,
        "unsigned": np.uint32,
        "unsigned int": np.uint32,
        "short": np.int16,
        "short int": np.int16,
        "unsigned short": np.uint16,
        "unsigned short int": np.uint16,
        "short unsigned int": np.uint16,
        "char": np.int8,
        "signed char": np.int8,
        "unsigned char": np.uint8,
        # np.bool_ is available in all NumPy versions, unlike np.bool.
        "bool": np.bool_,
    }

    if tp in fixed_types:
        dtype = fixed_types[tp]
    elif tp in ["long", "long int"]:
        # "long" is only 64 bits wide on 64-bit non-Windows platforms.
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.int64
        else:
            dtype = np.int32
    elif tp in ["unsigned long", "unsigned long int", "long unsigned int"]:
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.uint64
        else:
            dtype = np.uint32
    else:
        import pycuda.gpuarray as gpuarray
        try:
            dtype = gpuarray.vec._c_name_to_dtype[tp]
        except KeyError:
            raise ValueError("unknown type '%s'" % tp)

    return arg_class(dtype, name)
Example #3
0
def dtype_to_ctype(dtype, with_fp_tex_hack=False):
    """Return the C type name that corresponds to a numpy *dtype*.

    :arg dtype: anything accepted by ``np.dtype``; must not be ``None``.
    :arg with_fp_tex_hack: if true, ``float32``/``float64`` map to
        ``"fp_tex_float"``/``"fp_tex_double"`` instead of
        ``"float"``/``"double"``.
    :returns: the C type name as a string.
    :raises ValueError: if *dtype* is ``None`` or has no known C type.
    """
    if dtype is None:
        raise ValueError("dtype may not be None")

    dtype = np.dtype(dtype)

    if dtype == np.int64 or dtype == np.uint64:
        # Only the 64-bit integer mapping depends on the platform, so the
        # platform helpers are imported lazily here.
        from pycuda.characterize import platform_bits
        from sys import platform

        if platform_bits() == 64:
            if 'win32' in platform:
                c_name = "long long"
            else:
                c_name = "long"

            if dtype == np.uint64:
                c_name = "unsigned " + c_name
            return c_name
        # On other platforms fall through to the vector-type lookup below.
    elif dtype == np.int32:
        return "int"
    elif dtype == np.uint32:
        return "unsigned int"
    elif dtype == np.int16:
        return "short int"
    elif dtype == np.uint16:
        return "short unsigned int"
    elif dtype == np.int8:
        return "signed char"
    elif dtype == np.uint8:
        return "unsigned char"
    elif dtype == np.bool_:
        # np.bool_ exists in every NumPy release; the np.bool alias does not.
        return "bool"
    elif dtype == np.float32:
        if with_fp_tex_hack:
            return "fp_tex_float"
        else:
            return "float"
    elif dtype == np.float64:
        if with_fp_tex_hack:
            return "fp_tex_double"
        else:
            return "double"
    elif dtype == np.complex64:
        return "pycuda::complex<float>"
    elif dtype == np.complex128:
        return "pycuda::complex<double>"

    # Anything else may be one of the registered vector types.
    import pycuda.gpuarray as gpuarray
    try:
        return gpuarray.vec._dtype_to_c_name[dtype]
    except KeyError:
        # Call syntax works on both Python 2 and 3 ("raise E, msg" does not).
        raise ValueError("unable to map dtype '%s'" % dtype)
Example #4
0
def dtype_to_ctype(dtype, with_fp_tex_hack=False):
    """Map a numpy *dtype* to the name of the matching C type.

    :arg dtype: a numpy dtype or anything ``np.dtype`` accepts; ``None`` is
        rejected.
    :arg with_fp_tex_hack: when true, single/double precision floats are
        reported as ``"fp_tex_float"``/``"fp_tex_double"``.
    :returns: a string holding the C type name.
    :raises ValueError: for ``None`` or for a dtype without a known C type.
    """
    if dtype is None:
        raise ValueError("dtype may not be None")

    dtype = np.dtype(dtype)

    is_int64 = dtype == np.int64
    if is_int64 or dtype == np.uint64:
        # The platform helpers are only needed for 64-bit integers, so they
        # are imported on demand.
        from pycuda.characterize import platform_bits
        from sys import platform

        if platform_bits() == 64:
            wide = "long long" if 'win32' in platform else "long"
            return wide if is_int64 else "unsigned " + wide
        # Otherwise no scalar name applies; try the vector types below.

    # Scalar types with a platform-independent C name, checked in order.
    scalar_names = (
        (np.int32, "int"),
        (np.uint32, "unsigned int"),
        (np.int16, "short int"),
        (np.uint16, "short unsigned int"),
        (np.int8, "signed char"),
        (np.uint8, "unsigned char"),
        # np.bool_ rather than np.bool: the latter is missing from some
        # NumPy releases.
        (np.bool_, "bool"),
        (np.float32, "fp_tex_float" if with_fp_tex_hack else "float"),
        (np.float64, "fp_tex_double" if with_fp_tex_hack else "double"),
        (np.complex64, "pycuda::complex<float>"),
        (np.complex128, "pycuda::complex<double>"),
    )
    for scalar_type, c_name in scalar_names:
        if dtype == scalar_type:
            return c_name

    import pycuda.gpuarray as gpuarray
    try:
        return gpuarray.vec._dtype_to_c_name[dtype]
    except KeyError:
        # "raise ValueError, msg" is a syntax error on Python 3.
        raise ValueError("unable to map dtype '%s'" % dtype)
Example #5
0
def compile_plain(source, options, keep, nvcc, cache_dir):
    """Compile *source* with *nvcc*, consulting an on-disk cache first.

    *options* is a list of extra command-line strings, *keep* requests
    ``--keep`` and prints the working directory, *nvcc* is the compiler
    executable, and *cache_dir* (if true-ish) is the directory searched for a
    previously built ``.cubin``.

    NOTE(review): this excerpt stops inside the ``except ImportError``
    fallback; the remainder of the function (result handling and return
    value on a cache miss) is not visible here.
    """
    from os.path import join

    if cache_dir:
        # The cache key hashes the source, every option, the nvcc version
        # string and the platform bit width.
        checksum = _new_md5()

        checksum.update(source)
        for option in options: 
            checksum.update(option)
        checksum.update(get_nvcc_version(nvcc))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + ".cubin")

        # Cache hit: return the stored binary.  Any failure while reading is
        # ignored and treated as a miss.
        try:
            return open(cache_path, "rb").read()
        except:
            pass

    # Write the source to "kernel.cu" in a fresh temporary directory.
    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    outf = open(cu_file_path, "w")
    outf.write(str(source))
    outf.close()

    if keep:
        # Copy before appending so the caller's list is not modified.
        options = options[:]
        options.append("--keep")

        print "*** compiler output in %s" % file_dir

    cmdline = [nvcc, "--cubin"] + options + [cu_file_name]
    try:
        from pytools.prefork import call_capture_output
    except ImportError:
        # Fallback when pytools.prefork has no call_capture_output: run the
        # compiler with call() and record no captured output.
        from pytools.prefork import call
        try:
            result = call(cmdline, cwd=file_dir)
        except OSError, e:
            raise OSError("%s was not found (is it on the PATH?) [%s]" 
                    % (nvcc, str(e)))

        stdout = None
        stderr = None
Example #6
0
def _create_vector_types():
    """Populate ``vec`` with the vector dtypes and their ``make_*`` helpers.

    For every base type and component count this builds a structured numpy
    dtype whose fields are the first *count* of ``x``, ``y``, ``z``, ``w``
    and stores it as ``vec.<name>`` (e.g. ``vec.float4``).  A matching
    ``vec.make_<name>`` static method builds a numpy array of that dtype from
    its components.  The name<->dtype tables are stored on
    ``vec._c_name_to_dtype`` and ``vec._dtype_to_c_name``.
    """
    name_to_dtype = {}
    dtype_to_name = {}

    from pycuda.characterize import platform_bits
    if platform_bits() == 32:
        long_dtype = np.int32
        ulong_dtype = np.uint32
    else:
        long_dtype = np.int64
        ulong_dtype = np.uint64

    field_names = ["x", "y", "z", "w"]

    for base_name, base_type, counts in [
        ('char', np.int8, [1, 2, 3, 4]),
        ('uchar', np.uint8, [1, 2, 3, 4]),
        ('short', np.int16, [1, 2, 3, 4]),
        ('ushort', np.uint16, [1, 2, 3, 4]),
        # "int" is the signed 32-bit type; "uint" is its unsigned sibling.
        ('int', np.int32, [1, 2, 3, 4]),
        ('uint', np.uint32, [1, 2, 3, 4]),
        ('long', long_dtype, [1, 2, 3, 4]),
        ('ulong', ulong_dtype, [1, 2, 3, 4]),
        ('longlong', np.int64, [1, 2]),
        ('ulonglong', np.uint64, [1, 2]),
        ('float', np.float32, [1, 2, 3, 4]),
        # The float64 vectors are "double1"/"double2"; labelling this row
        # "ulonglong" would overwrite the uint64 entries above.
        ('double', np.float64, [1, 2]),
    ]:
        for count in counts:
            name = "%s%d" % (base_name, count)
            dtype = np.dtype([(field_names[i], base_type)
                              for i in range(count)])

            name_to_dtype[name] = dtype
            # NOTE(review): equal dtypes registered under several names
            # (e.g. long/longlong when both are int64) keep only the last
            # name in this reverse table -- confirm that is intended.
            dtype_to_name[dtype] = name

            setattr(vec, name, dtype)

            # eval() is used so the generated function has real named
            # parameters (x, y, ...); the source text is built only from the
            # constants above.
            my_field_names = ",".join(field_names[:count])
            setattr(
                vec, "make_" + name,
                staticmethod(
                    eval(
                        "lambda %s: array((%s), dtype=my_dtype)" %
                        (my_field_names, my_field_names),
                        dict(array=np.array, my_dtype=dtype))))

    vec._dtype_to_c_name = dtype_to_name
    vec._c_name_to_dtype = name_to_dtype
Example #7
0
def _create_vector_types():
    """Create the vector dtypes on ``vec`` along with ``make_*`` constructors.

    Each (base type, count) pair becomes a structured numpy dtype with fields
    taken from ``x``, ``y``, ``z``, ``w``.  The dtype is stored as
    ``vec.<name>``, a static method ``vec.make_<name>`` builds an array of
    that dtype from its components, and the lookup tables
    ``vec._c_name_to_dtype`` / ``vec._dtype_to_c_name`` are filled in.
    """
    name_to_dtype = {}
    dtype_to_name = {}

    from pycuda.characterize import platform_bits
    if platform_bits() == 32:
        long_dtype = np.int32
        ulong_dtype = np.uint32
    else:
        long_dtype = np.int64
        ulong_dtype = np.uint64

    field_names = ["x", "y", "z", "w"]

    for base_name, base_type, counts in [
        ('char', np.int8, [1, 2, 3, 4]),
        ('uchar', np.uint8, [1, 2, 3, 4]),
        ('short', np.int16, [1, 2, 3, 4]),
        ('ushort', np.uint16, [1, 2, 3, 4]),
        ('int', np.int32, [1, 2, 3, 4]),
        ('uint', np.uint32, [1, 2, 3, 4]),
        ('long', long_dtype, [1, 2, 3, 4]),
        ('ulong', ulong_dtype, [1, 2, 3, 4]),
        ('longlong', np.int64, [1, 2]),
        ('ulonglong', np.uint64, [1, 2]),
        ('float', np.float32, [1, 2, 3, 4]),
        # float64 vectors are named "double1"/"double2".  Naming this row
        # "ulonglong" replaced the uint64 entries with float64 dtypes and
        # left no "double" types at all.
        ('double', np.float64, [1, 2]),
    ]:
        for count in counts:
            name = "%s%d" % (base_name, count)
            dtype = np.dtype([
                (field_names[i], base_type)
                for i in range(count)])

            name_to_dtype[name] = dtype
            dtype_to_name[dtype] = name

            setattr(vec, name, dtype)

            # The constructor is generated with eval() so its parameters are
            # literally named x, y, z, w; only the constants above end up in
            # the evaluated source.
            my_field_names = ",".join(field_names[:count])
            setattr(vec, "make_" + name,
                    staticmethod(eval(
                        "lambda %s: array((%s), dtype=my_dtype)"
                        % (my_field_names, my_field_names),
                        dict(array=np.array, my_dtype=dtype))))

    vec._dtype_to_c_name = dtype_to_name
    vec._c_name_to_dtype = name_to_dtype
Example #8
0
def _create_vector_types():
    """Register the vector dtypes and expose them on ``vec``.

    Every base type is combined with each of its component counts into a
    structured numpy dtype (fields drawn from ``x``, ``y``, ``z``, ``w``).
    The dtype is passed to ``register_dtype`` under its C name, stored as
    ``vec.<name>``, and accompanied by a ``vec.make_<name>`` static method
    that builds an array of that dtype from its components.
    """
    from pycuda.characterize import platform_bits

    # The dtypes behind "long"/"ulong" depend on what platform_bits() reports.
    if platform_bits() != 32:
        long_dtype, ulong_dtype = np.int64, np.uint64
    else:
        long_dtype, ulong_dtype = np.int32, np.uint32

    components = ["x", "y", "z", "w"]

    from pycuda.tools import register_dtype

    up_to_four = [1, 2, 3, 4]
    up_to_two = [1, 2]

    scalar_kinds = [
        ("char", np.int8, up_to_four),
        ("uchar", np.uint8, up_to_four),
        ("short", np.int16, up_to_four),
        ("ushort", np.uint16, up_to_four),
        ("int", np.int32, up_to_four),
        ("uint", np.uint32, up_to_four),
        ("long", long_dtype, up_to_four),
        ("ulong", ulong_dtype, up_to_four),
        ("longlong", np.int64, up_to_two),
        ("ulonglong", np.uint64, up_to_two),
        ("float", np.float32, up_to_four),
        ("double", np.float64, up_to_two),
    ]

    for c_base, scalar_type, widths in scalar_kinds:
        for width in widths:
            used_fields = components[:width]
            type_name = "%s%d" % (c_base, width)
            vec_dtype = np.dtype([(field, scalar_type) for field in used_fields])

            register_dtype(vec_dtype, type_name, alias_ok=True)
            setattr(vec, type_name, vec_dtype)

            # Build the constructor from source so its parameters carry the
            # component names.
            arg_list = ",".join(used_fields)
            factory_source = "lambda %s: array((%s), dtype=my_dtype)" % (arg_list, arg_list)
            factory = eval(factory_source, dict(array=np.array, my_dtype=vec_dtype))
            setattr(vec, "make_" + type_name, staticmethod(factory))
Example #9
0
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Compile CUDA *source* with *nvcc* and return the resulting binary.

    :arg source: the CUDA source code as a string.
    :arg options: list of additional nvcc command-line options.
    :arg keep: if true, pass ``--keep`` to nvcc, print the working directory
        and leave it in place afterwards.
    :arg nvcc: the nvcc executable to run.
    :arg cache_dir: directory used to cache results, or a false value to
        disable caching.
    :arg target: one of ``"cubin"``, ``"ptx"`` or ``"fatbin"``.
    :returns: the contents of the compiled output file as bytes.
    :raises pycuda.driver.CompileError: if nvcc fails or produces no output
        file.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        # The cache key covers everything that influences the output: the
        # (preprocessed, if it has includes) source, the options, the nvcc
        # version and the platform bit width.
        checksum = _new_md5()

        if '#include' in source:
            checksum.update(
                preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_path = join(cache_dir, checksum.hexdigest() + "." + target)

        try:
            with open(cache_path, "rb") as cache_file:
                return cache_file.read()
        except (IOError, OSError):
            # Missing or unreadable cache entry: compile from scratch.
            pass

    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    with open(cu_file_path, "w") as outf:
        outf.write(str(source))

    if keep:
        # Copy so the caller's option list is not modified.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(cmdline,
                                                 cwd=file_dir,
                                                 error_on_nonzero=False)

    # Read the output eagerly so no file handle is left open if an error is
    # raised below.  A missing file is recorded as None.
    try:
        with open(join(file_dir, file_root + "." + target), "rb") as result_f:
            result_data = result_f.read()
    except IOError:
        result_data = None

    # A missing output file is always a failure, even when nvcc exited with
    # status 0 and printed nothing.
    if result != 0 or result_data is None:
        if result == 0:
            from warnings import warn
            warn("PyCUDA: nvcc exited with status 0, but appears to have "
                 "encountered an error")
        from pycuda.driver import CompileError
        raise CompileError("nvcc compilation of %s failed" % cu_file_path,
                           cmdline,
                           stdout=stdout.decode("utf-8", "replace"),
                           stderr=stderr.decode("utf-8", "replace"))

    if stdout or stderr:
        compiler_text = (stdout + stderr).decode("utf-8", "replace")
        lcase_err_text = compiler_text.lower()
        from warnings import warn
        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn(
                "nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4)
        warn("The CUDA compiler succeeded, but said the following:\n" +
             compiler_text,
             stacklevel=4)

    if cache_dir:
        with open(cache_path, "wb") as outf:
            outf.write(result_data)

    if not keep:
        from os import listdir, unlink, rmdir
        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data
Example #10
0
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Run *nvcc* on *source* and return the compiled binary as bytes.

    :arg source: CUDA source code (string).
    :arg options: list of extra nvcc options.
    :arg keep: when true, ``--keep`` is added, the temporary directory is
        announced on stdout and it is not removed afterwards.
    :arg nvcc: name or path of the nvcc executable.
    :arg cache_dir: cache directory; a false value disables caching.
    :arg target: output kind, one of ``"cubin"``, ``"ptx"``, ``"fatbin"``.
    :returns: the bytes of the generated output file.
    :raises pycuda.driver.CompileError: when nvcc reports failure or leaves
        no output file behind.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        checksum = _new_md5()

        # Hash the preprocessed text when includes are present, otherwise
        # the raw source.
        if '#include' in source:
            checksum.update(preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_path = join(cache_dir, checksum.hexdigest() + "." + target)

        try:
            with open(cache_path, "rb") as cached:
                return cached.read()
        except (IOError, OSError):
            # Treat an absent or unreadable cache entry as a miss.
            pass

    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    with open(cu_file_path, "w") as outf:
        outf.write(str(source))

    if keep:
        # Work on a copy; the caller's list must stay untouched.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(cmdline,
            cwd=file_dir, error_on_nonzero=False)

    output_path = join(file_dir, file_root + "." + target)
    try:
        # Read right away so the handle is closed before any error below.
        with open(output_path, "rb") as result_f:
            result_data = result_f.read()
        no_output = False
    except IOError:
        result_data = None
        no_output = True

    # Without an output file there is nothing to return, so this is a
    # failure regardless of exit status or compiler messages.
    if result != 0 or no_output:
        if result == 0:
            from warnings import warn
            warn("PyCUDA: nvcc exited with status 0, but appears to have "
                    "encountered an error")
        from pycuda.driver import CompileError
        raise CompileError("nvcc compilation of %s failed" % cu_file_path,
                cmdline, stdout=stdout.decode("utf-8", "replace"),
                stderr=stderr.decode("utf-8", "replace"))

    if stdout or stderr:
        said = (stdout+stderr).decode("utf-8", "replace")
        lcase_err_text = said.lower()
        from warnings import warn
        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn("nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4)
        warn("The CUDA compiler succeeded, but said the following:\n"
                + said, stacklevel=4)

    if cache_dir:
        with open(cache_path, "wb") as outf:
            outf.write(result_data)

    if not keep:
        from os import listdir, unlink, rmdir
        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data