def parse_c_arg(c_arg):
    """Parse a single C argument declaration into an argument descriptor.

    The declaration is split into a trailing declarator (optional ``*``
    prefix, identifier, optional ``[...]`` suffixes) and the type text in
    front of it.  ``const`` and ``volatile`` qualifiers are ignored.

    :arg c_arg: a C parameter declaration, e.g. ``"const float *x"``.
    :returns: ``VectorArg(dtype, name)`` when the declarator has a pointer
        or array part, ``ScalarArg(dtype, name)`` otherwise.
    :raises ValueError: if the declarator cannot be parsed or the type name
        is not known.
    """
    import re

    # Drop the qualifiers as whole words only; a plain substring replace
    # would also eat part of identifiers such as "constant".
    c_arg = re.sub(r"\b(?:const|volatile)\b", "", c_arg)

    # process and remove declarator
    decl_re = re.compile(r"(\**)\s*([_a-zA-Z0-9]+)(\s*\[[ 0-9]*\])*\s*$")
    decl_match = decl_re.search(c_arg)

    if decl_match is None:
        raise ValueError("couldn't parse C declarator '%s'" % c_arg)

    name = decl_match.group(2)

    # A leading '*' or a trailing '[...]' makes this a pointer/array argument.
    if decl_match.group(1) or decl_match.group(3) is not None:
        arg_class = VectorArg
    else:
        arg_class = ScalarArg

    # Everything before the declarator is the type; normalize its whitespace.
    tp = c_arg[:decl_match.start()]
    tp = " ".join(tp.split())

    from pycuda.characterize import platform_bits
    from sys import platform

    if tp == "float":
        dtype = np.float32
    elif tp == "double":
        dtype = np.float64
    elif tp == "pycuda::complex<float>":
        dtype = np.complex64
    elif tp == "pycuda::complex<double>":
        dtype = np.complex128
    elif tp in ["int", "signed int"]:
        dtype = np.int32
    elif tp in ["unsigned", "unsigned int"]:
        dtype = np.uint32
    elif tp in ["long", "long int"]:
        # 'long' is only treated as 64 bits wide on 64-bit non-Windows hosts
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.int64
        else:
            dtype = np.int32
    elif tp in ["unsigned long", "unsigned long int", "long unsigned int"]:
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.uint64
        else:
            dtype = np.uint32
    elif tp in ["long long", "long long int"]:
        # spelling produced by dtype_to_ctype for int64 on win32
        dtype = np.int64
    elif tp in ["unsigned long long", "unsigned long long int"]:
        dtype = np.uint64
    elif tp in ["short", "short int"]:
        dtype = np.int16
    elif tp in ["unsigned short", "unsigned short int", "short unsigned int"]:
        dtype = np.uint16
    elif tp in ["char", "signed char"]:
        dtype = np.int8
    elif tp in ["unsigned char"]:
        dtype = np.uint8
    elif tp in ["bool"]:
        # np.bool_ exists in every NumPy release; the np.bool alias does not
        dtype = np.bool_
    else:
        # Fall back to the vector types registered on gpuarray.vec.
        import pycuda.gpuarray as gpuarray
        try:
            dtype = gpuarray.vec._c_name_to_dtype[tp]
        except KeyError:
            raise ValueError("unknown type '%s'" % tp)

    return arg_class(dtype, name)
def parse_c_arg(c_arg):
    """Turn one C parameter declaration into a ``VectorArg``/``ScalarArg``.

    ``const`` and ``volatile`` are discarded, the trailing declarator
    (``*`` prefix, identifier, ``[...]`` suffixes) is matched, and the text
    preceding it is looked up as a type name.

    :arg c_arg: declaration text such as ``"unsigned int n"`` or
        ``"float *data"``.
    :returns: ``VectorArg(dtype, name)`` for pointer/array declarators,
        ``ScalarArg(dtype, name)`` for everything else.
    :raises ValueError: when no declarator is found or the type is unknown.
    """
    import re

    # Whole-word removal: a substring replace would corrupt identifiers that
    # merely contain "const" or "volatile" (e.g. "constant" -> "ant").
    c_arg = re.sub(r"\b(?:const|volatile)\b", "", c_arg)

    # process and remove declarator
    decl_re = re.compile(r"(\**)\s*([_a-zA-Z0-9]+)(\s*\[[ 0-9]*\])*\s*$")
    decl_match = decl_re.search(c_arg)
    if decl_match is None:
        raise ValueError("couldn't parse C declarator '%s'" % c_arg)

    name = decl_match.group(2)
    has_pointer = bool(decl_match.group(1))
    has_array = decl_match.group(3) is not None
    arg_class = VectorArg if (has_pointer or has_array) else ScalarArg

    # The type is whatever precedes the declarator, whitespace-normalized.
    tp = " ".join(c_arg[:decl_match.start()].split())

    from pycuda.characterize import platform_bits
    from sys import platform

    # Types whose width does not depend on the host platform.
    fixed_width = {
        "float": np.float32,
        "double": np.float64,
        "pycuda::complex<float>": np.complex64,
        "pycuda::complex<double>": np.complex128,
        "int": np.int32,
        "signed int": np.int32,
        "unsigned": np.uint32,
        "unsigned int": np.uint32,
        # 'long long' spellings are what dtype_to_ctype emits on win32
        "long long": np.int64,
        "long long int": np.int64,
        "unsigned long long": np.uint64,
        "unsigned long long int": np.uint64,
        "short": np.int16,
        "short int": np.int16,
        "unsigned short": np.uint16,
        "unsigned short int": np.uint16,
        "short unsigned int": np.uint16,
        "char": np.int8,
        "signed char": np.int8,
        "unsigned char": np.uint8,
        # np.bool_ is available in every NumPy release, unlike np.bool
        "bool": np.bool_,
    }

    if tp in fixed_width:
        dtype = fixed_width[tp]
    elif tp in ["long", "long int"]:
        # 'long' is only treated as 64 bits wide on 64-bit non-Windows hosts
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.int64
        else:
            dtype = np.int32
    elif tp in ["unsigned long", "unsigned long int", "long unsigned int"]:
        if platform_bits() == 64 and 'win32' not in platform:
            dtype = np.uint64
        else:
            dtype = np.uint32
    else:
        # Last resort: the vector types registered on gpuarray.vec.
        import pycuda.gpuarray as gpuarray
        try:
            dtype = gpuarray.vec._c_name_to_dtype[tp]
        except KeyError:
            raise ValueError("unknown type '%s'" % tp)

    return arg_class(dtype, name)
def dtype_to_ctype(dtype, with_fp_tex_hack=False):
    """Return the C type name used for a numpy dtype.

    :arg dtype: anything ``np.dtype`` accepts; must not be ``None``.
    :arg with_fp_tex_hack: if true, ``float32``/``float64`` map to
        ``"fp_tex_float"``/``"fp_tex_double"`` instead of
        ``"float"``/``"double"``.
    :returns: the C type name as a string.
    :raises ValueError: if *dtype* is ``None`` or has no known C equivalent.
    """
    if dtype is None:
        raise ValueError("dtype may not be None")

    dtype = np.dtype(dtype)

    if dtype == np.int64 or dtype == np.uint64:
        # Only the 64-bit integers need host information, so the platform
        # helpers are imported here rather than unconditionally.
        from pycuda.characterize import platform_bits
        from sys import platform

        if platform_bits() == 64:
            # On win32 the 64-bit integer type is spelled 'long long'.
            if 'win32' in platform:
                base = "long long"
            else:
                base = "long"

            if dtype == np.int64:
                return base
            return "unsigned " + base
        # On other platforms fall through to the vector-type lookup below.

    if dtype == np.int32:
        return "int"
    if dtype == np.uint32:
        return "unsigned int"
    if dtype == np.int16:
        return "short int"
    if dtype == np.uint16:
        return "short unsigned int"
    if dtype == np.int8:
        return "signed char"
    if dtype == np.uint8:
        return "unsigned char"
    # np.bool_ exists in every NumPy release; the np.bool alias does not, and
    # this comparison is reached for every dtype that gets this far.
    if dtype == np.bool_:
        return "bool"
    if dtype == np.float32:
        if with_fp_tex_hack:
            return "fp_tex_float"
        return "float"
    if dtype == np.float64:
        if with_fp_tex_hack:
            return "fp_tex_double"
        return "double"
    if dtype == np.complex64:
        return "pycuda::complex<float>"
    if dtype == np.complex128:
        return "pycuda::complex<double>"

    # Anything else must be one of the vector types registered on
    # gpuarray.vec.
    import pycuda.gpuarray as gpuarray
    try:
        return gpuarray.vec._dtype_to_c_name[dtype]
    except KeyError:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise ValueError("unable to map dtype '%s'" % dtype)
def compile_plain(source, options, keep, nvcc, cache_dir):
    # Compile CUDA source to a cubin with nvcc, using an on-disk cache keyed
    # by source, options, nvcc version and platform bit width.
    #
    # NOTE(review): the visible body stops after the ImportError fallback
    # below -- the part that consumes result/stdout/stderr is not shown here,
    # so this block appears to be truncated. Confirm against the full file.
    from os.path import join

    if cache_dir:
        # The cache key covers everything that influences the compiled output.
        checksum = _new_md5()

        checksum.update(source)
        for option in options:
            checksum.update(option)
        checksum.update(get_nvcc_version(nvcc))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + ".cubin")

        # Best effort: any failure to read the cached file falls through to a
        # fresh compilation.
        try:
            return open(cache_path, "rb").read()
        except:
            pass

    # Write the source into a fresh temporary directory for nvcc to work in.
    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    outf = open(cu_file_path, "w")
    outf.write(str(source))
    outf.close()

    if keep:
        # Copy before appending so the caller's option list is not modified.
        options = options[:]
        options.append("--keep")

        print "*** compiler output in %s" % file_dir

    # nvcc is run with cwd=file_dir, hence the bare file name here.
    cmdline = [nvcc, "--cubin"] + options + [cu_file_name]

    try:
        from pytools.prefork import call_capture_output
    except ImportError:
        # Fallback when call_capture_output cannot be imported: run nvcc
        # without capturing its output.
        from pytools.prefork import call
        try:
            result = call(cmdline, cwd=file_dir)
        except OSError, e:
            raise OSError("%s was not found (is it on the PATH?) [%s]" % (nvcc, str(e)))

        stdout = None
        stderr = None
def _create_vector_types():
    """Create the vector dtypes and their constructors on ``vec``.

    For every base type and component count, a structured numpy dtype with
    fields ``x``, ``y``, ``z``, ``w`` (as many as needed) is built and stored
    as ``vec.<name><count>`` (e.g. ``vec.float4``), together with a
    ``vec.make_<name><count>`` static method that builds a value of that
    dtype from its components.

    The name <-> dtype mappings are stored as ``vec._c_name_to_dtype`` and
    ``vec._dtype_to_c_name``.  When two names produce equal dtypes, the
    reverse mapping keeps the one registered last.
    """
    name_to_dtype = {}
    dtype_to_name = {}

    from pycuda.characterize import platform_bits
    if platform_bits() == 32:
        long_dtype = np.int32
        ulong_dtype = np.uint32
    else:
        long_dtype = np.int64
        ulong_dtype = np.uint64

    field_names = ["x", "y", "z", "w"]

    for base_name, base_type, counts in [
            ('char', np.int8, [1, 2, 3, 4]),
            ('uchar', np.uint8, [1, 2, 3, 4]),
            ('short', np.int16, [1, 2, 3, 4]),
            ('ushort', np.uint16, [1, 2, 3, 4]),
            # signed: this row previously used np.uint32 by mistake
            ('int', np.int32, [1, 2, 3, 4]),
            ('uint', np.uint32, [1, 2, 3, 4]),
            ('long', long_dtype, [1, 2, 3, 4]),
            ('ulong', ulong_dtype, [1, 2, 3, 4]),
            ('longlong', np.int64, [1, 2]),
            ('ulonglong', np.uint64, [1, 2]),
            ('float', np.float32, [1, 2, 3, 4]),
            # previously mislabelled 'ulonglong', which overwrote the real
            # ulonglong1/2 with float64 fields and never defined double1/2
            ('double', np.float64, [1, 2]),
            ]:
        for count in counts:
            name = "%s%d" % (base_name, count)
            dtype = np.dtype([
                (field_names[i], base_type)
                for i in range(count)])

            name_to_dtype[name] = dtype
            dtype_to_name[dtype] = name

            setattr(vec, name, dtype)

            # The constructor is generated from source text so that its
            # parameters are literally named x, y, z, w.  The text is built
            # from the fixed field names above only.
            my_field_names = ",".join(field_names[:count])
            setattr(vec, "make_" + name,
                    staticmethod(eval(
                        "lambda %s: array((%s), dtype=my_dtype)"
                        % (my_field_names, my_field_names),
                        dict(array=np.array, my_dtype=dtype))))

    vec._dtype_to_c_name = dtype_to_name
    vec._c_name_to_dtype = name_to_dtype
def _create_vector_types():
    """Populate ``vec`` with vector dtypes and ``make_*`` constructors.

    Each ``(base name, scalar type, counts)`` row yields one structured numpy
    dtype per count, with fields taken in order from ``x, y, z, w``.  The
    dtype is stored as ``vec.<base><count>`` and a matching static method
    ``vec.make_<base><count>`` builds a value from its components.

    ``vec._c_name_to_dtype`` and ``vec._dtype_to_c_name`` receive the
    resulting lookup tables; for equal dtypes registered under several
    names, the reverse table holds the last name.
    """
    name_to_dtype = {}
    dtype_to_name = {}

    from pycuda.characterize import platform_bits
    if platform_bits() == 32:
        long_dtype = np.int32
        ulong_dtype = np.uint32
    else:
        long_dtype = np.int64
        ulong_dtype = np.uint64

    field_names = ["x", "y", "z", "w"]

    base_types = [
        ('char', np.int8, [1, 2, 3, 4]),
        ('uchar', np.uint8, [1, 2, 3, 4]),
        ('short', np.int16, [1, 2, 3, 4]),
        ('ushort', np.uint16, [1, 2, 3, 4]),
        ('int', np.int32, [1, 2, 3, 4]),
        ('uint', np.uint32, [1, 2, 3, 4]),
        ('long', long_dtype, [1, 2, 3, 4]),
        ('ulong', ulong_dtype, [1, 2, 3, 4]),
        ('longlong', np.int64, [1, 2]),
        ('ulonglong', np.uint64, [1, 2]),
        ('float', np.float32, [1, 2, 3, 4]),
        # This row used to be named 'ulonglong', clobbering ulonglong1/2 with
        # float64-based dtypes and leaving double1/2 undefined.
        ('double', np.float64, [1, 2]),
    ]

    for base_name, base_type, counts in base_types:
        for count in counts:
            name = "%s%d" % (base_name, count)
            dtype = np.dtype([
                (field_names[i], base_type)
                for i in range(count)])

            name_to_dtype[name] = dtype
            dtype_to_name[dtype] = name

            setattr(vec, name, dtype)

            # Generated from source so the lambda's parameters carry the
            # field names; only the fixed names above are interpolated.
            my_field_names = ",".join(field_names[:count])
            maker = eval(
                "lambda %s: array((%s), dtype=my_dtype)"
                % (my_field_names, my_field_names),
                dict(array=np.array, my_dtype=dtype))
            setattr(vec, "make_" + name, staticmethod(maker))

    vec._dtype_to_c_name = dtype_to_name
    vec._c_name_to_dtype = name_to_dtype
def _create_vector_types():
    """Register the vector dtypes and attach them, with makers, to ``vec``.

    Every entry of the table below is expanded into one structured numpy
    dtype per component count (fields named ``x``, ``y``, ``z``, ``w`` in
    that order).  Each dtype is passed to ``register_dtype`` under its name
    with ``alias_ok=True``, stored as ``vec.<name>``, and accompanied by a
    ``vec.make_<name>`` static method that builds a value from components.
    """
    from pycuda.characterize import platform_bits

    # The width chosen for 'long'/'ulong' follows the platform bit width.
    thirty_two_bit = platform_bits() == 32
    long_dtype = np.int32 if thirty_two_bit else np.int64
    ulong_dtype = np.uint32 if thirty_two_bit else np.uint64

    field_names = ["x", "y", "z", "w"]

    from pycuda.tools import register_dtype

    up_to_four = (1, 2, 3, 4)
    up_to_two = (1, 2)
    scalar_table = (
        ("char", np.int8, up_to_four),
        ("uchar", np.uint8, up_to_four),
        ("short", np.int16, up_to_four),
        ("ushort", np.uint16, up_to_four),
        ("int", np.int32, up_to_four),
        ("uint", np.uint32, up_to_four),
        ("long", long_dtype, up_to_four),
        ("ulong", ulong_dtype, up_to_four),
        ("longlong", np.int64, up_to_two),
        ("ulonglong", np.uint64, up_to_two),
        ("float", np.float32, up_to_four),
        ("double", np.float64, up_to_two),
    )

    for prefix, scalar_type, widths in scalar_table:
        for width in widths:
            components = field_names[:width]
            type_name = "%s%d" % (prefix, width)
            vector_dtype = np.dtype(
                [(component, scalar_type) for component in components])

            register_dtype(vector_dtype, type_name, alias_ok=True)
            setattr(vec, type_name, vector_dtype)

            # The maker is evaluated from source text so that its parameters
            # are named after the fields.
            params = ",".join(components)
            maker_source = "lambda %s: array((%s), dtype=my_dtype)" % (
                params, params)
            maker = eval(
                maker_source, dict(array=np.array, my_dtype=vector_dtype))
            setattr(vec, "make_" + type_name, staticmethod(maker))
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Compile CUDA *source* with nvcc and return the compiled binary data.

    :arg source: the CUDA source text.
    :arg options: list of extra command line options passed to nvcc.
    :arg keep: if true, ``--keep`` is passed to nvcc, the working directory
        is printed, and it is not removed afterwards.
    :arg nvcc: the nvcc executable to run.
    :arg cache_dir: directory used to cache compilation results; a false
        value disables caching.
    :arg target: one of ``"cubin"``, ``"ptx"`` or ``"fatbin"``.
    :returns: the contents of the file produced by nvcc, as bytes.
    :raises pycuda.driver.CompileError: if nvcc exits with a nonzero status
        or does not produce an output file.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        # The cache key covers everything that influences the output.
        checksum = _new_md5()

        if '#include' in source:
            # Hash the preprocessed text so changes in included files
            # invalidate the cache entry.
            checksum.update(
                preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + "." + target)

        # A missing or unreadable cache entry just means "compile again";
        # unrelated exceptions are no longer swallowed.
        try:
            with open(cache_path, "rb") as cache_f:
                return cache_f.read()
        except (IOError, OSError):
            pass

    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    with open(cu_file_path, "w") as outf:
        outf.write(str(source))

    if keep:
        # Copy before appending so the caller's list is left untouched.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    # nvcc runs with cwd=file_dir, hence the bare file name.
    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(
        cmdline, cwd=file_dir, error_on_nonzero=False)

    try:
        with open(join(file_dir, file_root + "." + target), "rb") as result_f:
            result_data = result_f.read()
    except IOError:
        no_output = True
    else:
        no_output = False

    # A missing output file is always a failure. Previously a silent nvcc run
    # with exit status 0 and no output file fell through to an unbound
    # variable further down.
    if result != 0 or no_output:
        if result == 0:
            from warnings import warn
            warn("PyCUDA: nvcc exited with status 0, but appears to have "
                    "encountered an error")
        from pycuda.driver import CompileError
        raise CompileError("nvcc compilation of %s failed" % cu_file_path,
                cmdline,
                stdout=stdout.decode("utf-8", "replace"),
                stderr=stderr.decode("utf-8", "replace"))

    if stdout or stderr:
        lcase_err_text = (stdout + stderr).decode("utf-8", "replace").lower()
        from warnings import warn
        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn("nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4)
        warn("The CUDA compiler succeeded, but said the following:\n"
                + (stdout + stderr).decode("utf-8", "replace"), stacklevel=4)

    if cache_dir:
        with open(cache_path, "wb") as outf:
            outf.write(result_data)

    if not keep:
        # Remove the working directory and everything nvcc left in it.
        from os import listdir, unlink, rmdir
        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Run nvcc on *source* and return the bytes of the generated file.

    Results are cached in *cache_dir* (when given) under a key derived from
    the source (preprocessed if it contains ``#include``), the options, the
    nvcc version and the platform bit width.

    :arg source: CUDA source text.
    :arg options: list of additional nvcc command line options.
    :arg keep: when true, adds ``--keep``, prints the working directory and
        leaves it in place.
    :arg nvcc: nvcc executable to invoke.
    :arg cache_dir: cache directory, or a false value for no caching.
    :arg target: ``"cubin"``, ``"ptx"`` or ``"fatbin"``.
    :returns: bytes read from nvcc's output file.
    :raises pycuda.driver.CompileError: when nvcc reports failure or writes
        no output file.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        checksum = _new_md5()

        if '#include' in source:
            # Included files affect the result, so hash the preprocessed text.
            hashed_source = preprocess_source(source, options, nvcc)
        else:
            hashed_source = source
        checksum.update(hashed_source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits
        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + "." + target)

        # Cache misses and unreadable entries fall through to compilation;
        # other exceptions are no longer hidden by a bare except.
        try:
            with open(cache_path, "rb") as cached:
                return cached.read()
        except (IOError, OSError):
            pass

    from tempfile import mkdtemp
    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    with open(cu_file_path, "w") as outf:
        outf.write(str(source))

    if keep:
        # Work on a copy so the caller's option list is not modified.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    # The compiler is started inside file_dir, so the bare name suffices.
    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(
        cmdline, cwd=file_dir, error_on_nonzero=False)

    # None means nvcc produced no output file.
    result_data = None
    try:
        with open(join(file_dir, file_root + "." + target), "rb") as result_f:
            result_data = result_f.read()
    except IOError:
        pass

    # Without an output file there is nothing to return, whatever the exit
    # status or messages. The earlier code only raised if nvcc also printed
    # something and otherwise hit an unbound variable.
    if result != 0 or result_data is None:
        if result == 0:
            from warnings import warn
            warn("PyCUDA: nvcc exited with status 0, but appears to have "
                    "encountered an error")
        from pycuda.driver import CompileError
        raise CompileError("nvcc compilation of %s failed" % cu_file_path,
                cmdline,
                stdout=stdout.decode("utf-8", "replace"),
                stderr=stderr.decode("utf-8", "replace"))

    if stdout or stderr:
        compiler_text = (stdout + stderr).decode("utf-8", "replace")
        lcase_err_text = compiler_text.lower()
        from warnings import warn
        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn("nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4)
        warn("The CUDA compiler succeeded, but said the following:\n"
                + compiler_text, stacklevel=4)

    if cache_dir:
        with open(cache_path, "wb") as outf:
            outf.write(result_data)

    if not keep:
        # Clean up the temporary working directory and its contents.
        from os import listdir, unlink, rmdir
        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data