def get_elwise_kernel_and_types(
    arguments,
    operation,
    name="kernel",
    keep=False,
    options=None,
    use_range=False,
    **kwargs
):
    """Build an elementwise kernel and return it with its argument list.

    :arg arguments: either a comma-separated string of C argument
        declarations (each piece is parsed with
        ``pycuda.tools.parse_c_arg``) or a sequence of already-built
        argument objects.  A sequence is copied, never modified.
    :arg operation: forwarded unchanged to the module builder.
    :arg name: name of the kernel function looked up in the built module.
    :arg keep: forwarded unchanged to the module builder.
    :arg options: forwarded unchanged to the module builder.
    :arg use_range: when true, ``start``/``stop``/``step`` scalar arguments
        (``np.intp``) are appended and ``get_elwise_range_module`` builds
        the module; otherwise a single ``n`` scalar (``np.uintp``) is
        appended and ``get_elwise_module`` is used.
    :arg kwargs: forwarded unchanged to the module builder.
    :returns: a tuple ``(func, arguments)`` where *func* is the prepared
        kernel function and *arguments* is the full argument list,
        including the implicitly appended scalars.
    """
    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg

        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Work on a private copy: appending the implicit scalars to the
        # caller's own list would make them accumulate when the same list
        # is passed in more than once (e.g. once per use_range setting).
        arguments = list(arguments)

    if use_range:
        arguments.extend(
            [
                ScalarArg(np.intp, "start"),
                ScalarArg(np.intp, "stop"),
                ScalarArg(np.intp, "step"),
            ]
        )
        module_builder = get_elwise_range_module
    else:
        arguments.append(ScalarArg(np.uintp, "n"))
        module_builder = get_elwise_module

    mod = module_builder(arguments, operation, name, keep, options, **kwargs)

    func = mod.get_function(name)
    # The prepared signature is the concatenation of every argument's
    # struct format character, in declaration order.
    func.prepare("".join(arg.struct_char for arg in arguments))

    return func, arguments
def get_elwise_kernel_and_types(
    arguments,
    operation,
    name="kernel",
    keep=False,
    options=None,
    use_range=False,
    **kwargs
):
    """Create a prepared elementwise kernel plus its complete argument list.

    :arg arguments: a comma-separated string of C argument declarations
        (split on ``","`` and parsed piecewise with
        ``pycuda.tools.parse_c_arg``), or a sequence of argument objects.
        A sequence passed in is copied and left untouched.
    :arg operation: passed through to the module builder.
    :arg name: kernel function name to fetch from the built module.
    :arg keep: passed through to the module builder.
    :arg options: passed through to the module builder.
    :arg use_range: selects ``get_elwise_range_module`` and the
        ``start``/``stop``/``step`` scalars (``np.intp``) when true;
        selects ``get_elwise_module`` and the ``n`` scalar (``np.uintp``)
        when false.
    :arg kwargs: passed through to the module builder.
    :returns: ``(func, arguments)`` -- the prepared function and the
        argument list with the implicit scalars appended.
    """
    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg

        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Copy so the implicit trailing scalars never leak into the
        # caller's list (repeated calls would otherwise keep growing it).
        arguments = list(arguments)

    if use_range:
        implicit_args = [
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
        ]
        module_builder = get_elwise_range_module
    else:
        implicit_args = [ScalarArg(np.uintp, "n")]
        module_builder = get_elwise_module
    arguments.extend(implicit_args)

    mod = module_builder(arguments, operation, name, keep, options, **kwargs)

    func = mod.get_function(name)
    # One struct format character per argument, in order.
    func.prepare("".join(arg.struct_char for arg in arguments))

    return func, arguments
def get_elwise_kernel_and_types(
    arguments, operation, name="kernel", keep=False, options=None, **kwargs
):
    """Build an elementwise kernel and return it with its argument list.

    :arg arguments: a comma-separated string of C argument declarations
        (each piece parsed with ``pycuda.tools.parse_c_arg``) or a sequence
        of argument objects.  A sequence is copied, not modified.
    :arg operation: forwarded to ``get_elwise_module``.
    :arg name: name of the kernel function fetched from the module.
    :arg keep: forwarded to ``get_elwise_module``.
    :arg options: forwarded to ``get_elwise_module``; ``None`` (the
        default) is replaced by a fresh empty list.
    :arg kwargs: forwarded to ``get_elwise_module``.
    :returns: ``(func, arguments)`` where *arguments* ends with the
        implicitly appended ``n`` scalar (``numpy.uintp``).
    """
    # A literal ``[]`` default would be one list object shared by every
    # call; build a new one per call instead.
    if options is None:
        options = []

    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg

        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Copy so that appending ``n`` below does not alter the caller's list.
        arguments = list(arguments)

    arguments.append(ScalarArg(numpy.uintp, "n"))

    mod = get_elwise_module(arguments, operation, name, keep, options, **kwargs)

    func = mod.get_function(name)
    # Second positional argument kept exactly as before.
    # NOTE(review): presumably a block shape for an older prepare() API --
    # confirm against the PyCUDA version in use.
    func.prepare("".join(arg.struct_char for arg in arguments), (1, 1, 1))

    return func, arguments
def get_elwise_kernel_and_types(
    arguments, operation, name="kernel", keep=False, options=None, **kwargs
):
    """Create a prepared elementwise kernel plus its complete argument list.

    :arg arguments: either a string of comma-separated C argument
        declarations (split on ``","`` and parsed with
        ``pycuda.tools.parse_c_arg``) or a sequence of argument objects,
        which is copied rather than modified.
    :arg operation: passed through to ``get_elwise_module``.
    :arg name: kernel function name to look up in the built module.
    :arg keep: passed through to ``get_elwise_module``.
    :arg options: passed through to ``get_elwise_module``; when ``None``
        a new empty list is used.
    :arg kwargs: passed through to ``get_elwise_module``.
    :returns: ``(func, arguments)`` -- the prepared function and the
        argument list including the trailing ``n`` scalar
        (``numpy.uintp``).
    """
    # Avoid a shared mutable default: create the empty list per call.
    options = [] if options is None else options

    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg

        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Private copy: the caller's list must not grow by one ``n`` entry
        # every time it is reused.
        arguments = list(arguments)

    arguments.append(ScalarArg(numpy.uintp, "n"))

    mod = get_elwise_module(arguments, operation, name, keep, options, **kwargs)

    func = mod.get_function(name)
    type_chars = "".join(arg.struct_char for arg in arguments)
    # The (1, 1, 1) argument is preserved from the original call.
    # NOTE(review): looks like a block shape for a legacy prepare()
    # signature -- confirm against the PyCUDA version in use.
    func.prepare(type_chars, (1, 1, 1))

    return func, arguments
def map_expr_to_kernel(expr, name='kernel', dtype='float', bc='NONPBC'):
    """Translate an index expression into a prepared CUDA kernel.

    Example input::

        expr = 'A[i,j] = b[i,].W[,j] + C[i,j]'

    The expression is rewritten into C source by an ordered series of
    regular-expression substitutions.  Array names (the leading identifier
    of *expr* plus every identifier followed by ``[`` on the right-hand
    side) and scalar names (alphabetic runs left on the right-hand side
    once indexed arrays and call names are masked out) are collected to
    build the kernel argument declaration, which is handed to
    ``get_cuda_module`` together with the rewritten expression.

    :arg expr: the expression to translate.
    :arg name: kernel function name; must not be empty.
    :arg dtype: element type name; must be one of the accepted spellings
        checked below.
    :arg bc: ``'PBC'`` or ``'NONPBC'``.
    :returns: ``(func_kernel, c_source, arguments)`` -- the prepared
        function, the source returned by ``get_cuda_module``, and the
        argument declarations parsed with ``pycuda.tools.parse_c_arg``.
    :raises ValueError: for an unsupported *dtype* or *bc*, or an empty
        *name*.
    """
    # NOTE(review): 'usigned' looks like a typo of 'unsigned'; the spelling
    # is kept because get_cuda_module is not visible here and may rely on
    # these exact strings -- confirm before changing.
    supported_dtypes = ['float', 'bool', 'double', 'int', 'long',
                        'usigned int', 'usigned long']
    if dtype not in supported_dtypes:
        raise ValueError('dtype only support float, bool, double, int, long, usigned int, usigned long')
    if bc not in ['PBC', 'NONPBC']:
        raise ValueError('bc only support PBC, NONPBC')
    if name == '':
        raise ValueError('require a non empty name')

    import re

    # Applied strictly in this order; later rules see the output of the
    # earlier ones, so the sequence must not be rearranged.
    rewrite_rules = (
        # X[i,].Y[,j]
        (r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[,(\S+)\]',
         r'_contraction(_\1, \2, 1, _\3, \4, 0)'),
        # X[,j].Y[i,]
        (r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[(\S+),\]',
         r'_contraction(_\1, \2, 0, _\3, \4, 1)'),
        # X[].Y[,j]
        (r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[,(\S+)\]',
         r'_contraction(_\1, 0, 1, _\3, \4, 0)'),
        # X[,j].Y[]
        (r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[(\s*)\]',
         r'_contraction(_\1, \2, 0, _\3, 0, 1)'),
        # X[i,].Y[]
        (r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[(\s*)\]',
         r'_contraction(_\1, \2, 1, _\3, 0, 1)'),
        # X[].Y[i,]
        (r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[(\S+),\]',
         r'_contraction(_\1, 0, 1, _\3, \4, 1)'),
        # X[].Y[]
        (r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[(\s*)\]',
         r'_contraction(_\1, 0, 1, _\3, 0, 1, \1_col)'),
        # X[i,].Y[j,]
        (r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[(\S+),\]',
         r'_contraction(_\1, \2, 1, _\3, \4, 1)'),
        # X[,i].Y[,j]
        (r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[,(\S+)\]',
         r'_contraction(_\1, \2, 0, _\3, \4, 0)'),
        # X[i,@] -> sum along one axis
        (r'(\w+)\[([^\,|\]|\@]),\@\]', r'_sum(_\1, \2, 1)'),
        # X[@,j] -> sum along the other axis
        (r'(\w+)\[\@,([^\,|\]|\@])\]', r'_sum(_\1, \2, 0)'),
        # X[@,@] -> full sum
        (r'(\w+)\[\@,\@\]', r'_sum(_\1, 0, 2)'),
        # X[i] -> element access with a fixed first index of 0
        (r'(\w+)\[([^\,|\]])\]', r'_\1.elem(0,\2)'),
        # X[i,j] -> element access
        (r'(\w+)\[([^\,]+),([^\]]+)\]', r'_\1.elem(\2,\3)'),
        # E += value -> atomicAdd on E[n]
        (r'(\w+)\s*\+\=(.+)', r'atomicAdd(&\1[n], \2)'),
        # E = value (bare name on the left) -> assignment to element (0,0)
        (r'(\w+)\s*\=(.+)', r'_\1.elem(0,0) = \2'),
    )
    c_expr = expr
    for pattern, replacement in rewrite_rules:
        c_expr = re.sub(pattern, replacement, c_expr)

    # Right-hand side: drop everything the greedy '.+=' match consumes.
    right_expr = re.sub(r'.+\=', '', expr)

    # Array-like variables, de-duplicated in order of first appearance.
    array_hits = re.findall(r'^(\w+)', expr) + re.findall(r'(\w+)\[', right_expr)
    array_list = sorted(set(array_hits), key=array_hits.index)
    arrays = ", ".join(dtype + '* ' + arr for arr in array_list)

    # Scalar-like variables: mask indexed arrays and call names with '$',
    # then keep the remaining alphabetic runs in order of first appearance.
    masked = re.sub(r'\w+\s*\[\S+,\S+\]|\w+\[\S+\]', '$', right_expr)
    masked = re.sub(r'(\w+)\s*\(', '$', masked)
    scalar_hits = re.findall(r'[A-Za-z]+', masked)
    scalar_list = sorted(set(scalar_hits), key=scalar_hits.index)
    scalars = ", ".join(dtype + ' ' + s for s in scalar_list)

    # Per-array shape parameters.
    shape = ",\n".join(
        'int ' + arr + '_col' + ', ' + 'int ' + arr + '_row'
        for arr in array_list
    )

    # Working bounds for n, i and j.
    boundary = "\nint min_n, int max_n, int min_i, int max_i, int min_j, int max_j"

    # One wrapper declaration per array.
    statement = "\n\t\t".join(
        '__array__ _%s(%s, n, %s_col, %s_row);' % ((arr,) * 4)
        for arr in array_list
    )

    # Assemble the full argument declaration; scalars are optional.
    leading = arrays + ', ' + scalars if scalars else arrays
    arg_decl = leading + ',\n' + shape + ', ' + boundary

    mod, c_source = get_cuda_module(dtype, statement, arg_decl, c_expr, name, bc)
    func_kernel = mod.get_function(name)

    from pycuda.tools import parse_c_arg

    parsed_args = [parse_c_arg(piece) for piece in arg_decl.split(",")]
    func_kernel.prepare("".join(a.struct_char for a in parsed_args))

    return func_kernel, c_source, parsed_args