Ejemplo n.º 1
0
def get_elwise_kernel_and_types(arguments,
                                operation,
                                name="kernel",
                                keep=False,
                                options=None,
                                use_range=False,
                                **kwargs):
    """Build an elementwise kernel function and its full argument list.

    Args:
        arguments: Either a comma-separated string of C argument
            declarations (each piece is parsed with
            ``pycuda.tools.parse_c_arg``) or a sequence of argument
            objects exposing ``struct_char``.
        operation: Forwarded unchanged to the module builder.
        name: Name of the kernel function looked up in the built module.
        keep: Forwarded unchanged to the module builder.
        options: Forwarded unchanged to the module builder.
        use_range: When true, ``start``/``stop``/``step`` scalar arguments
            (``np.intp``) are appended and ``get_elwise_range_module``
            builds the module; otherwise a single ``n`` scalar
            (``np.uintp``) is appended and ``get_elwise_module`` is used.
        **kwargs: Forwarded unchanged to the module builder.

    Returns:
        ``(func, arguments)``: the function obtained from the built module
        (already prepared with the concatenated ``struct_char`` codes) and
        the argument list including the appended scalar argument(s).
    """
    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg
        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Copy before appending: mutating the caller's list would make a
        # second call with the same list see the extra scalars from the
        # first call and append them again.
        arguments = list(arguments)

    if use_range:
        arguments.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
        ])
        module_builder = get_elwise_range_module
    else:
        arguments.append(ScalarArg(np.uintp, "n"))
        module_builder = get_elwise_module

    mod = module_builder(arguments, operation, name, keep, options, **kwargs)

    func = mod.get_function(name)
    func.prepare("".join(arg.struct_char for arg in arguments))

    return func, arguments
Ejemplo n.º 2
0
def get_elwise_kernel_and_types(arguments, operation,
        name="kernel", keep=False, options=None, use_range=False, **kwargs):
    """Return a prepared elementwise kernel together with its arguments.

    ``arguments`` may be a comma-separated string of C declarations, in
    which case every piece is parsed with ``pycuda.tools.parse_c_arg``, or
    a sequence of argument objects that expose ``struct_char``.

    With ``use_range`` set, three ``np.intp`` scalars named ``start``,
    ``stop`` and ``step`` are appended and the module comes from
    ``get_elwise_range_module``.  Otherwise one ``np.uintp`` scalar named
    ``n`` is appended and ``get_elwise_module`` is used.  ``operation``,
    ``name``, ``keep``, ``options`` and ``**kwargs`` are forwarded to the
    chosen builder unchanged.

    Returns a ``(func, arguments)`` tuple: the function fetched from the
    module by ``name`` (prepared with the concatenated ``struct_char``
    codes) and the complete argument list.
    """
    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg
        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Do not grow the caller's list in place; a list reused across
        # calls would otherwise pick up duplicate trailing scalars.
        arguments = list(arguments)

    if use_range:
        arguments.extend([
            ScalarArg(np.intp, "start"),
            ScalarArg(np.intp, "stop"),
            ScalarArg(np.intp, "step"),
            ])
        module_builder = get_elwise_range_module
    else:
        arguments.append(ScalarArg(np.uintp, "n"))
        module_builder = get_elwise_module

    mod = module_builder(arguments, operation, name,
            keep, options, **kwargs)

    func = mod.get_function(name)
    func.prepare("".join(arg.struct_char for arg in arguments))

    return func, arguments
Ejemplo n.º 3
0
def get_elwise_kernel_and_types(arguments, operation,
        name="kernel", keep=False, options=None, **kwargs):
    """Return a prepared elementwise kernel together with its arguments.

    ``arguments`` may be a comma-separated string of C declarations, in
    which case every piece is parsed with ``pycuda.tools.parse_c_arg``, or
    a sequence of argument objects that expose ``struct_char``.  A
    ``numpy.uintp`` scalar named ``n`` is appended to the arguments before
    the module is built with ``get_elwise_module``.

    ``options`` defaults to ``None``, which is replaced by a fresh empty
    list for each call so no list object is shared between calls.
    ``operation``, ``name``, ``keep`` and ``**kwargs`` are forwarded to
    ``get_elwise_module`` unchanged.

    Returns a ``(func, arguments)`` tuple: the function fetched from the
    module by ``name`` (prepared with the concatenated ``struct_char``
    codes and the ``(1,1,1)`` tuple) and the complete argument list.
    """
    if options is None:
        options = []

    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg
        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Do not grow the caller's list in place; a list reused across
        # calls would otherwise pick up a duplicate trailing "n".
        arguments = list(arguments)

    arguments.append(ScalarArg(numpy.uintp, "n"))

    mod = get_elwise_module(arguments, operation, name,
            keep, options, **kwargs)

    func = mod.get_function(name)
    func.prepare("".join(arg.struct_char for arg in arguments), (1,1,1))

    return func, arguments
Ejemplo n.º 4
0
def get_elwise_kernel_and_types(arguments,
                                operation,
                                name="kernel",
                                keep=False,
                                options=None,
                                **kwargs):
    """Build an elementwise kernel function and its full argument list.

    Args:
        arguments: Either a comma-separated string of C argument
            declarations (each piece is parsed with
            ``pycuda.tools.parse_c_arg``) or a sequence of argument
            objects exposing ``struct_char``.
        operation: Forwarded unchanged to ``get_elwise_module``.
        name: Name of the kernel function looked up in the built module.
        keep: Forwarded unchanged to ``get_elwise_module``.
        options: Forwarded to ``get_elwise_module``; ``None`` (the
            default) is replaced by a fresh empty list on every call.
        **kwargs: Forwarded unchanged to ``get_elwise_module``.

    Returns:
        ``(func, arguments)``: the function obtained from the built module
        (prepared with the concatenated ``struct_char`` codes and the
        ``(1, 1, 1)`` tuple) and the argument list including the appended
        ``numpy.uintp`` scalar ``n``.
    """
    # A literal ``[]`` default would be one list object shared by all calls.
    if options is None:
        options = []

    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg
        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
    else:
        # Copy before appending so the caller's list is left untouched and
        # can be passed again without accumulating extra "n" arguments.
        arguments = list(arguments)

    arguments.append(ScalarArg(numpy.uintp, "n"))

    mod = get_elwise_module(arguments, operation, name, keep, options,
                            **kwargs)

    func = mod.get_function(name)
    func.prepare("".join(arg.struct_char for arg in arguments), (1, 1, 1))

    return func, arguments
Ejemplo n.º 5
0
def map_expr_to_kernel(expr, name = 'kernel', dtype = 'float', bc = 'NONPBC'):
    
    """Translate an index-notation expression into a prepared CUDA kernel.

    The expression is rewritten into a C expression string by a fixed
    sequence of regular-expression substitutions.  The names found in the
    expression are turned into a kernel argument list, and both are handed
    to ``get_cuda_module`` (defined outside this block) to build the module.

    Example:
        expr = 'A[i,j] = b[i,].W[,j] + C[i,j]'

    Args:
        expr: Expression string in the notation shown above.
        name: Kernel function name; must not be the empty string.
        dtype: Element type name used for every array and scalar argument;
            must be one of the strings in the validation list below.
        bc: Either 'PBC' or 'NONPBC'; passed through to ``get_cuda_module``.

    Returns:
        Tuple ``(func_kernel, c_source, arguments)``: the function fetched
        from the module by ``name`` (already prepared), the second value
        returned by ``get_cuda_module``, and the argument list produced by
        ``pycuda.tools.parse_c_arg``.

    Raises:
        ValueError: If ``dtype`` or ``bc`` is not an accepted value, or if
            ``name`` is empty.
    """
    
    # NOTE(review): 'usigned int' / 'usigned long' look like misspellings of
    # 'unsigned ...'; as written, the correctly spelled names are rejected.
    # Confirm against get_cuda_module before changing.
    if dtype not in ['float', 'bool', 'double', 'int', 'long', 'usigned int', 'usigned long']:
        raise ValueError('dtype only support float, bool, double, int, long, usigned int, usigned long')
    
    if bc not in ['PBC', 'NONPBC']:
        raise ValueError('bc only support PBC, NONPBC')
        
    if name == '':
        raise ValueError('require a non empty name')
    
    
    import re
    
    # The substitutions below run in sequence on the same string, so each one
    # sees the output of the previous ones; their order matters.

    # sub X[i,].Y[,j] to _contraction(_X, i, 1, _Y, j, 0)
    c_expr = re.sub(r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[,(\S+)\]', \
                    r'_contraction(_\1, \2, 1, _\3, \4, 0)', expr)
    
    # sub X[,j].Y[i,] to _contraction(_X, j, 0, _Y, i, 1)
    c_expr = re.sub(r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[(\S+),\]', \
                    r'_contraction(_\1, \2, 0, _\3, \4, 1)', c_expr)
    
    # sub X[].Y[,j] to _contraction(_X, 0, 1, _Y, j, 0)
    c_expr = re.sub(r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[,(\S+)\]', \
                    r'_contraction(_\1, 0, 1, _\3, \4, 0)', c_expr)
      
    # sub X[,j].Y[] to _contraction(_X, j, 0, _Y, 0, 1)
    c_expr = re.sub(r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[(\s*)\]', \
                    r'_contraction(_\1, \2, 0, _\3, 0, 1)', c_expr)  
    
    # sub X[i,].Y[] to _contraction(_X, i, 1, _Y, 0, 1)
    c_expr = re.sub(r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[(\s*)\]', \
                    r'_contraction(_\1, \2, 1, _\3, 0, 1)', c_expr)
    
    # sub X[].Y[i,] to _contraction(_X, 0, 1, _Y, i, 1)
    c_expr = re.sub(r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[(\S+),\]', \
                    r'_contraction(_\1, 0, 1, _\3, \4, 1)', c_expr)
    
    # sub X[].Y[] to _contraction(_X, 0, 1, _Y, 0, 1, X_col)
    # (this is the only form that passes the extra X_col argument)
    c_expr = re.sub(r'(\w+)\[(\s*)\]\s*\.\s*(\w+)\[(\s*)\]', \
                    r'_contraction(_\1, 0, 1, _\3, 0, 1, \1_col)', c_expr)    
    
    # sub X[i,].Y[j,] to _contraction(_X, i, 1, _Y, j, 1)
    c_expr = re.sub(r'(\w+)\[(\S+),\]\s*\.\s*(\w+)\[(\S+),\]', \
                    r'_contraction(_\1, \2, 1, _\3, \4, 1)', c_expr) 

    # sub X[,i].Y[,j] to _contraction(_X, i, 0, _Y, j, 0)
    c_expr = re.sub(r'(\w+)\[,(\S+)\]\s*\.\s*(\w+)\[,(\S+)\]', \
                    r'_contraction(_\1, \2, 0, _\3, \4, 0)', c_expr) 
    
    
    # The index in the two @-patterns below is a single character
    # (one-character class), so only one-letter index names match.
    # sub X[i,@] to _sum(_X, i, 1)
    c_expr = re.sub(r'(\w+)\[([^\,|\]|\@]),\@\]', r'_sum(_\1, \2, 1)', c_expr)
    # sub X[@,j] to _sum(_X, j, 0)
    c_expr = re.sub(r'(\w+)\[\@,([^\,|\]|\@])\]', r'_sum(_\1, \2, 0)', c_expr)                 
    # sub X[@,@] to _sum(_X, 0, 2)
    c_expr = re.sub(r'(\w+)\[\@,\@\]', r'_sum(_\1, 0, 2)', c_expr)    
    
    # sub X[i] (single-character index) to _X.elem(0,i),
    # then X[i,j] to _X.elem(i,j)
    c_expr = re.sub(r'(\w+)\[([^\,|\]])\]', r'_\1.elem(0,\2)', c_expr)
    c_expr = re.sub(r'(\w+)\[([^\,]+),([^\]]+)\]', r'_\1.elem(\2,\3)', c_expr)
    
    # sub E += value to atomicAdd(&E[n], value)
    # (earlier variant, kept for reference, targeted _E.elem(0,0) instead:)
    #c_expr = re.sub(r'(\w+)\s*\+\=(.+)', r'atomicAdd(&(_\1.elem(0,0)), \2)', c_expr)
    c_expr = re.sub(r'(\w+)\s*\+\=(.+)', r'atomicAdd(&\1[n], \2)', c_expr)
    # sub E = value (a bare name directly before '=') to _E.elem(0,0) = value
    c_expr = re.sub(r'(\w+)\s*\=(.+)', r'_\1.elem(0,0) = \2', c_expr)
    
    
    # Drop everything up to and including the last '=' (greedy match) of the
    # ORIGINAL expression, leaving its right-hand side.
    right_expr = re.sub(r'.+\=', '', expr)

    # get all array like variables: the leading word of the expression plus
    # every word immediately followed by '[' on the right-hand side
    tmp = re.findall(r'^(\w+)', expr)
    tmp += re.findall(r'(\w+)\[', right_expr)         
    # de-duplicate while keeping first-occurrence order
    array_list = list(set(tmp))
    array_list.sort(key = tmp.index)
    arrays = ", ".join([dtype + '* ' + arr for arr in array_list])
    

    # get all scalar like variables
    
    # Blank out bracketed references and function-call names with '$'; the
    # remaining runs of ASCII letters are treated as scalar names.
    scalar_list = re.sub(r'\w+\s*\[\S+,\S+\]|\w+\[\S+\]', '$', right_expr)
    scalar_list = re.sub(r'(\w+)\s*\(', '$', scalar_list)
    tmp = re.findall(r'[A-Za-z]+', scalar_list)
    # de-duplicate while keeping first-occurrence order
    scalar_list = list(set(tmp))
    scalar_list.sort(key = tmp.index)
    scalars = ", ".join([dtype + ' ' + s for s in scalar_list])
    


    # get variable shape: 'int X_col, int X_row' for every array X
    shape = ",\n".join(['int ' + arr + '_col' + ', ' + \
                            'int ' + arr + '_row' for arr in array_list])
    
    # get working boundary of the i and j
    boundary = "\nint min_n, int max_n, int min_i, int max_i, int min_j, int max_j"
                
                
    # one '__array__ _X(X, n, X_col, X_row);' declaration per array
    statement = "\n\t\t".join(['__array__ _%s(%s, n, %s_col, %s_row);' \
                             % (arr, arr, arr, arr) for arr in array_list])
                             
                           

    # combing them together; the scalar section is omitted when no scalar
    # names were found (scalars is then the empty string)
    if len(scalars) != 0:
        arguments = arrays + ', ' + scalars + ',\n' + shape + ', ' + boundary
    else:
        arguments = arrays + ',\n' + shape + ', ' + boundary
        

    mod, c_source = get_cuda_module(dtype, statement, arguments, c_expr, name, bc)
    func_kernel = mod.get_function(name)
    
    # NOTE(review): arguments is always a str here (built by concatenation
    # above), so this branch always runs; the check looks like a leftover.
    if isinstance(arguments, str):
        from pycuda.tools import parse_c_arg
        arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
        
    func_kernel.prepare("".join(arg.struct_char for arg in arguments))
    

    return func_kernel, c_source, arguments