Example #1
0
    def get_type(self, env=None):
        """Return the type object describing the result of this operation.

        Resolution rules, in order:

        * If the operator is ``Op.ArrayRef`` and the left operand is a
          ``SymbolRef`` bound in ``env``, the binding's ``_dtype_`` is
          converted with ``get_c_type_from_numpy_dtype`` and an instance of
          the resulting class is returned.
        * Otherwise the type of each operand is resolved (see
          ``operand_type`` below).  For ``Op.ArrayRef`` the left type's
          ``_type_`` is instantiated when it is callable, else the left type
          itself is returned.
        * For every other operator the non-``None`` operand types are passed
          to ``get_common_ctype``.

        :param env: optional environment exposing ``_has_key(name)`` and
            ``_lookup(name)``; when ``None`` no symbol lookups are attempted.
        :raises AttributeError: for ``Op.ArrayRef`` when the resolved left
            type has no ``_type_`` attribute (including when it is ``None``).
        """
        def operand_type(operand):
            # Preference order: the node's own get_type(), then the
            # environment binding of a SymbolRef, then a plain `type`
            # attribute.  Note that get_type() is called without `env`.
            if hasattr(operand, 'get_type'):
                return operand.get_type()
            if isinstance(operand, SymbolRef) and env is not None \
                    and env._has_key(operand.name):
                return env._lookup(operand.name)
            if hasattr(operand, 'type'):
                return operand.type
            return None

        is_array_ref = isinstance(self.op, Op.ArrayRef)
        if is_array_ref and isinstance(self.left, SymbolRef) \
                and env is not None and env._has_key(self.left.name):
            dtype = env._lookup(self.left.name)._dtype_
            return get_c_type_from_numpy_dtype(dtype)()

        # FIXME: integer promotions and stuff like that
        left_type = operand_type(self.left)
        right_type = operand_type(self.right)

        if is_array_ref:
            ptr_type = left_type._type_
            return ptr_type() if hasattr(ptr_type, '__call__') else left_type

        # Pass a real list: on Python 3 filter() yields a one-shot iterator,
        # which breaks any consumer that indexes, sizes or re-iterates it.
        return get_common_ctype(
            [t for t in (right_type, left_type) if t is not None])
Example #2
0
    def get_type(self, env=None):
        """Return the type object describing the result of this operation.

        When the operator is ``Op.ArrayRef`` and the left operand is a
        ``SymbolRef`` bound in ``env``, the binding's ``_dtype_`` is converted
        with ``get_c_type_from_numpy_dtype`` and an instance of that class is
        returned.  In every other case the types of both operands are
        resolved and the non-``None`` ones are handed to
        ``get_common_ctype``.

        :param env: optional environment exposing ``_has_key(name)`` and
            ``_lookup(name)``; when ``None`` no symbol lookups are attempted.
        """
        if isinstance(self.op, Op.ArrayRef):
            if isinstance(self.left, SymbolRef) and env is not None \
                    and env._has_key(self.left.name):
                dtype = env._lookup(self.left.name)._dtype_
                return get_c_type_from_numpy_dtype(dtype)()

        def resolve(operand):
            # Preference order: the node's own get_type() (called without
            # `env`), then the environment binding of a SymbolRef, then a
            # plain `type` attribute; None when nothing is known.
            if hasattr(operand, 'get_type'):
                return operand.get_type()
            if isinstance(operand, SymbolRef) and env is not None \
                    and env._has_key(operand.name):
                return env._lookup(operand.name)
            if hasattr(operand, 'type'):
                return operand.type
            return None

        # FIXME: integer promotions and stuff like that
        left_type = resolve(self.left)
        right_type = resolve(self.right)

        # Build a concrete list instead of using filter(): on Python 3
        # filter() is a one-shot iterator, which is fragile for any consumer
        # that needs len(), indexing or a second pass.
        candidates = [t for t in (right_type, left_type) if t is not None]
        return get_common_ctype(candidates)
    def transform(self, py_ast, program_config):
        """Generate the OpenMP C file for an element-wise array/scalar op.

        The first statement of ``py_ast`` is converted with
        ``PyBasicConversions`` and emitted as a C function named ``apply``;
        an entry point named ``FUNC_NAME`` loops over the input and stores
        ``apply(arr[i], scalar)`` into ``output[i]``.

        :param py_ast: Python AST whose ``body[0]`` is the kernel function.
        :param program_config: sequence whose first item exposes ``size``,
            ``dtype``, ``ndim``, ``shape`` and ``scalar_type``.
        :return: a one-element list holding the generated ``CFile``.
        """
        # Describe the input array and derive the C types from it
        arr_cfg = program_config[0]
        n_elements = np.prod(arr_cfg.size)
        array_ptr = np.ctypeslib.ndpointer(arr_cfg.dtype, arr_cfg.ndim,
                                           arr_cfg.shape)
        element_ctype = get_c_type_from_numpy_dtype(arr_cfg.dtype)()
        scalar_ctype = get_c_type_from_numpy_dtype(
            np.dtype(arr_cfg.scalar_type))()

        # Convert the user kernel and pin down its C signature
        kernel = PyBasicConversions().visit(py_ast.body[0])
        kernel.name = 'apply'
        kernel.params[0].type = element_ctype
        kernel.params[1].type = scalar_ctype
        # TODO: figure out which data type to actually preserve
        kernel.return_type = element_ctype

        # TODO: MAKE A CLASS THAT HANDLES SUPPORTED TYPES (INT, FLOAT, DOUBLE)

        template_bindings = {'length': Constant(n_elements)}
        loop_body = StringTemplate(r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(arr[i], scalar);
            }
        """, template_bindings)

        headers = [CppInclude("omp.h"), CppInclude("stdio.h")]
        entry_params = [
            SymbolRef("arr", array_ptr()),
            SymbolRef("scalar", scalar_ctype),
            SymbolRef("output", array_ptr()),
        ]
        entry_point = FunctionDecl(None, FUNC_NAME,
                                   params=entry_params,
                                   defn=[loop_body])

        return [CFile("generated", headers + [kernel, entry_point], 'omp')]
    def transform(self, py_ast, program_config):
        """Generate the OpenMP C file for an element-wise array/scalar op.

        ``py_ast.body[0]`` is converted with ``PyBasicConversions`` and
        renamed ``apply``.  The generated file also contains a ``FUNC_NAME``
        entry point whose body writes ``apply(arr[i], scalar)`` to
        ``output[i]`` for every index below the input's element count.

        :param py_ast: Python AST whose ``body[0]`` is the kernel function.
        :param program_config: sequence whose first item exposes ``size``,
            ``dtype``, ``ndim``, ``shape`` and ``scalar_type``.
        :return: a one-element list holding the generated ``CFile``.
        """
        source = program_config[0]

        # C-level types derived from the input description
        total = np.prod(source.size)
        ndptr = np.ctypeslib.ndpointer(source.dtype, source.ndim, source.shape)
        array_elem_type = get_c_type_from_numpy_dtype(source.dtype)()
        scalar_dtype = np.dtype(source.scalar_type)
        scalar_elem_type = get_c_type_from_numpy_dtype(scalar_dtype)()

        # The user's function becomes the C function `apply`
        apply_fn = PyBasicConversions().visit(py_ast.body[0])
        apply_fn.name = 'apply'
        array_param, scalar_param = apply_fn.params[0], apply_fn.params[1]
        array_param.type = array_elem_type
        scalar_param.type = scalar_elem_type
        # TODO: figure out which data type to actually preserve
        apply_fn.return_type = array_elem_type

        # TODO: MAKE A CLASS THAT HANDLES SUPPORTED TYPES (INT, FLOAT, DOUBLE)

        loop = StringTemplate(
            r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(arr[i], scalar);
            }
        """, {'length': Constant(total)})

        file_body = [CppInclude("omp.h"), CppInclude("stdio.h"), apply_fn]
        file_body.append(
            FunctionDecl(None,
                         FUNC_NAME,
                         params=[
                             SymbolRef("arr", ndptr()),
                             SymbolRef("scalar", scalar_elem_type),
                             SymbolRef("output", ndptr()),
                         ],
                         defn=[loop]))

        generated = CFile("generated", file_body, 'omp')
        return [generated]
    def finalize(self, transform_result, program_config):
        """Wrap the generated file in a callable concrete function.

        Builds a ``CFUNCTYPE`` signature ``(array, scalar, array) -> None``
        from the first item of ``program_config`` and delegates to
        ``ConcreteElemWiseArrayScalarOp.finalize``.

        :param transform_result: sequence whose first item is the generated
            tree to place in the ``Project``.
        :param program_config: sequence whose first item exposes ``dtype``,
            ``ndim``, ``shape`` and ``scalar_type``.
        :return: whatever ``ConcreteElemWiseArrayScalarOp.finalize`` returns.
        """
        generated_tree = transform_result[0]

        # Argument types of the entry point
        arr_cfg = program_config[0]
        array_ptr = np.ctypeslib.ndpointer(arr_cfg.dtype, arr_cfg.ndim,
                                           arr_cfg.shape)
        scalar_ctype_cls = get_c_type_from_numpy_dtype(
            np.dtype(arr_cfg.scalar_type))
        signature = CFUNCTYPE(None, array_ptr, scalar_ctype_cls, array_ptr)

        # Instantiate the concrete function and hand it the project
        return ConcreteElemWiseArrayScalarOp().finalize(
            Project([generated_tree]), FUNC_NAME, signature)
    def finalize(self, transform_result, program_config):
        """Produce the concrete function for the generated array/scalar op.

        The entry-point signature is ``CFUNCTYPE(None, ptr, scalar, ptr)``
        where ``ptr`` is an ``ndpointer`` built from the input description
        and ``scalar`` is the ctype class for ``scalar_type``.

        :param transform_result: sequence whose first item is the generated
            tree to place in the ``Project``.
        :param program_config: sequence whose first item exposes ``dtype``,
            ``ndim``, ``shape`` and ``scalar_type``.
        :return: whatever ``ConcreteElemWiseArrayScalarOp.finalize`` returns.
        """
        c_tree = transform_result[0]
        source = program_config[0]

        # Types of the three entry-point arguments
        ndptr = np.ctypeslib.ndpointer(source.dtype, source.ndim, source.shape)
        scalar_dtype = np.dtype(source.scalar_type)
        scalar_cls = get_c_type_from_numpy_dtype(scalar_dtype)
        arg_types = (ndptr, scalar_cls, ndptr)
        entry_signature = CFUNCTYPE(None, *arg_types)

        concrete = ConcreteElemWiseArrayScalarOp()
        project = Project([c_tree])
        return concrete.finalize(project, FUNC_NAME, entry_signature)
Example #7
0
    def transform(self, py_ast, program_config):
        """Generate the OpenMP C file for an element-wise array/array op.

        ``py_ast.body[0]`` is converted with ``PyBasicConversions`` and
        emitted as ``apply``; the ``FUNC_NAME`` entry point stores
        ``apply(input1[i], input2[i])`` into ``output[i]`` for every index
        below the input's element count.  All three buffers share the pointer
        type built from the first item of ``program_config``.

        :param py_ast: Python AST whose ``body[0]`` is the kernel function.
        :param program_config: sequence whose first item exposes ``size``,
            ``dtype``, ``ndim`` and ``shape``.
        :return: a one-element list holding the generated ``CFile``.
        """
        # Describe the input array and derive the C types from it
        arr_cfg = program_config[0]
        n_elements = np.prod(arr_cfg.size)
        array_ptr = np.ctypeslib.ndpointer(arr_cfg.dtype, arr_cfg.ndim,
                                           arr_cfg.shape)
        element_ctype = get_c_type_from_numpy_dtype(arr_cfg.dtype)()

        # Both kernel parameters and its result use the element type
        kernel = PyBasicConversions().visit(py_ast.body[0])
        kernel.name = 'apply'
        kernel.params[0].type = element_ctype
        kernel.params[1].type = element_ctype
        kernel.return_type = element_ctype

        template_bindings = {'length': Constant(n_elements)}
        loop_body = StringTemplate(r"""
            #pragma omp parallel for
            for (int i = 0; i < $length; i++) {
                output[i] = apply(input1[i], input2[i]);
            }
        """, template_bindings)

        headers = [CppInclude("omp.h"), CppInclude("stdio.h")]
        entry_params = [
            SymbolRef(buffer_name, array_ptr())
            for buffer_name in ("input1", "input2", "output")
        ]
        entry_point = FunctionDecl(None, FUNC_NAME,
                                   params=entry_params,
                                   defn=[loop_body])

        return [CFile("generated", headers + [kernel, entry_point], 'omp')]
    def transform(self, py_ast, program_config):
        """Generate the OpenMP C file that subtracts a per-segment average.

        For every layer and every segment within it, the generated C code
        averages ``pfov_length`` input values (addressed through the index
        formula in the template) and writes ``input - avg`` to the same
        positions of the output buffer.  The converted kernel function is
        included in the file as ``apply``, although the template itself does
        not call it.

        :param py_ast: Python AST containing the kernel ``FunctionDecl``.
        :param program_config: sequence whose first item exposes
            ``num_frames``, ``data_height``, ``size``, ``segment_length``,
            ``dtype``, ``ndim`` and ``shape``.
        :return: a one-element list holding the generated ``CFile``.
        """
        cfg = program_config[0]

        # Geometry of the data, as plain numbers for the template
        layer_count = np.prod(cfg.num_frames)
        height = np.prod(cfg.data_height)
        per_layer = np.prod(cfg.size // layer_count)
        per_segment = np.prod(cfg.segment_length)

        element_ctype = get_c_type_from_numpy_dtype(cfg.dtype)()

        in_ptr = np.ctypeslib.ndpointer(cfg.dtype, cfg.ndim, cfg.shape)
        # NOTE(review): ndim=1 is combined with a two-element shape here;
        # confirm this is what the caller of the entry point expects.
        out_ptr = np.ctypeslib.ndpointer(cfg.dtype, 1, (cfg.size, 1))

        # Locate the kernel function and give it a C signature and name
        kernel = PyBasicConversions().visit(py_ast).find(FunctionDecl)
        kernel.return_type = element_ctype
        kernel.params[0].type = element_ctype
        kernel.params[1].type = element_ctype
        kernel.name = 'apply'

        segments_per_layer = int(per_layer / per_segment)

        template_bindings = {
            'num_2d_layers': Constant(layer_count),
            'layer_length': Constant(per_layer),
            'num_pfovs': Constant(segments_per_layer),
            'pfov_length': Constant(per_segment),
            'data_height': Constant(height),
        }

        # TODO: TIME TO START CPROFILING THINGS!
        body = StringTemplate(r"""
            #pragma omp parallel for collapse(2)
            for (int level = 0; level < $num_2d_layers; level++) {
                for (int i=0; i<$num_pfovs ; i++) {
                    int level_offset = level * $layer_length;
                    double avg = 0.0;
                    // #pragma omp parallel for reduction (+:avg)
                    for (int j=0; j<$pfov_length; j++) {
                        int in_layer_offset = ($pfov_length * i + j) /
                            ($layer_length / $data_height);

                        int index = (in_layer_offset + ($pfov_length * i + j) * $data_height)
                                     % $layer_length;
                        // printf ("Index: %i, I: %i, J: %i\n", index, i, j);
                        avg += input_arr[level_offset + index];
                    }
                    avg = avg / $pfov_length;

                    // #pragma omp parallel for
                    for (int j=0; j<$pfov_length; j++) {
                        int in_layer_offset = ($pfov_length * i + j) /
                            ($layer_length / $data_height);

                        int index = (in_layer_offset + ($pfov_length * i + j) * $data_height)
                                     % $layer_length;
                        output_arr[level_offset + index] = input_arr[level_offset + index] - avg;
                    }
                }
            }
        """, template_bindings)

        headers = [CppInclude("omp.h"), CppInclude("stdio.h")]
        entry_point = FunctionDecl(
            None, REDUCTION_FUNC_NAME,
            params=[
                SymbolRef("input_arr", in_ptr()),
                SymbolRef("output_arr", out_ptr()),
            ],
            defn=[body])

        return [CFile("generated", headers + [kernel, entry_point], 'omp')]
Example #9
0
    def transform(self, tree, program_config):
        """Generate the OpenCL kernel file and C controller for a reduction.

        The ``FunctionDecl`` found in ``tree`` is emitted as ``apply``.  The
        generated ``apply_kernel`` has each work-item fold a strided slice of
        ``A`` into ``localResult`` with ``apply``, store it in ``localData``,
        and, after a local-memory barrier, has work-item 0 fold all
        ``localData`` entries and write the result to ``output_buf[0]``.
        The controller is a C template defining ``apply_all``, which sets the
        kernel arguments and enqueues the kernel.

        :param tree: Python AST containing the kernel ``FunctionDecl``.
        :param program_config: sequence whose first item exposes ``shape``,
            ``dtype`` and ``ndim``.
        :return: ``[ocl_kernel, c_controller]`` (an ``OclFile`` and a
            ``CFile``).
        """
        # NOTE(review): `dirname` is not used below; the call is kept in case
        # config_to_dirname has side effects -- confirm and remove if not.
        dirname = self.config_to_dirname(program_config)
        A = program_config[0]
        len_A = np.prod(A.shape)
        # ctype *class* for the element type; instantiated wherever a type
        # object is attached to a node.
        data_type = get_c_type_from_numpy_dtype(A.dtype)
        pointer = np.ctypeslib.ndpointer(A.dtype, A.ndim, A.shape)

        apply_one = PyBasicConversions().visit(tree).find(FunctionDecl)
        apply_one.name = 'apply'
        apply_one.return_type = data_type()
        apply_one.params[0].type = data_type()
        apply_one.params[1].type = data_type()

        # Upper bound of the per-work-item `offset` loop
        responsible_size = int(len_A / WORK_GROUP_SIZE)

        # A numpy dtype instance is never the *same object* as the scalar
        # class np.int32, so an identity test (`is`) always fails and the
        # accumulator would always be declared float.  Equality is the
        # comparison numpy defines between a dtype and a scalar type.
        if A.dtype == np.int32:
            local_result_type = ct.c_int()
        else:
            local_result_type = ct.c_float()

        # localResult = apply(localResult, A[globalId + offset * WORK_GROUP_SIZE])
        strided_fold = For(
            Assign(SymbolRef('offset', ct.c_int()), Constant(1)),
            Lt(SymbolRef('offset'), Constant(responsible_size)),
            PostInc(SymbolRef('offset')),
            [
                Assign(SymbolRef('localResult'),
                       # The callee is wrapped in SymbolRef, matching the
                       # second FunctionCall below.
                       FunctionCall(SymbolRef(apply_one.name), [
                           SymbolRef('localResult'),
                           ArrayRef(SymbolRef('A'),
                                    Add(SymbolRef('globalId'),
                                        Mul(SymbolRef('offset'),
                                            Constant(WORK_GROUP_SIZE)))),
                       ])),
            ]
        )

        # Work-item 0 folds every localData slot (unrolled at generation
        # time) and publishes the result in output_buf[0].
        final_fold = If(
            Eq(SymbolRef('globalId'), Constant(0)),
            [
                Assign(SymbolRef('localResult'),
                       FunctionCall(SymbolRef(apply_one.name), [
                           SymbolRef('localResult'),
                           ArrayRef(SymbolRef('localData'), Constant(x)),
                       ]))
                for x in range(1, WORK_GROUP_SIZE)
            ] + [
                Assign(ArrayRef(SymbolRef('output_buf'), Constant(0)),
                       SymbolRef('localResult')),
            ]
        )

        apply_kernel = FunctionDecl(
            None, "apply_kernel",
            params=[
                SymbolRef("A", pointer()).set_global(),
                SymbolRef("output_buf", pointer()).set_global(),
                SymbolRef("localData", pointer()).set_local(),
            ],
            defn=[
                Assign(SymbolRef('groupId', ct.c_int()), get_group_id(0)),
                Assign(SymbolRef('globalId', ct.c_int()), get_global_id(0)),
                Assign(SymbolRef('localId', ct.c_int()), get_local_id(0)),
                Assign(SymbolRef('localResult', local_result_type),
                       ArrayRef(SymbolRef('A'), SymbolRef('globalId'))),
                strided_fold,
                Assign(ArrayRef(SymbolRef('localData'), SymbolRef('globalId')),
                       SymbolRef('localResult')),
                barrier(CLK_LOCAL_MEM_FENCE()),
                final_fold,
            ]
        ).set_kernel()

        # Hardcoded OpenCL host code that sets the arguments and launches the
        # kernel.  The 'n' binding is not referenced by the template text.
        control = StringTemplate(r"""
        #ifdef __APPLE__
        #include <OpenCL/opencl.h>
        #else
        #include <CL/cl.h>
        #endif

        #include <stdio.h>

        void apply_all(cl_command_queue queue, cl_kernel kernel, cl_mem buf, cl_mem out_buf) {
            size_t global = $local;
            size_t local = $local;
            intptr_t len = $length;
            clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
            clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_buf);
            clSetKernelArg(kernel, 2, local * sizeof(int), NULL);
            clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
        }
        """, {'local': Constant(WORK_GROUP_SIZE),
              'n': Constant((len_A + WORK_GROUP_SIZE - (len_A % WORK_GROUP_SIZE))/2),
              'length': Constant(len_A),
        })

        ocl_kernel = OclFile("kernel", [apply_one, apply_kernel])
        c_controller = CFile("generated", [control])
        return [ocl_kernel, c_controller]
Example #10
0
    def transform(self, tree, program_config):
        """Generate the OpenCL kernel file and C controller for a reduction.

        ``tree.body[0]`` is converted with ``PyBasicConversions`` and placed
        in the kernel file ahead of ``apply_kernel``, whose body calls a
        function named ``apply``.  Each work-item folds a strided slice of
        ``A`` into ``localResult`` (the fold is unrolled at generation time),
        stores it in ``localData``, and, after a local-memory barrier,
        work-item 0 folds all ``localData`` entries and writes the result to
        ``output_buf[0]``.  The controller is a C template defining
        ``apply_all``, which sets the kernel arguments and enqueues the
        kernel.

        :param tree: Python AST whose ``body[0]`` is the kernel function.
        :param program_config: sequence whose first item exposes ``shape``,
            ``dtype`` and ``ndim``.
        :return: ``[kernel, c_controller]`` (an ``OclFile`` and a ``CFile``).
        """
        A = program_config[0]
        len_A = np.prod(A.shape)
        inner_type = get_c_type_from_numpy_dtype(A.dtype)()
        pointer = np.ctypeslib.ndpointer(A.dtype, A.ndim, A.shape)

        # NOTE(review): the converted function is not renamed here although
        # the kernel calls 'apply' -- presumably the source function already
        # carries that name; confirm against the caller.
        apply_one = PyBasicConversions().visit(tree.body[0])
        apply_one.return_type = inner_type
        apply_one.params[0].type = inner_type
        apply_one.params[1].type = inner_type

        # Number of strided elements folded by each work-item
        responsible_size = int(len_A / WORK_GROUP_SIZE)

        # The accumulator is declared with the array's element type rather
        # than a hard-coded int, so it matches the parameter and return
        # types given to `apply` above for every dtype.
        local_result_type = get_c_type_from_numpy_dtype(A.dtype)()

        prologue = [
            Assign(SymbolRef('groupId', ct.c_int()), get_group_id(0)),
            Assign(SymbolRef('globalId', ct.c_int()), get_global_id(0)),
            Assign(SymbolRef('localId', ct.c_int()), get_local_id(0)),
            Assign(SymbolRef('localResult', local_result_type),
                   ArrayRef(SymbolRef('A'), SymbolRef('globalId'))),
        ]

        # localResult = apply(localResult, A[globalId + i * WORK_GROUP_SIZE])
        strided_fold = [
            Assign(SymbolRef('localResult'),
                   FunctionCall(SymbolRef('apply'), [
                       SymbolRef('localResult'),
                       ArrayRef(SymbolRef('A'),
                                Add(SymbolRef('globalId'),
                                    Constant(i * WORK_GROUP_SIZE))),
                   ]))
            for i in range(1, responsible_size)
        ]

        # Work-item 0 folds every localData slot and publishes the result
        final_fold = If(
            Eq(SymbolRef('globalId'), Constant(0)),
            [
                Assign(SymbolRef('localResult'),
                       FunctionCall(SymbolRef('apply'), [
                           SymbolRef('localResult'),
                           ArrayRef(SymbolRef('localData'), Constant(x)),
                       ]))
                for x in range(1, WORK_GROUP_SIZE)
            ] + [
                Assign(ArrayRef(SymbolRef('output_buf'), Constant(0)),
                       SymbolRef('localResult')),
            ]
        )

        epilogue = [
            Assign(ArrayRef(SymbolRef('localData'), SymbolRef('globalId')),
                   SymbolRef('localResult')),
            barrier(CLK_LOCAL_MEM_FENCE()),
            final_fold,
        ]

        apply_kernel = FunctionDecl(
            None, "apply_kernel",
            params=[
                SymbolRef("A", pointer()).set_global(),
                SymbolRef("output_buf", pointer()).set_global(),
                SymbolRef("localData", pointer()).set_local(),
            ],
            defn=prologue + strided_fold + epilogue
        ).set_kernel()

        kernel = OclFile("kernel", [apply_one, apply_kernel])

        # Host-side launcher.  The 'n' binding is not referenced by the
        # template text.
        control = StringTemplate(r"""
        #ifdef __APPLE__
        #include <OpenCL/opencl.h>
        #else
        #include <CL/cl.h>
        #endif

        #include <stdio.h>

        void apply_all(cl_command_queue queue, cl_kernel kernel, cl_mem buf, cl_mem out_buf) {
            size_t global = $local;
            size_t local = $local;
            intptr_t len = $length;
            cl_mem swap;
            clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
            clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_buf);
            clSetKernelArg(kernel, 2, local * sizeof(int), NULL);
            clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
        }
        """, {'local': Constant(WORK_GROUP_SIZE),
              'n': Constant((len_A + WORK_GROUP_SIZE - (len_A % WORK_GROUP_SIZE))/2),
              'length': Constant(len_A)
        })

        c_controller = CFile("generated", [control])
        return [kernel, c_controller]