Beispiel #1
0
def get_elwise_program(context, arguments, operation,
        name="elwise_kernel", options=None,
        preamble="", loop_prep="", after_loop=""):
    """Generate and build an OpenCL program holding one element-wise kernel.

    The generated kernel executes ``operation`` once per index ``i``.  Each
    work item starts at ``get_local_size(0)*get_group_id(0) + lid`` and
    advances by the global size while ``i < n``.  The template itself does
    not declare ``n``, so it has to be supplied through ``arguments``.

    :arg context: context handed to :class:`pyopencl.Program`.
    :arg arguments: iterable of objects whose ``declarator()`` method returns
        the C declaration of one kernel argument.
    :arg operation: C code executed for every ``i``.
    :arg name: name of the generated kernel function.
    :arg options: build options forwarded to ``Program.build``; ``None``
        (the default) is treated as an empty list.
    :arg preamble: C code emitted before the kernel definition.
    :arg loop_prep: C code emitted inside the kernel, before the loop.
    :arg after_loop: C code emitted inside the kernel, after the loop.
    :returns: the result of ``Program(context, source).build(options)``.
    """
    from pyopencl import Program

    # A fresh list per call instead of one list object shared by every call
    # through a mutable default argument.
    if options is None:
        options = []

    source = ("""
        %(preamble)s

        __kernel void %(name)s(%(arguments)s)
        {
          unsigned lid = get_local_id(0);
          unsigned gsize = get_global_size(0);
          unsigned work_item_start = get_local_size(0)*get_group_id(0);
          unsigned i;

          %(loop_prep)s;

          for (i = work_item_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }

          %(after_loop)s;
        }
        """ % {
            "arguments": ", ".join(arg.declarator() for arg in arguments),
            "operation": operation,
            "name": name,
            "preamble": preamble,
            "loop_prep": loop_prep,
            "after_loop": after_loop,
            })

    return Program(context, source).build(options)
def main():
    """Run the ``medianFilter`` OpenCL kernel on the noisy sample image.

    Builds ``median_filter.cl``, runs its ``medianFilter`` kernel over
    ``../data/noisyImage.jpg``, reports the elapsed time through ``debug``
    and saves the result as ``../data/medianFilter-OpenCL.jpg``.
    """
    available = get_devices()
    try:
        debug(CL_DEVICE_TYPE(available[0].type))
    except IndexError as no_device:
        # An empty device list is only reported; execution still continues.
        exception(no_device)

    cl_context = Context(available)
    cmd_queue = CommandQueue(cl_context)

    # Compile the kernel source into a program object.
    kernel_text = read_kernel_source("median_filter.cl")
    built = Program(cl_context, kernel_text).build()

    # Load the input picture as float32 and display it.
    noisy = imread('../data/noisyImage.jpg', flatten=True).astype(float32)
    imshow(noisy)

    # Timed section: argument allocation, kernel launch and read-back.
    t_begin = perf_counter()
    kernel_args = allocate_variables(cl_context, noisy)

    # The image shape is the global size; the local size is left as None.
    built.medianFilter(cmd_queue, noisy.shape, None, *kernel_args)

    # The second kernel argument is read back as the output image.
    filtered = copy_from_buffer(cmd_queue, kernel_args[1], noisy.shape,
                                noisy.dtype)
    debug("%g milliseconds" % (1e3 * (perf_counter() - t_begin)))

    # Display the filtered image, then write it to disk.
    imshow(filtered)
    imsave('../data/medianFilter-OpenCL.jpg', filtered)
    def init_grid_methods(self):
        """Build the grid OpenCL program and store its kernels on ``self``.

        Calls ``init_generic_methods()`` and
        ``set_global_working_group_size()`` first.  The program text is
        ``self.block_def_str`` followed by ``grid_generic.cl`` and
        ``grid_deposit_m<M>.cl`` (``M`` taken from ``self.Args['M']``), both
        looked up under ``src_path`` and compiled with ``compiler_options``.
        Finally ``self.Args['vec_comps']`` is filled from
        ``self.Args['default_vec_comps']`` when it is missing.
        """
        self.init_generic_methods()
        self.set_global_working_group_size()

        source_files = (
            src_path + "grid_generic.cl",
            src_path + "grid_deposit_m" + str(self.Args['M']) + ".cl",
        )

        grid_sources = []
        for source_file in source_files:
            # The context manager closes each file; the bare open() calls
            # used before left the handles to the garbage collector.
            with open(source_file) as stream:
                grid_sources.append(stream.read())

        grid_sources = self.block_def_str + ''.join(grid_sources)

        prg = Program(self.ctx, grid_sources).\
            build(options=compiler_options)

        self._divide_by_dv_d_knl = prg.divide_by_dv_d
        self._divide_by_dv_c_knl = prg.divide_by_dv_c
        self._treat_axis_d_knl = prg.treat_axis_d
        self._treat_axis_c_knl = prg.treat_axis_c
        self._warp_axis_m0_d_knl = prg.warp_axis_m0_d
        self._warp_axis_m1plus_c_knl = prg.warp_axis_m1plus_c

        self._depose_scalar_knl = prg.depose_scalar
        self._depose_vector_knl = prg.depose_vector
        self._gather_and_push_knl = prg.gather_and_push

        if 'vec_comps' not in self.Args:
            self.Args['vec_comps'] = self.Args['default_vec_comps']
Beispiel #4
0
 def __init__(self):
     """Set the initial view and create the OpenCL objects used later.

     Stores an 800x800 size, the per-pixel step and the origin on ``self``,
     allocates a write-only RGBA/uint8 image of that size, builds
     ``mandelbrot.cl`` and bundles everything into ``self.params``.
     """
     self.h = 800
     self.w = 800
     step = 3. / 800.
     self.dx = step
     self.dy = step
     self.x0, self.y0 = -2, 1.5

     context = create_some_context()
     rgba8 = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
     self.buf = Image(context, MEM.WRITE_ONLY, rgba8,
                      shape=(self.w, self.h))
     program = Program(context, load_cl_text("mandelbrot.cl")).build()
     self.params = (self.w, self.h, context, self.buf, program)
Beispiel #5
0
    def init_solver_methods(self):
        """Build the solver OpenCL program and keep its kernels on ``self``.

        The text of ``solver_ms_pic.cl`` (looked up under ``src_path``) is
        prefixed with ``self.block_def_str`` and compiled with
        ``compiler_options``.  When ``self.Args`` contains ``'DampCells'``,
        ``self._init_field_damping()`` is called afterwards.
        """
        # Read through a context manager so the file handle is closed right
        # away; the previous bare open() call never closed it.
        with open(src_path + "solver_ms_pic.cl") as source_file:
            solver_text = source_file.read()

        solver_sources = self.block_def_str + solver_text

        prg = Program(self.ctx, solver_sources).\
            build(options=compiler_options)

        self._advance_e_g_m_knl = prg.advance_e_g_m
        self._profile_edges_c_knl = prg.profile_edges_c
        self._profile_edges_d_knl = prg.profile_edges_d

        if 'DampCells' in self.Args:
            self._init_field_damping()
Beispiel #6
0
    def init_generic_methods(self):
        """Build the generic OpenCL program and keep its kernels on ``self``.

        Calls ``set_global_working_group_size()``, then compiles
        ``self.block_def_str`` followed by the text of ``generic.cl`` (looked
        up under ``src_path``) with ``compiler_options``.
        """
        self.set_global_working_group_size()

        # Read through a context manager so the file handle is closed right
        # away; the previous bare open() call never closed it.
        with open(src_path + "generic.cl") as source_file:
            generic_text = source_file.read()

        generic_sources = self.block_def_str + generic_text

        prg = Program(self.ctx, generic_sources).\
            build(options=compiler_options)

        self._cast_array_d2c_knl = prg.cast_array_d2c
        self._axpbyz_c2c_knl = prg.axpbyz_c2c
        self._zpaxz_c2c_knl = prg.zpaxz_c2c
        self._ab_dot_x_knl = prg.ab_dot_x
        self._append_c2c_knl = prg.append_c2c
        self._set_cdouble_to_knl = prg.set_cdouble_to
        # Note the kernel name carries a ``_d2c`` suffix the attribute lacks.
        self._mult_elementwise_knl = prg.mult_elementwise_d2c
Beispiel #7
0
    def load_program(self):
        """Build ``raytraced/Raytracer.cl`` and keep its ``raytrace`` kernel.

        The kernel is stored in ``self.kernel`` with its fourth argument
        declared as ``numpy.int32``.  Afterwards ``cltypes.Vertex`` is
        replaced by the dtype returned from ``match_dtype_to_c_struct`` for
        the first device of ``self.context``.
        """
        # Slurp the whole .cl file; the with-block closes it afterwards.
        with open("raytraced/Raytracer.cl", "r") as cl_file:
            kernel_text = cl_file.read()

        cl_program = Program(self.context, kernel_text)

        # Add this module's directory to the compiler include path.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        build_flags = (
            "-cl-mad-enable -cl-fast-relaxed-math -Werror -I %s" % module_dir)

        cl_program.build(options=build_flags)
        self.kernel = cl_program.raytrace
        self.kernel.set_scalar_arg_dtypes([None, None, None, numpy.int32])

        # Align the host-side Vertex dtype with the device struct layout.
        # May not work everywhere.
        matched = OpenCL.tools.match_dtype_to_c_struct(
            self.context.devices[0], 'Vertex', cltypes.Vertex)
        cltypes.Vertex, c_decl = matched
Beispiel #8
0
    def make_add_kernel(self):
        src = """
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
__kernel void add_rand(__global const ${dtype}* a,
                       __global float* out, 
                       const float seed) {
    // modify out such that out = a+ random
    const int gid = get_global_id(0);
    float ptr = 0.0f;
    float randval =  fract(sin(gid*112.9898f + seed*237.212f) * 43758.5453f, &ptr);
    const float min = 0.;
    const float max = 1.;
    const float scaledMax =  0.005;
    const float scaledMin = -0.005;
    const float scaled = (scaledMax-scaledMin)*(randval-min)/(max-min)+scaledMin;
    out[gid] = a[gid] + scaled;
} 
        """.replace('${dtype}', dtype_str)
        prog = Program(self.ctx, src).build()
        return prog.add_rand
Beispiel #9
0
 def __init__(self):
     """Prepare rotation state, the image buffers and the built program.

     ``self.ch_angles`` maps key names to the angle increment applied for
     that key.  The input picture is copied into channel 0 of an RGBA uint8
     array, uploaded as an image, and paired with a write-only output image
     of the same size.
     """
     self.angle = 0.
     coarse_step = pi / 18.
     fine_step = pi / 180.
     self.ch_angles = {
         "Key_UP": coarse_step,
         "Key_Down": -coarse_step,
         "Key_Right": -fine_step,
         "Key_Left": fine_step,
     }

     context = create_some_context()
     picture = lena()
     h, w = map(int32, picture.shape[:2])

     # pyopencl 2018.2.2 segfaults on channel orders other than RGBA, so
     # the picture goes into channel 0 of a four-channel array.
     rgba = zeros((h, w, 4), dtype=uint8)
     rgba[:, :, 0] = picture
     self.in_img_buf = image_from_array(context, rgba, 4)

     rgba8 = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
     self.out_img_buf = Image(context, MEM.WRITE_ONLY, rgba8, shape=(w, h))

     program = Program(context, load_cl_text("rotation.cl")).build()
     self.params = (context, self.in_img_buf, self.out_img_buf, h, w,
                    program)
    def init_particle_methods(self):
        """Build the particle OpenCL program and keep its kernels on ``self``.

        Calls ``init_generic_methods()`` and
        ``set_global_working_group_size()``, creates a ``ThreefryGenerator``
        on ``self.ctx``, then compiles ``self.block_def_str`` followed by the
        text of ``particles_generic.cl`` (looked up under ``src_path``) with
        ``compiler_options``.
        """
        self.init_generic_methods()
        self.set_global_working_group_size()

        self._generator_knl = ThreefryGenerator(context=self.ctx)

        # Read through a context manager so the file handle is closed right
        # away; the previous bare open() call never closed it.
        with open(src_path + "particles_generic.cl") as source_file:
            particles_sources = source_file.read()

        particles_sources = self.block_def_str + particles_sources

        prg = Program(self.ctx, particles_sources).\
            build(options=compiler_options)

        self._data_align_dbl_knl = prg.data_align_dbl
        self._data_align_int_knl = prg.data_align_int
        self._index_and_sum_knl = prg.index_and_sum_in_cell
        self._sort_knl = prg.sort
        self._push_xyz_knl = prg.push_xyz
        self._fill_grid_knl = prg.fill_grid
        self._profile_by_interpolant_knl = prg.profile_by_interpolant
Beispiel #11
0
    def __init__(self,
                 ctx,
                 queue: CommandQueue,
                 units,
                 weight_initializer: Initializer = GlorotUniformInitializer,
                 bias_initializer: Initializer = ZeroInitializer,
                 activation='linear',
                 batch_size=64):
        """Store the layer configuration and build its OpenCL program.

        The program text is the concatenation of the activation source
        (``../nncl/cl/activations/<activation>.cl``), ``self.layer_fname``
        and ``../nncl/cl/layers/gradient.cl``, rendered through ``Template``
        with the activation/derivative function names and
        ``self.dtype_str``.  A build failure is printed to stderr and the
        process exits with status 1.

        :arg ctx: context the program is built on.
        :arg queue: command queue stored on the instance.
        :arg units: stored as ``self.units``.
        :arg weight_initializer: stored as ``self.weight_initializer``.
        :arg bias_initializer: stored as ``self.bias_initializer``.
        :arg activation: activation name; selects the activation source file
            and the ``activation_*`` / ``derivative_*`` template values.
        :arg batch_size: stored as ``self.batch_size``.
        """
        self.units = units
        self.weight_initializer = weight_initializer
        self.bias_initializer = bias_initializer
        self.activation = activation
        self.ctx = ctx
        # The original assigned ``self.queue`` twice; once is enough.
        self.queue = queue
        self.is_training = True
        self.batch_size = batch_size
        self.dtype_str = dtype_str

        source_files = [
            f'../nncl/cl/activations/{self.activation}.cl',
            self.layer_fname,
            '../nncl/cl/layers/gradient.cl',
        ]
        # Collect the pieces and join once instead of growing a string with
        # ``+=`` inside the loop; every file is still followed by a newline.
        pieces = []
        for fname in source_files:
            with open(fname, 'r') as infile:
                pieces.append(infile.read() + "\n")

        self.src = Template("".join(pieces)).render(
            activation='activation_' + self.activation,
            derivative='derivative_' + self.activation,
            dtype=self.dtype_str)
        try:
            self.prog = Program(self.ctx, self.src).build()
        except cl.cffi_cl.RuntimeError as e:
            print(e, file=sys.stderr, flush=True)
            # sys.exit is the programmatic API; the bare ``exit`` helper is
            # injected by the ``site`` module and is not always available.
            sys.exit(1)
        self.forward_krnl = self.prog.layer_forward

        self._grad_krnl = self.prog.get_gradients
Beispiel #12
0
except (IndexError, ValueError):
    angle = pi / 4
# Create the context and upload the convolution filter as a read-only buffer.
ctx = create_some_context()
filter_buf = Buffer(ctx,
                    MEM.READ_ONLY | MEM.COPY_HOST_PTR,
                    hostbuf=GAUSSIAN_BLUR)
in_img = lena()
h, w = map(int32, in_img.shape[:2])
# in pyopencl 2018.2.2 channel orders other than RGBA cause segmentation fault,
# so the picture is stored in channel 0 of a four-channel uint8 array.
i4 = zeros((h, w, 4), dtype=uint8)
i4[:, :, 0] = in_img
in_img_buf = image_from_array(ctx, i4, 4)
fmt = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
# Output image with the same format; note the shape is given as (w, h).
out_img_buf = Image(ctx, MEM.WRITE_ONLY, fmt, shape=(w, h))
# Time the program build.
pt = perf_counter()
prg = Program(ctx, load_cl_text("convolution.cl")).build()
TIMES["Compilation"] = perf_counter() - pt
pt = perf_counter()
with CommandQueue(ctx) as queue:
    # One work item per pixel; int32(5) is passed after the filter buffer
    # (presumably the filter width -- confirm against convolution.cl).
    prg.convolution(queue, (w, h), None, in_img_buf, out_img_buf, filter_buf,
                    int32(5),
                    Sampler(ctx, False, ADDRESS.CLAMP_TO_EDGE, FILTER.NEAREST))
    # NOTE(review): the clock is read without waiting on the kernel, so this
    # may measure only the enqueue call -- confirm.
    TIMES["Execution"] = perf_counter() - pt
    pt = perf_counter()
    # Read the result image back into a host array shaped like the input.
    dest = zeros(i4.shape, dtype=uint8)
    enqueue_copy(queue, dest, out_img_buf, origin=(0, 0), region=(w, h))
    TIMES["Copying"] = perf_counter() - pt
# Explicitly free the device objects.
in_img_buf.release()
out_img_buf.release()
filter_buf.release()
# One "name:<TAB>seconds" line per recorded timing.
print("\n".join("%s:\t%g" % i for i in TIMES.items()))
Beispiel #13
0
            find_set(self.x0, self.y0, self.dx, self.dy,
                     *self.params)[:, :, :3],
            "extent":
            extent
        }

    def __del__(self):
        """Release ``self.buf`` when the object is finalized."""
        held = self.buf
        held.release()


# Script entry point: render one image with mandelbrot.cl and display it.
if __name__ == "__main__":
    from sys import argv
    try:
        # Five numeric arguments are expected: xm ym xw w h.
        # presumably (xm, ym) is the view centre and xw its width -- confirm.
        xm, ym, xw, w, h = map(float, argv[1:])
        h = int32(h)
        w = int32(w)
        dx = dy = xw / w
        x0 = xm - xw / 2.
        y0 = ym + xw * (h / w) / 2.
    except (IndexError, ValueError):
        # A wrong argument count or non-numeric text raises ValueError;
        # fall back to an 800x800 image with the default origin and step.
        h = w = int32(800)
        x0, y0 = -2., 1.5
        dx = dy = 3. / 800
    ctx = create_some_context()
    fmt = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
    # Write-only RGBA/uint8 image; the shape is given as (w, h).
    buf = Image(ctx, MEM.WRITE_ONLY, fmt, shape=(w, h))
    prg = Program(ctx, load_cl_text("mandelbrot.cl")).build()
    res = find_set(x0, y0, dx, dy, w, h, ctx, buf, prg)
    # The device image is released before the result is displayed.
    buf.release()
    # Only the first three channels of the result are shown.
    show_img(res[:, :, :3], True)
Beispiel #14
0
# Matrix sizes n, m, p come from the command line.
try:
    n, m, p = map(int, argv[1:])
except ValueError:
    # Raised for non-integer text and for a wrong number of arguments.
    n, m, p = 3, 4, 5

# Flat float32 arrays holding random 0/1 entries: A has n*m, B has m*p
# elements; C (n*p elements) receives the result.
a = random.randint(2, size=(n * m)).astype(float32)
b = random.randint(2, size=(m * p)).astype(float32)
c = zeros((n * p), dtype=float32)
TIMES = {}
ctx = create_some_context()

# Inputs are copied to the device at creation; the output is only sized.
a_buf = Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = Buffer(ctx, mf.WRITE_ONLY, c.nbytes)
# Time the program build.
pt = perf_counter()
prg = Program(ctx, load_cl_text("multiply_matr.cl")).build()
TIMES["Compilation"] = perf_counter() - pt
pt = perf_counter()
with CommandQueue(ctx) as queue:
    # One work item per element of C; the dimensions are passed as uint16.
    prg.multiply(queue, c.shape, None, uint16(n), uint16(m), uint16(p), a_buf,
                 b_buf, c_buf)
    # NOTE(review): the clock is read without waiting on the kernel, so this
    # may measure only the enqueue call -- confirm.
    TIMES["Execution"] = perf_counter() - pt
    pt = perf_counter()
    # Copy the result from the device into ``c``.
    enqueue_copy(queue, c, c_buf)
    TIMES["Copying"] = perf_counter() - pt
# Explicitly free the device buffers.
a_buf.release()
b_buf.release()
c_buf.release()
print("matrix A:")
print(a.reshape(n, m))
print("matrix B:")
Beispiel #15
0
        self.angle += self.ch_angles.get(key, 0.)
        return {"img": rotate(self.angle, *self.params)[:, :, 0]}

    def __del__(self):
        """Announce finalization and release both image buffers."""
        print("delete rotate interactor")
        # Input buffer first, then output, as before.
        for image_buffer in (self.in_img_buf, self.out_img_buf):
            image_buffer.release()


# Script entry point: rotate the sample picture once and display the result.
if __name__ == "__main__":
    from sys import argv
    try:
        # First argument: angle in degrees, negated and converted to radians.
        angle = -float(argv[1]) / 180. * pi
    except (IndexError, ValueError):
        # No argument or non-numeric text: default angle of pi / 4.
        angle = pi / 4
    ctx = create_some_context()
    in_img = lena()
    h, w = map(int32, in_img.shape[:2])
    # in pyopencl 2018.2.2 channel orders other than RGBA
    # cause segmentation fault, so the picture is stored in channel 0 of a
    # four-channel uint8 array.
    i4 = zeros((h, w, 4), dtype=uint8)
    i4[:, :, 0] = in_img
    in_img_buf = image_from_array(ctx, i4, 4)
    fmt = ImageFormat(CHO.RGBA, CHANNEL.UNSIGNED_INT8)
    # Write-only output image; the shape is given as (w, h).
    out_img_buf = Image(ctx, MEM.WRITE_ONLY, fmt, shape=(w, h))
    prg = Program(ctx, load_cl_text("rotation.cl")).build()
    res = rotate(angle, ctx, in_img_buf, out_img_buf, h, w, prg)
    # Both device images are released before the result is displayed.
    in_img_buf.release()
    out_img_buf.release()
    # Only channel 0 of the result is shown.
    show_img(res[:, :, 0])
Beispiel #16
0
#! /usr/bin/env python3

#  Calculation of π using quadrature. Using PyOpenCL.
#
#  Copyright © 2012, 2014  Russel Winder

from time import time

from pyopencl import create_some_context, CommandQueue, Program, Buffer, mem_flags, enqueue_read_buffer

import numpy

from output import out

# Number of slices and the width of each slice.
n = 1000000000
delta = 1.0 / n
startTime = time()
context = create_some_context()
queue = CommandQueue(context)
# Build the program from processSlice_opencl.cl.
with open('processSlice_opencl.cl', 'r') as f:
    kernel = Program(context, f.read()).build()
# Quadro FX 570 card on Anglides only supports 32-bit operations, hence float not double.
# NOTE(review): numpy.array(n, ...) wraps the scalar n in a 0-d array, so
# results.shape is () and results.nbytes is 4; a 1-d array of per-work-item
# results was presumably intended -- confirm against the kernel.
results = numpy.array(n, dtype=numpy.float32)
buffer = Buffer(context, mem_flags.WRITE_ONLY, results.nbytes)
# Launch processSlice with results.shape as the global size.
kernel.processSlice(queue, results.shape, None, numpy.int32(n),
                    numpy.float32(delta), buffer)
# Copy the device buffer into ``results`` and wait for the transfer.
# NOTE(review): enqueue_read_buffer may be unavailable in current pyopencl
# releases (enqueue_copy is the usual replacement) -- confirm.
enqueue_read_buffer(queue, buffer, results).wait()
pi = 4.0 * delta * numpy.sum(results)
elapseTime = time() - startTime
out(__file__, pi, n, elapseTime)
Beispiel #17
0
def get_elwise_program(context,
                       arguments,
                       operation,
                       name="elwise_kernel",
                       options=None,
                       preamble="",
                       loop_prep="",
                       after_loop="",
                       use_range=False):
    """Generate and build an OpenCL program holding one element-wise kernel.

    With ``use_range`` false the kernel runs ``operation`` for every ``i``
    below ``n``.  With ``use_range`` true it walks from ``start`` towards
    ``stop`` in increments of ``step`` (downwards when ``step < 0``).  The
    template declares none of ``n``, ``start``, ``stop`` or ``step``; they
    have to come from ``arguments``.

    A warning is issued when ``operation`` contains the word ``return``;
    the generated source defines ``PYOPENCL_ELWISE_CONTINUE`` as
    ``continue`` for that purpose.

    :arg arguments: iterable of objects whose ``declarator()`` method
        returns the C declaration of one kernel argument.
    :arg options: forwarded unchanged to ``Program.build``.
    :returns: the result of ``Program(context, source).build(options)``.
    """
    import re

    ranged_loop = r"""//CL//
          if (step < 0)
          {
            for (i = start + (work_group_start + lid)*step;
              i > stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          else
          {
            for (i = start + (work_group_start + lid)*step;
              i < stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          """
    plain_loop = """//CL//
          for (i = work_group_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }
          """
    loop_template = ranged_loop if use_range else plain_loop

    # 'return' would leave the kernel instead of skipping one element.
    if re.search(r"\breturn\b", operation):
        from warnings import warn
        warn(
            "Using a 'return' statement in an element-wise operation will "
            "likely lead to incorrect results. Use "
            "PYOPENCL_ELWISE_CONTINUE instead.",
            stacklevel=3)

    kernel_template = """//CL//
        {preamble}

        #define PYOPENCL_ELWISE_CONTINUE continue

        __kernel void {name}({arguments})
        {{
          int lid = get_local_id(0);
          int gsize = get_global_size(0);
          int work_group_start = get_local_size(0)*get_group_id(0);
          long i;

          {loop_prep};
          {body}
          {after_loop};
        }}
        """

    declarations = ", ".join(arg.declarator() for arg in arguments)
    filled_loop = loop_template % {"operation": operation}
    source = kernel_template.format(
        arguments=declarations,
        name=name,
        preamble=preamble,
        loop_prep=loop_prep,
        after_loop=after_loop,
        body=filled_loop,
    )

    from pyopencl import Program
    program = Program(context, source)
    return program.build(options)
Beispiel #18
0
                      enqueue_copy)
from numpy import zeros, int32
from misc import create_some_context, load_cl_text, lena
import matplotlib.pyplot as plt
try:
    from time import process_time as perf_counter
except ImportError:
    from time import perf_counter

# Timings collected below, keyed by phase name.
TIMES = {}
ctx = create_some_context()
# Flatten the sample picture into a 1-d int32 array and upload it.
lenar = lena().astype(int32).flatten()
len_buf = Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=lenar)
# 256 zero-initialised bins, copied to the device at buffer creation.
histogram = zeros(256, dtype=int32)
# NOTE(review): the buffer is flagged WRITE_ONLY although it is initialised
# from the host; if the kernel also reads the bins, READ_WRITE may be
# required -- confirm against histogram.cl.
h_buf = Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=histogram)
# Time the program build.
pt = perf_counter()
prg = Program(ctx, load_cl_text("histogram.cl")).build()
TIMES["Compilation"] = perf_counter() - pt
pt = perf_counter()
with CommandQueue(ctx) as queue:
    # One work item per input element; the element count is passed as int32.
    prg.histogram(queue, lenar.shape, None, len_buf, int32(len(lenar)), h_buf)
    # NOTE(review): the clock is read without waiting on the kernel, so this
    # may measure only the enqueue call -- confirm.
    TIMES["Execution"] = perf_counter() - pt
    pt = perf_counter()
    # Copy the bins from the device into ``histogram``.
    enqueue_copy(queue, histogram, h_buf)
    TIMES["Copying"] = perf_counter() - pt
# Explicitly free the device buffers.
h_buf.release()
len_buf.release()
# One "name:<TAB>seconds" line per recorded timing, then plot the bins.
print("\n".join("%s:\t%g" % i for i in TIMES.items()))
plt.plot(histogram, ",")
plt.show()
Beispiel #19
0
def get_elwise_program(context, arguments, operation,
        name="elwise_kernel", options=None,
        preamble="", loop_prep="", after_loop="",
        use_range=False):
    """Generate and build an OpenCL program holding one element-wise kernel.

    With ``use_range`` false the kernel runs ``operation`` for every ``i``
    below ``n``.  With ``use_range`` true it walks from ``start`` towards
    ``stop`` in increments of ``step`` (downwards when ``step < 0``).  The
    template declares none of ``n``, ``start``, ``stop`` or ``step``; they
    have to come from ``arguments``.

    A warning is issued when ``operation`` contains the word ``return``;
    the generated source defines ``PYOPENCL_ELWISE_CONTINUE`` as
    ``continue`` for that purpose.

    :arg context: context handed to :class:`pyopencl.Program`.
    :arg arguments: iterable of objects whose ``declarator()`` method
        returns the C declaration of one kernel argument.
    :arg operation: C code executed for every ``i``.
    :arg name: name of the generated kernel function.
    :arg options: build options forwarded to ``Program.build``; ``None``
        (the default) is treated as an empty list.
    :arg preamble: C code emitted before the kernel definition.
    :arg loop_prep: C code emitted inside the kernel, before the loop.
    :arg after_loop: C code emitted inside the kernel, after the loop.
    :arg use_range: select the start/stop/step loop instead of ``i < n``.
    :returns: the result of ``Program(context, source).build(options)``.
    """
    # A fresh list per call instead of one list object shared by every call
    # through a mutable default argument.
    if options is None:
        options = []

    if use_range:
        body = r"""//CL//
          if (step < 0)
          {
            for (i = start + (work_group_start + lid)*step;
              i > stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          else
          {
            for (i = start + (work_group_start + lid)*step;
              i < stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          """
    else:
        body = """//CL//
          for (i = work_group_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }
          """

    import re
    return_match = re.search(r"\breturn\b", operation)
    if return_match is not None:
        from warnings import warn
        # 'return' would leave the kernel instead of skipping one element.
        warn("Using a 'return' statement in an element-wise operation will "
                "likely lead to incorrect results. Use "
                "PYOPENCL_ELWISE_CONTINUE instead.",
                stacklevel=3)

    source = ("""//CL//
        %(preamble)s

        #define PYOPENCL_ELWISE_CONTINUE continue

        __kernel void %(name)s(%(arguments)s)
        {
          int lid = get_local_id(0);
          int gsize = get_global_size(0);
          int work_group_start = get_local_size(0)*get_group_id(0);
          long i;

          %(loop_prep)s;
          %(body)s
          %(after_loop)s;
        }
        """ % {
            "arguments": ", ".join(arg.declarator() for arg in arguments),
            "name": name,
            "preamble": preamble,
            "loop_prep": loop_prep,
            "after_loop": after_loop,
            "body": body % dict(operation=operation),
            })

    from pyopencl import Program
    return Program(context, source).build(options)
Beispiel #20
0
def get_elwise_program(context,
                       arguments,
                       operation,
                       name="elwise_kernel",
                       options=None,
                       preamble="",
                       loop_prep="",
                       after_loop="",
                       use_range=False):
    """Generate and build an OpenCL program holding one element-wise kernel.

    With ``use_range`` false the kernel runs ``operation`` for every ``i``
    below ``n``.  With ``use_range`` true it walks from ``start`` towards
    ``stop`` in increments of ``step`` (downwards when ``step < 0``).  The
    template declares none of ``n``, ``start``, ``stop`` or ``step``; they
    have to come from ``arguments``.

    :arg context: context handed to :class:`pyopencl.Program`.
    :arg arguments: iterable of objects whose ``declarator()`` method
        returns the C declaration of one kernel argument.
    :arg operation: C code executed for every ``i``.
    :arg name: name of the generated kernel function.
    :arg options: build options forwarded to ``Program.build``; ``None``
        (the default) is treated as an empty list.
    :arg preamble: C code emitted before the kernel definition.
    :arg loop_prep: C code emitted inside the kernel, before the loop.
    :arg after_loop: C code emitted inside the kernel, after the loop.
    :arg use_range: select the start/stop/step loop instead of ``i < n``.
    :returns: the result of ``Program(context, source).build(options)``.
    """
    # A fresh list per call instead of one list object shared by every call
    # through a mutable default argument.
    if options is None:
        options = []

    if use_range:
        body = r"""//CL//
          if (step < 0)
          {
            for (i = start + (work_item_start + lid)*step;
              i > stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          else
          {
            for (i = start + (work_item_start + lid)*step;
              i < stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          """
    else:
        body = """//CL//
          for (i = work_item_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }
          """

    source = ("""//CL//
        %(preamble)s

        __kernel void %(name)s(%(arguments)s)
        {
          int lid = get_local_id(0);
          int gsize = get_global_size(0);
          int work_item_start = get_local_size(0)*get_group_id(0);
          long i;

          %(loop_prep)s;
          %(body)s
          %(after_loop)s;
        }
        """ % {
        "arguments": ", ".join(arg.declarator() for arg in arguments),
        "name": name,
        "preamble": preamble,
        "loop_prep": loop_prep,
        "after_loop": after_loop,
        "body": body % dict(operation=operation),
    })

    from pyopencl import Program
    return Program(context, source).build(options)