def set_opencl(self):
    """Create the OpenCL context and command queue used by this object.

    Platforms returned by ``OpenCL.get_platforms()`` are scanned in order.
    The first platform with at least one device whose
    ``meets_requirements()`` is true is selected, and all such devices of
    that platform are used (devices are never mixed across platforms).

    Sets ``self.context`` and ``self.queue`` and prints the OpenCL version
    reported by the selected platform.

    Raises:
        Exception: if no device on any platform meets the requirements.
    """
    good_devices = []
    good_platform = None
    for platform in OpenCL.get_platforms():
        good_devices = [
            device
            for device in platform.get_devices()
            if device.meets_requirements()
        ]
        if good_devices:
            good_platform = platform
            break

    # Raise a not-supported exception if there are no good devices
    if not good_devices:
        raise Exception("This program is not supported on your hardware")

    # Create an OpenCL context with platform-specific properties
    properties = self.get_context_properties(good_platform)
    self.context = Context(good_devices, properties=properties)

    # Create the command queue for that context
    self.queue = CommandQueue(self.context)

    # Single-argument call form: prints the same text on Python 2 and 3,
    # whereas the bare ``print "..."`` statement is a SyntaxError on Python 3.
    print("Using OpenCL version: " + str(
        good_platform.get_info(platform_info.VERSION)))
def main():
    """Apply the OpenCL median-filter kernel to the sample image.

    Builds ``median_filter.cl``, runs its ``medianFilter`` kernel over
    ``../data/noisyImage.jpg``, logs the elapsed time in milliseconds,
    displays the input and output images and saves the output to
    ``../data/medianFilter-OpenCL.jpg``.
    """
    devices = get_devices()
    try:
        # Log the type of the first device; an empty device list is reported
        # through ``exception`` and execution continues as before.
        first_type = CL_DEVICE_TYPE(devices[0].type)
        debug(first_type)
    except IndexError as ie:
        exception(ie)

    ctx = Context(devices)
    cl_queue = CommandQueue(ctx)

    # Compile the kernel source into a program.
    kernel_text = read_kernel_source("median_filter.cl")
    program = Program(ctx, kernel_text).build()

    # Read the input image and show it.
    noisy = imread('../data/noisyImage.jpg', flatten=True).astype(float32)
    imshow(noisy)

    started = perf_counter()
    kernel_args = allocate_variables(ctx, noisy)
    # One work-item per pixel; the local size is left for the runtime to pick.
    program.medianFilter(cl_queue, noisy.shape, None, *kernel_args)
    # Copy the result back from the buffer passed as the second kernel argument.
    filtered = copy_from_buffer(cl_queue, kernel_args[1], noisy.shape, noisy.dtype)
    elapsed_ms = 1e3 * (perf_counter() - started)
    debug("%g milliseconds" % elapsed_ms)

    # Show the filtered image, then write it to disk.
    imshow(filtered)
    imsave('../data/medianFilter-OpenCL.jpg', filtered)
def setUp(self):
    """Prepare the fixtures: seeded RNG, OpenCL context/queue, image, solver."""
    # Fixed seed so any random data used by the tests is reproducible.
    numpy.random.seed(42)

    # answers=[0, 0] pre-answers the selection prompts of create_some_context.
    context = create_some_context(answers=[0, 0])
    self.ctx = context
    self.queue = CommandQueue(context)

    image_shape = (256, 256)
    self.image = CLImage(context, image_shape)
    self.solver = FastBoxCounting()
def rotate(angle, ctx, in_img_buf, out_img_buf, h, w, prg):
    """Run the ``rotate_`` kernel and copy the output image back to the host.

    Args:
        angle: Rotation angle; converted to ``float32`` before being passed
            to the kernel (units are defined by the kernel, not shown here).
        ctx: OpenCL context in which the command queue is created.
        in_img_buf: Source image object handed to the kernel.
        out_img_buf: Destination image object written by the kernel and then
            read back.
        h: Image height; second component of the launch grid / copy region.
        w: Image width; first component of the launch grid / copy region.
        prg: Built program exposing the ``rotate_`` kernel.

    Returns:
        A ``uint8`` array of shape ``(w, h, 4)`` holding the copied pixels.

    Side effects:
        Prints the "Execution" and "Copying" durations (``perf_counter``
        seconds), one per line.
    """
    times = {}
    pt = perf_counter()
    # Kernel launch and read-back share one ``with`` block: a queue must not
    # be used after its context manager has exited.
    with CommandQueue(ctx) as queue:
        # w and h are passed to the kernel as received — presumably NumPy-sized
        # integers supplied by the caller; TODO confirm.
        prg.rotate_(queue, (w, h), None, in_img_buf, out_img_buf, w, h,
                    float32(angle))
        # Wait for the kernel so that "Execution" covers the actual run and
        # not just the enqueue call.
        queue.finish()
        times["Execution"] = perf_counter() - pt

        pt = perf_counter()
        # NOTE(review): the host array is shaped (w, h, 4) while the copy
        # region is (w, h); confirm callers do not expect (h, w, 4).
        dest = zeros((w, h, 4), dtype=uint8)
        enqueue_copy(queue, dest, out_img_buf, origin=(0, 0), region=(w, h))
        times["Copying"] = perf_counter() - pt

    print("\n".join("%s:\t%g" % i for i in times.items()))
    return dest
def find_set(x0, y0, dx, dy, w, h, ctx, buf, prg):
    """Find Mandelbrot set.

    Launches the ``mandelbrot`` kernel over a ``(w, h)`` grid and copies the
    resulting image back to the host.

    Args:
        x0, y0: Passed to the kernel as ``float64`` (presumably the origin of
            the viewed region — confirm against the kernel source).
        dx, dy: Passed to the kernel as ``float64`` (presumably the per-pixel
            step — confirm against the kernel source).
        w, h: Grid / image width and height; passed to the kernel as ``int32``.
        ctx: OpenCL context in which the command queue is created.
        buf: Image object the kernel writes to and that is read back.
        prg: Built program exposing the ``mandelbrot`` kernel.

    Returns:
        A ``uint8`` array of shape ``(h, w, 4)`` holding the copied pixels.

    Side effects:
        Prints the "Execution" and "Copying" durations (``perf_counter``
        seconds), one per line.
    """
    times = {}
    pt = perf_counter()
    # Kernel launch and read-back share one ``with`` block: a queue must not
    # be used after its context manager has exited.
    with CommandQueue(ctx) as queue:
        prg.mandelbrot(queue, (w, h), None, buf,
                       int32(w), int32(h),
                       float64(x0), float64(y0),
                       float64(dx), float64(dy))
        # Wait for the kernel so that "Execution" covers the actual run and
        # not just the enqueue call.
        queue.finish()
        times["Execution"] = perf_counter() - pt

        pt = perf_counter()
        dest = zeros((h, w, 4), dtype=uint8)
        enqueue_copy(queue, dest, buf, origin=(0, 0), region=(w, h))
        times["Copying"] = perf_counter() - pt

    print("\n".join("%s:\t%g" % i for i in times.items()))
    return dest
def __init__(self, model, Observer=CLObserver, Propagator=CL1Propagator, ctx=None):
    """Build the grid for ``model``, upload it and wire up observer/propagator.

    Args:
        model: Object providing ``build_grid()``; also passed to the
            observer and propagator constructors.
        Observer: Callable invoked as ``Observer(model, ctx=ctx)``.
        Propagator: Callable invoked as ``Propagator(model, ctx=ctx)``.
        ctx: OpenCL context; when falsy, one is obtained from
            ``create_some_context()``.
    """
    ctx = ctx or create_some_context()

    self.model = model
    self.grid_array = self.model.build_grid()

    # Upload the host grid through a short-lived queue.
    # NOTE(review): the queue is closed once this block exits — confirm that
    # later operations on self.grid do not rely on it.
    with CommandQueue(ctx) as upload_queue:
        self.grid = to_device(upload_queue, self.grid_array)

    self.observer = Observer(model, ctx=ctx)
    self.propagator = Propagator(model, ctx=ctx)

    # The first element of the observation is dropped; the rest is kept as
    # the current candidate.
    observation = self.observer.observe(self.grid)
    self.candidate = observation[1:]
    self.done = False
def __init__(self, **ctx_kw_args):
    """Select an OpenCL device and choose the FFT / matrix-product backends.

    Args:
        **ctx_kw_args: Keyword arguments forwarded to
            ``create_some_context``. When none are given, a hint is printed
            and ``interactive=True`` is forwarded instead.

    Attributes set: ``ctx``, ``queue``, ``thr``, ``dev_type``, ``dev_name``,
    ``plat_name``, ``ocl_version``, ``fft_method`` and ``dot_method``.
    """
    print(""" \t############ WELCOME TO CHIMERA.CL ############ """)

    if not ctx_kw_args:
        # Nothing was specified: tell the user how to avoid the prompt next
        # time and fall back to interactive selection.
        print(""" \t CONTEXT IS NOT CHOSEN, PLEASE, DO IT NOW. \t TO AVOID BEING ASKED IN THE FUTURE, YOU MAY \t SPECIFY ARGUMENT OF COMMUNICATOR, e.g. \t comm = Communicator(answers=[0,2]) \t################################################ """)
        ctx_kw_args['interactive'] = True

    self.ctx = create_some_context(**ctx_kw_args)
    self.queue = CommandQueue(self.ctx)

    # Thread object bound to the queue created above.
    self.thr = ocl_api().Thread(cqd=self.queue)

    # Describe the device the queue ended up on.
    device = self.queue.device
    self.dev_type = device_type.to_string(device.type)
    self.dev_name = device.name
    self.plat_name = device.platform.vendor
    self.ocl_version = device.opencl_c_version

    summary = """ \t {} DEVICE {} IS CHOSEN \t ON {} PLATFORM \t WITH {} COMPILER """
    print(summary.format(self.dev_type, self.dev_name,
                         self.plat_name, self.ocl_version))

    running_on_cpu = self.dev_type == 'CPU'

    # FFT backend: pyFFTW on Apple CPU devices, Reikna everywhere else.
    if running_on_cpu and self.plat_name == 'Apple':
        print('\t\tReikna FFT is replaced by pyFFTW')
        self.fft_method = 'pyFFTW'
    else:
        self.fft_method = 'Reikna'

    # Matrix-product backend: NumPy on any CPU device, Reikna otherwise.
    if running_on_cpu:
        print('\t\tReikna MatrixMul is replaced by numpy.dot')
        self.dot_method = 'NumPy'
    else:
        self.dot_method = 'Reikna'
#! /usr/bin/env python3

# Calculation of π using quadrature. Using PyOpenCL.
#
# Copyright © 2012, 2014 Russel Winder

from time import time

# enqueue_copy replaces the long-deprecated enqueue_read_buffer.
from pyopencl import create_some_context, CommandQueue, Program, Buffer, mem_flags, enqueue_copy

import numpy

from output import out

n = 1000000000
delta = 1.0 / n
startTime = time()

context = create_some_context()
queue = CommandQueue(context)
with open('processSlice_opencl.cl', 'r') as f:
    kernel = Program(context, f.read()).build()

# Quadro FX 570 card on Anglides only supports 32-bit operations, hence float not double.
# NOTE(review): numpy.array(n, ...) creates a 0-d array holding the value n,
# not an array of n elements, so results.shape is () and results.nbytes is 4.
# Confirm the intended result layout against processSlice_opencl.cl.
results = numpy.array(n, dtype=numpy.float32)
buffer = Buffer(context, mem_flags.WRITE_ONLY, results.nbytes)
kernel.processSlice(queue, results.shape, None, numpy.int32(n), numpy.float32(delta), buffer)

# Device-to-host read of the kernel output into `results`.
enqueue_copy(queue, results, buffer).wait()

pi = 4.0 * delta * numpy.sum(results)
elapseTime = time() - startTime
out(__file__, pi, n, elapseTime)
# Multiply an n×m matrix by an m×p matrix with the `multiply` OpenCL kernel.
# Both inputs are random 0/1 matrices stored flat as float32.
n, m, p = 3, 4, 5
a = random.randint(2, size=(n * m)).astype(float32)
b = random.randint(2, size=(m * p)).astype(float32)
c = zeros((n * p), dtype=float32)

# Wall-clock durations (perf_counter seconds) of the individual stages.
TIMES = {}

ctx = create_some_context()

# Inputs are copied to the device at buffer creation; the output buffer is
# only sized here and filled by the kernel.
a_buf = Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = Buffer(ctx, mf.WRITE_ONLY, c.nbytes)

pt = perf_counter()
prg = Program(ctx, load_cl_text("multiply_matr.cl")).build()
TIMES["Compilation"] = perf_counter() - pt

pt = perf_counter()
# Kernel launch and read-back share one `with` block: a queue must not be
# used after its context manager has exited.
with CommandQueue(ctx) as queue:
    # One work-item per element of the flat result array.
    prg.multiply(queue, c.shape, None,
                 uint16(n), uint16(m), uint16(p),
                 a_buf, b_buf, c_buf)
    # Wait for the kernel so that "Execution" covers the actual run and not
    # just the enqueue call.
    queue.finish()
    TIMES["Execution"] = perf_counter() - pt

    pt = perf_counter()
    enqueue_copy(queue, c, c_buf)
    TIMES["Copying"] = perf_counter() - pt

a_buf.release()
b_buf.release()
c_buf.release()

print("matrix A:")
print(a.reshape(n, m))
print("matrix B:")
print(b.reshape(m, p))
print("multiplied A*B:")
print(c.reshape(n, p))
def setUp(self):
    """Seed NumPy's RNG and create the OpenCL context and queue for a test."""
    # Fixed seed so any random data used by the tests is reproducible.
    numpy.random.seed(42)

    # answers=[0, 0] pre-answers the selection prompts of create_some_context.
    context = create_some_context(answers=[0, 0])
    self.ctx = context
    self.queue = CommandQueue(context)
def setUp(self):
    """Create the OpenCL context and command queue used by each test."""
    # answers=[0, 0] pre-answers the selection prompts of create_some_context.
    context = create_some_context(answers=[0, 0])
    self.ctx = context
    self.queue = CommandQueue(context)