Python enqueue_read_buffer Beispiele, pyopencl.enqueue_read_buffer Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: main.py Projekt: jweede/UC-CS668-Parallel-Computing-Labs-2011

        def filter_primes(bit_array, offset):

            if (not len(self.primes)):
                return empty_bitarray()

            b = numpy.array(self.primes, dtype=numpy.uint32)
            a = empty_bitarray()
            c = numpy.array(offset, dtype=numpy.uint32)

            b_buf = cl.Buffer(self.ctx,
                              self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                              hostbuf=b)
            a_buf = cl.Buffer(self.ctx,
                              self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                              hostbuf=a)
            c_buf = cl.Buffer(self.ctx,
                              self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                              hostbuf=c)

            # send integers and new bit mask to pfilter
            event2 = self.program.pfilter(self.queue, (self.block_size, ),
                                          None, b_buf, a_buf, c_buf)
            cl.enqueue_read_buffer(self.queue, a_buf, a)

            print 'Filter Duration:', 1e-9 * (event2.profile.end -
                                              event2.profile.start)

            return a

Beispiel #2

0

Datei anzeigen

Datei: test_wrapper.py Projekt: MaybeS/pyopencl

def test_that_python_args_fail(ctx_factory):
    """Check that kernel calls reject bare Python scalars and accept numpy scalars.

    Two calls passing plain ``int``/``float`` arguments are expected to raise
    ``cl.LogicError``; a third call using ``np.float32``/``np.int32`` must go
    through, after which the buffer is read back.
    """
    ctx = ctx_factory()

    kernel_source = """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """
    program = cl.Program(ctx, kernel_source).build()

    host_values = np.random.rand(50000)
    queue = cl.CommandQueue(ctx)
    buffer_flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    device_values = cl.Buffer(ctx, buffer_flags, hostbuf=host_values)

    # Via an explicit Kernel object, with two bare ints.
    mult_kernel = cl.Kernel(program, "mult")
    try:
        mult_kernel(queue, host_values.shape, None, device_values, 2, 3)
    except cl.LogicError:
        pass
    else:
        assert False, "PyOpenCL should not accept bare Python types as arguments"

    # Via the program attribute, with a bare float and a bare int.
    try:
        program.mult(queue, host_values.shape, None, device_values, float(2), 3)
    except cl.LogicError:
        pass
    else:
        assert False, "PyOpenCL should not accept bare Python types as arguments"

    # Sized numpy scalars are the accepted form.
    program.mult(queue, host_values.shape, None, device_values,
                 np.float32(2), np.int32(3))

    readback = np.empty_like(host_values)
    read_event = cl.enqueue_read_buffer(queue, device_values, readback)
    read_event.wait()

Beispiel #3

0

Datei anzeigen

Datei: main.py Projekt: jameszhao00/lightwayrt

def test_cl():
    ctx = cl.create_some_context()  # (interactive=False)

    # print 'ctx', ctx
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    f = open("part1.cl", "r")
    fstr = "".join(f.readlines())
    program = cl.Program(ctx, fstr).build()
    mf = cl.mem_flags

    cameraPos = np.array([0, 6, -1, 0])
    invView = la.inv(look_at((0, 6, -1), (0, 1, 1), (0, 1, 0)))
    invProj = la.inv(perspective(60, 1, 1, 1000))
    print "view", invView
    print "proj", invProj
    viewParamsData = (
        cameraPos.flatten().tolist()
        + np.transpose(invView).flatten().tolist()
        + np.transpose(invProj).flatten().tolist()
    )
    # print 'vpd', viewParamsData
    viewParams = struct.pack("4f16f16f", *viewParamsData)
    viewParams_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=viewParams)
    num_pixels = 1000 * 1000
    # setup opencl
    dest = np.ndarray((1000, 1000, 4), dtype=np.float32)
    dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, dest.nbytes)
    local_shape = (8, 8)
    # run kernel
    evt = program.part1(queue, (dest.shape[0], dest.shape[1]), None, viewParams_buf, dest_buf)
    # evt = program.part1(queue, dest.shape, None, dest_buf)
    cl.enqueue_read_buffer(queue, dest_buf, dest).wait()
    print "time", (evt.profile.end - evt.profile.start) * 0.000001, "ms"
    return dest

Beispiel #4

0

Datei anzeigen

    def search(self, midstate):
        """Run the fill kernel, then the search kernel, and return the output.

        The result of ``flipendian32(midstate)`` is split into eight
        consecutive 4-element slices that become arguments 0-7 of the fill
        kernel. The returned value is ``str()`` of the ``bytearray`` read back
        from ``self.output_buf``.
        """
        words = flipendian32(midstate)
        queue = self.queue
        fill_kernel = self.sha512_fill

        for slot in xrange(8):
            start = slot * 4
            fill_kernel.set_arg(slot, words[start:start + 4])
        fill_kernel.set_arg(8, self.hashes_buf)
        fill_kernel.set_arg(9, self.keyhash_buf)
        cl.enqueue_nd_range_kernel(queue, fill_kernel,
                                   (HASHES_NUM, ), (self.sha512_fill_ws, ))
        queue.finish()

        # Overwrite the device output buffer with OUTPUT_SIZE zero bytes
        # before the search kernel runs.
        output = bytearray(OUTPUT_SIZE)
        cl.enqueue_write_buffer(queue, self.output_buf, output)
        queue.finish()

        search_kernel = self.ksearch
        search_kernel.set_arg(0, self.hashes_buf)
        search_kernel.set_arg(1, self.keyhash_buf)
        search_kernel.set_arg(2, self.output_buf)
        cl.enqueue_nd_range_kernel(queue, search_kernel,
                                   (KEYS_NUM, ), (self.ksearch_ws, ))
        queue.finish()

        cl.enqueue_read_buffer(queue, self.output_buf, output)
        queue.finish()
        return str(output)

Beispiel #5

0

Datei anzeigen

Datei: broadattack.py Projekt: vanhoefm/broadkey

def test_gpu_aes():
	import pyopencl as cl
	import numpy

	# Prepare context and command queue
	ctx = cl.create_some_context(interactive=False)
	queue = cl.CommandQueue(ctx)

	print "Compiling kernel ..."
	with open_cl("ralink.cl", "r") as fp:
		code = fp.read() % { 'STARTTIME': 0, 'MACADDR1': 0, 'MACADDR2': 0,
				'NONCE1': 0, 'NONCE2': 0, 'NONCE3': 0, 'NONCE4': 0,
				'KEYSTREAM1': 0, 'KEYSTREAM2': 0,}
		program = cl.Program(ctx, code).build(options="-I %s" % get_opencl_path())

	# Prepare memory
	result = numpy.zeros(shape=(8), dtype=numpy.uint32)
	result[0] = 0xffffffff;
	result[1] = 0xffffffff;
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY| cl.mem_flags.COPY_HOST_PTR, hostbuf=result)

	# Run the program
	print "Running kernel ..."
	program.test_aes(queue, (1,), None, dest_buf)

	# Read the result
	cl.enqueue_read_buffer(queue, dest_buf, result).wait()
	print list2hex(result)
	assert result[0] == 0xD4E415CB
	assert result[1] == 0xD038A82B
	assert result[2] == 0x10A673DE
	assert result[3] == 0xEA25B206

Beispiel #6

0

Datei anzeigen

def get_iterations(context, complex_values, iterations):
    """Run the ``mandelbrot`` kernel from ``kernel.cl`` over ``complex_values``.

    :param context: OpenCL context used for the queue, the buffers and the
        program.
    :param complex_values: host array uploaded read-only; its shape is used
        as the global work size and as the shape of the result.
    :param iterations: currently unused (the matching kernel argument is
        commented out in the call below).
    :return: array of dtype ``clar.vec.ushort4`` with the shape of
        ``complex_values``, read back from the device.

    The module-level ``gradient`` array is uploaded as a third buffer.
    """
    command_queue = cl.CommandQueue(context)
    output_array = np.zeros(complex_values.shape, dtype=clar.vec.ushort4)

    flags = cl.mem_flags
    complex_values_buffer = cl.Buffer(context,
                                      flags.READ_ONLY | flags.COPY_HOST_PTR,
                                      hostbuf=complex_values)
    gradient_array_buffer = cl.Buffer(context,
                                      flags.READ_ONLY | flags.COPY_HOST_PTR,
                                      hostbuf=gradient)
    output_array_buffer = cl.Buffer(context, flags.WRITE_ONLY,
                                    output_array.nbytes)

    # Read the source inside a context manager so the handle is closed even
    # when reading raises; the build no longer happens with the file open.
    with open('kernel.cl', 'r') as kernel_file:
        kernel_source = kernel_file.read()
    program = cl.Program(context, kernel_source).build()

    program.mandelbrot(
        command_queue,
        complex_values.shape,
        None,  # Local work size not specified
        complex_values_buffer,
        output_array_buffer,
        gradient_array_buffer,
        #np.uint(iterations)
    )

    cl.enqueue_read_buffer(command_queue, output_array_buffer,
                           output_array).wait()

    return output_array

Beispiel #7

0

Datei anzeigen

Datei: main.py Projekt: epitron/PyCl-Convergence

	def getData(self):
		"""Run ``GetWorld`` on the buffer selected by ``tickState`` and read the result into ``self.a``."""
		# ``== False`` is kept deliberately: it is not the same test as
		# ``not self.tickState`` for values such as None.
		if self.tickState == False:
			world_buf = self.a_buf
		else:
			world_buf = self.b_buf
		self.kUtil.GetWorld(self.queue, self.a.shape, None, self.ar_ySize, world_buf, self.dest_buf)
		read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, self.a)
		read_event.wait()

Beispiel #8

0

Datei anzeigen

    def getData(self, n, axis, data_D, data_H, name):
        """Read ``data_D`` into ``data_H`` and plot one line of it.

        :param n: index of the line to extract: ``data_H[:, n]`` when
            ``axis == 'x'``, ``data_H[n, :]`` when ``axis == 'y'``.
        :param axis: ``'x'`` or ``'y'``; selects ``lbm.X`` or ``lbm.Y`` as the
            node coordinates.
        :param data_D: device buffer to read from.
        :param data_H: host array that receives the data.
        :param name: stored on ``self`` for later use by the plot.
        :raises ValueError: if ``axis`` is neither ``'x'`` nor ``'y'``
            (previously this surfaced as an UnboundLocalError further down).
        """
        cl.enqueue_read_buffer(lbm.queue, data_D, data_H).wait()

        if axis == 'x':
            nodes = lbm.X
            y = data_H[:, n]
        elif axis == 'y':
            nodes = lbm.Y
            y = data_H[n, :]
        else:
            raise ValueError("axis must be 'x' or 'y', got %r" % (axis,))

        # Cell midpoints: halfway between each pair of consecutive nodes.
        N = nodes.size - 1
        x = np.zeros((N))
        for i in range(1, nodes.size):
            x[i - 1] = (nodes[i] - nodes[i - 1]) / 2.0 + nodes[i - 1]

        self.x = x
        self.y = y
        self.n = n
        self.axis = axis
        self.data_D = data_D
        self.data_H = data_H
        self.name = name

        self.plotLine()

        return

Beispiel #9

0

Datei anzeigen

    def getData(self, data_D, data_H, name):
        """
        Read ``data_D`` from the device into ``data_H`` and show it as a surface.

        The surface is drawn with ``mlab.surf`` over the midpoints between
        consecutive entries of ``lbm.X`` and ``lbm.Y``; ``name`` is used as the
        scalar bar title. The surface object and both data handles are stored
        on ``self``.
        """
        mlab.figure(size=(512, 512))

        read_event = cl.enqueue_read_buffer(lbm.queue, data_D, data_H)
        read_event.wait()

        # One midpoint per pair of neighbouring nodes, hence size - 1 entries.
        x_nodes = lbm.X
        y_nodes = lbm.Y
        x = np.zeros((x_nodes.size - 1))
        y = np.zeros((y_nodes.size - 1))

        for k in range(x.size):
            x[k] = (x_nodes[k + 1] - x_nodes[k]) / 2.0 + x_nodes[k]

        for k in range(y.size):
            y[k] = (y_nodes[k + 1] - y_nodes[k]) / 2.0 + y_nodes[k]

        surface = mlab.surf(x, y, data_H, warp_scale='auto', colormap="jet")
        mlab.axes(surface)
        mlab.scalarbar(surface, title=name)

        self.s = surface
        self.data_D = data_D
        self.data_H = data_H

Beispiel #10

0

Datei anzeigen

Datei: visualangle.py Projekt: RyanHope/gazetools_cl

 def __call__(self, ctx, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``subtended_angle2_naive`` kernel and return its output.

     ``x1`` and ``y1`` are converted to ``float32`` arrays and uploaded; every
     other argument is passed to the kernel as a ``float32`` scalar. One work
     item is launched per element of ``x1``.

     :return: ``float32`` array with the shape of ``x1``, read back from the
         device.
     """
     self.build(ctx)
     # np.asarray converts only when necessary. np.array(..., copy=False)
     # raises on NumPy >= 2 whenever a conversion copy is required.
     x1 = np.asarray(x1, dtype=np.float32)
     y1 = np.asarray(y1, dtype=np.float32)
     x2 = np.float32(x2)
     y2 = np.float32(y2)
     ez = np.float32(ez)
     ex = np.float32(ex)
     ey = np.float32(ey)
     rx = np.float32(rx)
     ry = np.float32(ry)
     sw = np.float32(sw)
     sh = np.float32(sh)

     input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     buffers = []
     try:
         x1_buf = cl.Buffer(self.ctx, input_flags, hostbuf=x1)
         buffers.append(x1_buf)
         y1_buf = cl.Buffer(self.ctx, input_flags, hostbuf=y1)
         buffers.append(y1_buf)
         out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)
         buffers.append(out_buf)

         queue = cl.CommandQueue(self.ctx)
         self.prg.subtended_angle2_naive(queue, x1.shape, None, x1_buf, y1_buf,
                                         x2, y2, rx, ry, sw, sh, ez, ex, ey,
                                         out_buf)
         out = np.empty_like(x1)
         cl.enqueue_read_buffer(queue, out_buf, out).wait()
         return out
     finally:
         # Release whatever was allocated, including when the kernel launch
         # or the read-back raised.
         for buf in buffers:
             buf.release()

Beispiel #11

0

Datei anzeigen

    def clFindRoute(self, key, candidateTableList):
        """Run the ``match`` kernel per candidate until a position is found.

        For each entry of ``candidateTableList`` the entry is assigned to
        ``key.prefixlen``, ``int(key.network)`` is passed to the kernel along
        with the table at index ``entry - 1``, and ``self.pos`` is read back.
        The loop stops at the first entry for which ``self.pos`` is not -1.

        :return: ``[last_entry_tried, self.pos]``.

        NOTE(review): with an empty ``candidateTableList`` the loop variable is
        never bound and the return statement raises.
        """
        kernel_time = 0
        queue = self.queue
        self.pos = np.array(-1, dtype=np.int32)

        # Reset the device-side position to -1 before searching.
        cl.enqueue_write_buffer(queue, self.pos_buf, self.pos)

        for prefix_len in candidateTableList:
            key.prefixlen = prefix_len
            address = int(key.network)
            table_index = prefix_len - 1

            match_event = self.program.match(queue,
                                             self.tableShape[table_index],
                                             None,
                                             self.table_buf[table_index],
                                             self.pos_buf,
                                             np.int32(address))
            match_event.wait()
            cl.enqueue_read_buffer(queue, self.pos_buf, self.pos)
            # Accumulated profiling time; only used by a disabled debug print.
            kernel_time += match_event.profile.end - match_event.profile.start
            if self.pos != -1:
                break
        return [prefix_len, self.pos]

Beispiel #12

0

Datei anzeigen

    def compute(self, floatimage, histogram, k):
        """Run the ``IIF`` kernel and return the transformed image.

        :param floatimage: image data; flattened to ``width * height``
            ``float32`` values before upload.
        :param histogram: 3-D array; its first two dimensions are taken as
            ``width`` and ``height``. It is flattened to ``float32`` for upload.
        :param k: scalar passed to the kernel as ``float32``.
        :return: ``(width, height)`` array of Python-float (``float64``) dtype.
        """
        width, height, _ = np.shape(histogram)
        numpixels = width * height

        image_linear = np.reshape(floatimage, (numpixels, )).astype(np.float32)
        histogram_linear = np.reshape(
            histogram, (np.size(histogram), )).astype(np.float32)
        # image_linear is already float32, so zeros_like needs no extra cast.
        transform = np.zeros_like(image_linear)

        mf = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=image_linear)
        self.buf_histogram = cl.Buffer(self.context,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=histogram_linear)
        self.output_buf = cl.Buffer(self.context, mf.READ_WRITE,
                                    transform.nbytes)

        kernel = self.program.IIF
        # The first three arguments are scalars; the last three are buffers.
        kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.float32] +
                                     [None] * 3)
        kernel.set_arg(0, np.uintc(width))
        kernel.set_arg(1, np.uintc(height))
        kernel.set_arg(2, np.float32(k))
        kernel.set_arg(3, self.buf_image)
        kernel.set_arg(4, self.buf_histogram)
        kernel.set_arg(5, self.output_buf)

        cl.enqueue_nd_range_kernel(self.queue, kernel, image_linear.shape,
                                   None).wait()

        cl.enqueue_read_buffer(self.queue, self.output_buf, transform).wait()
        # ``np.float`` was only an alias of the builtin ``float`` and was
        # removed in NumPy 1.24; the builtin gives the same float64 result.
        return np.reshape(transform, (width, height)).astype(float)

Beispiel #13

0

Datei anzeigen

    def compute(self, image, num_bins):
        """Run the ``iif_binid`` kernel and return a per-pixel, per-bin array.

        ``image`` must be 2-D; it is flattened to ``float32`` and uploaded.
        The kernel is launched with one work item per pixel and its output is
        returned as a ``(width, height, num_bins)`` ``float32`` array.
        """
        width, height = np.shape(image)
        pixel_count = width * height

        flat_image = np.reshape(image, (pixel_count, )).astype(np.float32)
        flat_result = np.zeros((pixel_count * num_bins, ), dtype=np.float32)

        flags = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   flags.READ_ONLY | flags.COPY_HOST_PTR,
                                   hostbuf=flat_image)
        self.output_buf = cl.Buffer(self.context, flags.READ_WRITE,
                                    flat_result.nbytes)

        binid_kernel = self.program.iif_binid
        # Three scalar arguments followed by two buffer arguments.
        scalar_dtypes = [np.uintc, np.uintc, np.ubyte]
        binid_kernel.set_scalar_arg_dtypes(scalar_dtypes + [None] * 2)
        binid_kernel.set_arg(0, np.uintc(width))
        binid_kernel.set_arg(1, np.uintc(height))
        binid_kernel.set_arg(2, np.ubyte(num_bins))
        binid_kernel.set_arg(3, self.buf_image)
        binid_kernel.set_arg(4, self.output_buf)

        launch_event = cl.enqueue_nd_range_kernel(self.queue, binid_kernel,
                                                  flat_image.shape, None)
        launch_event.wait()

        read_event = cl.enqueue_read_buffer(self.queue, self.output_buf,
                                            flat_result)
        read_event.wait()

        shaped = np.reshape(flat_result, (width, height, num_bins))
        return shaped.astype(np.float32)

Beispiel #14

0

Datei anzeigen

    def FuseRGBD_GPU(self, Image, boneDQ, jointDQ):
        """
        Update the TSDF volume with Image
        :param Image: RGBD image to update to its surfaces
        :param boneDQ: the dual quaternion of bone in new frame
        :param jointDQ: the dual quaternion of joint in new frame
        :return: none

        The depth image and both dual quaternions are uploaded, the
        ``FuseTSDF`` kernel is run, and the TSDF and weight buffers are read
        back into ``self.TSDF`` and ``self.Weight``. ``self.Pose_GPU`` is
        passed to the kernel but is not uploaded here.
        """
        gpu = self.GPUManager
        queue = gpu.queue

        # Upload the per-frame inputs.
        cl.enqueue_write_buffer(queue, self.DepthGPU, Image.depth_image)
        cl.enqueue_write_buffer(queue, self.boneDQGPU, boneDQ)
        cl.enqueue_write_buffer(queue, self.jointDQGPU, jointDQ)

        # Fuse the RGBD image data with the TSDF volume 3D model.
        global_size = (self.Size[0], self.Size[1])
        gpu.programs['FuseTSDF'].FuseTSDF(
            queue, global_size, None,
            self.TSDFGPU, self.DepthGPU, self.Param, self.Size_Volume,
            self.Pose_GPU, self.boneDQGPU, self.jointDQGPU, self.planeF,
            self.Calib_GPU,
            np.int32(Image.Size[0]), np.int32(Image.Size[1]),
            self.WeightGPU)

        # Update the CPU-side arrays from the device buffers.
        # (A NaN count of self.TSDF can be taken between the two reads when
        # debugging.)
        cl.enqueue_read_buffer(queue, self.TSDFGPU, self.TSDF).wait()
        cl.enqueue_read_buffer(queue, self.WeightGPU, self.Weight).wait()

Beispiel #15

0

Datei anzeigen

Datei: openclpow.py Projekt: Bitmessage/PyBitmessage

def do_opencl_pow(hash, target):
    """Run ``kernel_sha512`` in batches until it reports a non-zero value.

    Returns the zero-initialised value immediately when ``enabledGpus`` is
    empty. Each batch is passed the running count of work items already
    launched as its third argument. Raises ``Exception("Interrupted")`` when
    the module-level ``shutdown`` flag becomes non-zero.

    :return: the ``uint64`` value read back from the device.
    """
    result = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if len(enabledGpus) == 0:
        return result[0][0]

    payload = numpy.zeros(1, dtype=hash_dt, order='C')
    payload[0]['v'] = ("0000000000000000" + hash).decode("hex")
    payload[0]['target'] = target

    flags = cl.mem_flags
    hash_buf = cl.Buffer(ctx, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=payload)
    dest_buf = cl.Buffer(ctx, flags.WRITE_ONLY, result.nbytes)

    kernel = program.kernel_sha512
    group_size = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    started = time.time()
    tried = 0
    # Each batch launches 2000 work groups of the maximum size.
    batch = group_size*2000

    while result[0][0] == 0 and shutdown == 0:
        kernel.set_arg(2, pack("<Q", tried))
        cl.enqueue_nd_range_kernel(queue, kernel, (batch,), (group_size,))
        cl.enqueue_read_buffer(queue, dest_buf, result)
        queue.finish()
        tried += batch
        elapsed = time.time() - started
    if shutdown != 0:
        raise Exception ("Interrupted")
    total = time.time() - started
    return result[0][0]

Beispiel #16

0

Datei anzeigen

Datei: GPU_Processor.py Projekt: erikhazzard/OpenCL-RPC-Example

    def execute(self, params):
        ''' This handles the actual execution for the processing, which would
        get executed on each request - this is where we care about the
        performance
        '''

        timing.timings.start('load')
        self.load_program(params)
        timing.timings.stop('load')
        finish = timing.timings.timings['load']['timings'][-1]
        print '<<< Loaded program in %s ms' % (finish)

        timing.timings.start('execute')
        # Start the program
        self.program.worker(self.queue, 
            self.data['income'].shape,
            None,
            self.income_buf,
            self.capGains_buf,
            self.dividendsInterest_buf,
            self.children_buf,
            self.dest_buf,
        )

        # Get an empty numpy array in the shape of the original data
        result = numpy.empty_like(self.data['income'])

        #Wait for result
        cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

        #show timing info
        timing.timings.stop('execute')
        finish = timing.timings.timings['execute']['timings'][-1]
        print '<<< Executed in %s ms' % (finish)
        return result

Beispiel #17

0

Datei anzeigen

    def randomfill(self):
        """Run the ``randomfill`` kernel and replace ``self.nff`` with its output.

        Both images in ``self.img`` are uploaded as read-only inputs and
        ``self.nff`` seeds the output buffer. The elapsed time is added to
        ``self.times["randomfill"]``.
        """
        started = getTime()
        flags = cl.mem_flags

        input_flags = flags.READ_ONLY | flags.COPY_HOST_PTR
        input_buffers = []
        for index in (0, 1):
            input_buffers.append(
                cl.Buffer(self.ctx, input_flags, hostbuf=self.img[index]))
        self.inputBuf = input_buffers

        self.outputBuf = cl.Buffer(self.ctx,
                                   flags.WRITE_ONLY | flags.COPY_HOST_PTR,
                                   hostbuf=self.nff)

        patch_height = numpy.int32(self.patchSize[0])
        patch_width = numpy.int32(self.patchSize[1])
        image_height = numpy.int32(self.size[0])
        image_width = numpy.int32(self.size[1])
        self.program.randomfill(self.queue, self.effectiveSize, None,
                                patch_height, patch_width,
                                image_height, image_width,
                                self.inputBuf[0], self.inputBuf[1],
                                self.outputBuf)

        # Read into a scratch array, then store an independent copy.
        scratch = numpy.empty_like(self.nff)
        cl.enqueue_read_buffer(self.queue, self.outputBuf, scratch).wait()
        self.nff = numpy.copy(scratch)

        self.times["randomfill"] += getTime() - started

Beispiel #18

0

Datei anzeigen

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]

Beispiel #19

0

Datei anzeigen

Datei: lib_gpu.py Projekt: Jak23/modular

 def execute(self, *args, **kwargs):
     """Load the inputs, run ``program__`` and return the contents of ``dest_buf``.

     All arguments are forwarded to ``load_data``. The result has the shape
     and dtype of ``self.a``.
     """
     self.load_data(*args, **kwargs)
     work_shape = self.a.shape
     self.program.program__(self.queue, work_shape, None,
                            self.a_buf, self.b_buf, self.dest_buf)
     result = np.empty_like(self.a)
     read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, result)
     read_event.wait()
     return result

Beispiel #20

0

Datei anzeigen

Datei: testgen.py Projekt: sleepnezz/sparkcl

def map_function(data):
    """Add ``data[0]`` and ``data[1]`` element-wise on an OpenCL device.

    The platform and device indices are taken from the output of
    ``../bin/get-host-platform-device.sh``. Both inputs are converted to
    ``float32``. The work size follows the shorter input instead of the
    previously hard-coded 5, so the kernel never indexes past either array.

    :param data: pair of sequences to add.
    :return: ``float32`` array holding the element-wise sums; empty when
        either input is empty.
    """
    np_data = [
        np.array(data[0]).astype(np.float32),
        np.array(data[1]).astype(np.float32),
    ]
    # The kernel reads A[i] and B[i] for every work item, so the launch must
    # not be larger than the shorter of the two inputs.
    count = min(np_data[0].size, np_data[1].size)
    if count == 0:
        # Nothing to add; skip device setup entirely.
        return np.zeros((0, ), dtype=np.float32)

    proc = subprocess.Popen(["../bin/get-host-platform-device.sh"], stdout=subprocess.PIPE, shell=True)
    proc_out, _ = proc.communicate()
    SPARKCL_PLATFORM, SPARKCL_DEVICE = proc_out.split()

    KERNEL_CODE="""
        __kernel void ArraySum(__global float *A,__global float *B,__global float *C){
       int i = get_global_id(0);
       C[i] = A[i]+B[i];
}

    """

    cl_device=cl.get_platforms()[int(SPARKCL_PLATFORM)].get_devices()[int(SPARKCL_DEVICE)]
    ctx = cl.Context([cl_device])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, KERNEL_CODE).build()
    kernel = prg.ArraySum
    mf = cl.mem_flags

    data_buf = [
        cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=host_array)
        for host_array in np_data
    ]

    result = np.zeros((count, ), dtype=np.float32)
    result_buf = cl.Buffer(ctx, mf.WRITE_ONLY, result.nbytes)

    kernel(queue, (count, ), None, data_buf[0], data_buf[1], result_buf)
    cl.enqueue_read_buffer(queue, result_buf, result).wait()
    return result

Beispiel #21

0

Datei anzeigen

Datei: CL.py Projekt: naojsoft/ginga

    def transform_uint32(self, data_np,
                         flip_x=False, flip_y=False, swap_xy=False,
                         out=None):
        """Run the ``image_transform_uint32`` kernel on ``data_np``.

        The three flags are passed to the kernel as ``int32`` values. When
        ``swap_xy`` is set, the first two dimensions of the result shape are
        exchanged. If ``out`` is given, the result is read into it; otherwise
        a new array of the result shape is allocated.

        :return: the array holding the data read back from the device.
        """
        height, width = data_np.shape[:2]

        # Leading two dimensions of the output; any further ones are kept.
        leading_dims = (width, height) if swap_xy else (height, width)
        new_size = list(leading_dims) + list(data_np.shape[2:])

        flags = cl.mem_flags

        # The upload requires a contiguous host array.
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, data_np.nbytes)

        self.program.image_transform_uint32(
            self.queue, [height, width], None,
            src_buf, dst_buf,
            np.int32(width), np.int32(height),
            np.int32(flip_x), np.int32(flip_y),
            np.int32(swap_xy))

        if out is None:
            out = np.empty_like(data_np).reshape(new_size)
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Beispiel #22

0

Datei anzeigen

Datei: openclpow.py Projekt: N0U/PyBitmessage

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, cl.get_platforms()[0].get_devices()[1])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]

Beispiel #23

0

Datei anzeigen

Datei: fdtd3d_gpu_cpu_non-pinned.py Projekt: wbkifun/fdtd_accelerate

	def exchange_boundary_h(s):
		"""Pass field buffers 4 and 5 (hy, hz) from each device to the next one.

		Every device except the last reads both buffers into its staging
		arrays at its offset; every device except the first then writes the
		staging arrays of its predecessor into its own buffers.
		"""
		# Field index -> slot in the staging pair: 4 is hy_gpu, 5 is hz_gpu.
		field_slots = ((4, 0), (5, 1))

		senders = zip(s.queues, s.eh_fields_gpus, s.tmpfs, s.offsets)[:-1]
		for queue, fields, staging, offset in senders:
			for field_index, slot in field_slots:
				cl.enqueue_read_buffer(queue, fields[field_index], staging[slot], offset)

		receivers = zip(s.queues[1:], s.eh_fields_gpus[1:], s.tmpfs[:-1])
		for queue, fields, staging in receivers:
			for field_index, slot in field_slots:
				cl.enqueue_write_buffer(queue, fields[field_index], staging[slot])

Beispiel #24

0

Datei anzeigen

Datei: test.py Projekt: roundrobin/OpenCL-Benchmark-Tests

    def execute(self):
        """ This handles the actual execution for the processing, which would
        get executed on each request - this is where we care about the
        performance
        """
        timing.timings.start("execute")

        # Start the program
        self.program.worker(self.queue, self.data1.shape, None, self.data1_buf, self.data2_buf, self.dest_buf)

        # Get an empty numpy array in the shape of the original data
        result = numpy.empty_like(self.data1)

        # Wait for result
        cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

        # show timing info
        timing.timings.stop("execute")
        finish = timing.timings.timings["execute"]["timings"][-1]
        print "<<< DONE in %s" % (finish)

        # Open data file to append to
        data_file = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../data.csv"), "a")
        data_file.write("PyOpenCl %s,%s,%s,%s\n" % (process_type, finish, num_records, num_calculations))
        data_file.close()

Beispiel #25

0

Datei anzeigen

Datei: locator_cl.py Projekt: crosslore/cuauv_software

    def update(self, sub_pos, angle, min_dist, max_dist, width, in_weight,
               out_weight):
        '''
        Apply one piece of evidence to the probability grid and renormalize.

        The evidence is that the sub is at ``sub_pos`` (north, east), the
        target is seen at an absolute heading of ``angle`` (converted to
        radians here) and is most likely between ``min_dist`` and ``max_dist``
        away; the distances are passed to the kernel squared.
        ``in_weight`` = P(this measurement | buoy at point p) for p inside the
        region; ``out_weight`` is the same for points outside it.

        After the kernel runs, the probabilities are read back, divided by
        their sum, and written to the device again.
        '''

        north, east = sub_pos
        scalar_args = (
            float32(north),
            float32(east),
            float32(radians(angle)),
            float32(min_dist**2),
            float32(max_dist**2),
            float32(width),
            float32(in_weight),
            float32(out_weight),
        )
        cl_program.evidence(cl_queue, self.norths.shape, None,
                            self.norths_buf, self.easts_buf, self.prob_buf,
                            *scalar_args)
        #TODO ?
        read_event = cl.enqueue_read_buffer(cl_queue, self.prob_buf,
                                            self.probabilities)
        read_event.wait()

        # Normalize in place so the probabilities sum to one.
        self.probabilities /= numpy.sum(self.probabilities)

        cl.enqueue_write_buffer(cl_queue, self.prob_buf, self.probabilities)

Beispiel #26

0

Datei anzeigen

Datei: lib_gpu.py Projekt: ctogle/modular

	def execute(self, *args, **kwargs):
		"""Load the inputs, run ``program__`` and return the contents of ``dest_buf``.

		All arguments are forwarded to ``load_data``. The result has the
		shape and dtype of ``self.a``.
		"""
		self.load_data(*args, **kwargs)
		work_shape = self.a.shape
		self.program.program__(self.queue, work_shape, None,
						self.a_buf, self.b_buf, self.dest_buf)
		result = np.empty_like(self.a)
		read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, result)
		read_event.wait()
		return result

Beispiel #27

0

Datei anzeigen

Datei: OCLLingo.py Projekt: ihaque/SIML

    def getMultipleRows(self,rowbase,rowlimit): #{{{
        """Computes multiple Tanimoto rows *rowbase:rowlimit* corresponding to comparing every SMILES string
        in the query set with the reference SMILES strings having index *rowbase*, *rowbase+1*, ..., *rowlimit-1*
        in the reference set, and stores this block as the most recent result in ``self.resultmatrix``.

        This method is synchronous (it will not return until the block has been completely computed).

        Returns the first ``self.nquery`` columns of the result block (the padding columns are dropped).

        Raises ValueError if *rowbase* is negative or *rowlimit* exceeds the size of the reference set.
        """
        if rowbase < 0 or rowlimit > self.nref:
            # A bare ``raise`` has no active exception to re-raise here, so
            # report the invalid range explicitly.
            raise ValueError("row range [%s, %s) is outside the reference set of size %s"
                             % (rowbase, rowlimit, self.nref))

        # Pad rows out to 64 byte pitch (16 float32 values). Floor division
        # keeps the pitch an integer under true division as well.
        rowpitchInFloat = 16*((self.nquery+15)//16)

        # Using pagelocked memory and async copy seems to actually slow us down
        # on large tiled calculations
        self.resultmatrix = numpy.empty((rowlimit-rowbase,rowpitchInFloat),dtype=numpy.float32)
        self.gpu.gpumatrix = cl.Buffer(self.gpu.context,cl.mem_flags.WRITE_ONLY,size=self.resultmatrix.nbytes)

        # With precalculated magnitudes: two local buffers, each sized from the
        # longest reference row in this block.
        lmem_bytes = int(2*4*max(self.rlengths[rowbase:rowlimit]))
        threads_per_block = 192
        self.gpu.multiRowKernel(self.gpu.queue,(threads_per_block*(rowlimit-rowbase),),
                                               self.gpu.rsmiles,self.gpu.rcounts,self.gpu.rl_gpu,self.gpu.rmag_gpu,
                                               self.refPitchInInt,
                                               self.gpu.qsmiles,self.gpu.qcounts,self.gpu.ql_gpu,self.gpu.qmag_gpu,
                                               self.qPitchTInInt,
                                               self.gpu.gpumatrix, numpy.int32(rowpitchInFloat),
                                               numpy.int32(self.qshape[0]),numpy.int32(self.qshape[1]),numpy.int32(rowbase),
                                               cl.LocalMemory(lmem_bytes),cl.LocalMemory(lmem_bytes),
                                               local_size=(threads_per_block,))

        cl.enqueue_read_buffer(self.gpu.queue,self.gpu.gpumatrix,self.resultmatrix).wait()
        return self.resultmatrix[:,0:self.nquery]

Beispiel #28

0

Datei anzeigen

    def FuseRGBD_GPU(self, Image, Pose):
        """
        Update the TSDF volume with Image
        :param Image: RGBD image to update to its surfaces
        :param Pose: transform from the first camera pose to the last camera pose
        :return: none

        The pose and the depth image are uploaded, the ``FuseTSDF`` kernel is
        run, and the TSDF and weight buffers are read back into ``self.TSDF``
        and ``self.Weight``.
        """
        gpu = self.GPUManager
        queue = gpu.queue

        # Upload the per-frame inputs.
        cl.enqueue_write_buffer(queue, self.Pose_GPU, Pose)
        cl.enqueue_write_buffer(queue, self.DepthGPU, Image.depth_image)

        # Fuse the RGBD image data with the TSDF volume 3D model.
        global_size = (self.Size[0], self.Size[1])
        gpu.programs['FuseTSDF'].FuseTSDF(
            queue, global_size, None,
            self.TSDFGPU, self.DepthGPU, self.Param, self.Size_Volume,
            self.Pose_GPU, self.Calib_GPU,
            np.int32(Image.Size[0]), np.int32(Image.Size[1]),
            self.WeightGPU)

        # Update the CPU-side arrays from the device buffers.
        # (A NaN count of self.TSDF can be taken between the two reads when
        # debugging.)
        cl.enqueue_read_buffer(queue, self.TSDFGPU, self.TSDF).wait()
        cl.enqueue_read_buffer(queue, self.WeightGPU, self.Weight).wait()

Beispiel #29

0

Datei anzeigen

Datei: test_wrapper.py Projekt: python-jheengut/pyopencl

def test_that_python_args_fail(ctx_factory):
    """Check that kernels reject bare Python scalars but accept numpy scalars.

    Both calling styles (``cl.Kernel`` object and ``prg.mult`` attribute) are
    expected to raise ``cl.LogicError`` when handed plain Python numbers; the
    call with ``np.float32`` / ``np.int32`` arguments must go through.
    """
    context = ctx_factory()

    kernel_src = """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """
    prg = cl.Program(context, kernel_src).build()

    a = np.random.rand(50000)
    queue = cl.CommandQueue(context)
    flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    a_buf = cl.Buffer(context, flags, hostbuf=a)

    complaint = "PyOpenCL should not accept bare Python types as arguments"

    knl = cl.Kernel(prg, "mult")
    try:
        knl(queue, a.shape, None, a_buf, 2, 3)
    except cl.LogicError:
        pass
    else:
        assert False, complaint

    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
    except cl.LogicError:
        pass
    else:
        assert False, complaint

    # properly typed scalars are accepted
    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    a_result = np.empty_like(a)
    cl.enqueue_read_buffer(queue, a_buf, a_result).wait()

Beispiel #30

0

Datei anzeigen

Datei: sha1c.py Projekt: gfs/ENGS116Proj

 def transform(self):
     """Realizes the calculus"""
     # Prepare the input and output memory
     mf = cl.mem_flags
     
     msg = np.char.array(self.Word_buffer)
     len_array = np.array(self.len).astype(np.int32)
     Hexdigest_array = np.char.array(['']*41*(len(len_array)))
     
     print msg
     print msg.nbytes
     print len_array
     print len_array.nbytes
     print Hexdigest_array
     print Hexdigest_array.nbytes
     
     # Allocate device memory
     msg_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, msg.nbytes, msg)
     len_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, len_array.nbytes, len_array)
     Hexdigest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, Hexdigest_array.nbytes)
     
     # Start OpenCL operation and wait for it to finish
     time1 = datetime.datetime.now()
     self.prg.sha1(self.queue, (len(len_array),), msg_buf, len_buf, Hexdigest_buf)
     cl.enqueue_read_buffer(self.queue, Hexdigest_buf, Hexdigest_array).wait()
     time2 = datetime.datetime.now()
     print "Execution time OpenCL sha1: " + repr((time2 - time1).microseconds/1000) + "ms"
     
     # Convert the result into strings
     for j in range(0,len(Hexdigest_array)/41):
         self.result.append(''.join(Hexdigest_array[j*41 + 0:j*41 + 41]))

Beispiel #31

0

Datei anzeigen

Datei: gpu_countminsketch.py Projekt: CUTEYANSHU/SketchVis

    def dump_batch(self):
        """Run the pending batch through the ``increment`` kernel and add it to ``self.M``.

        The batch keys (as 32-byte strings) and counts (int32) are uploaded
        together with ``self.rand``; the kernel output of shape
        ``(self.d, self.w)`` is accumulated into ``self.M`` and the batch is
        cleared afterwards.
        """
        keys = np.array(self.batch.keys(), dtype='S32')
        counts = np.array(self.batch.values(), dtype=np.int32)
        out = np.zeros([self.d, self.w], dtype=np.int32)

        # read-only device copies of the three inputs
        ro_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
        rand_buf = cl.Buffer(self.ctx, ro_copy, hostbuf=self.rand)
        keys_buf = cl.Buffer(self.ctx, ro_copy, hostbuf=keys)
        counts_buf = cl.Buffer(self.ctx, ro_copy, hostbuf=counts)

        # device-side output, same byte size as the host array
        out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, out.nbytes)

        # launch over (number of keys, d) and block until the kernel is done
        global_size = (len(keys), self.d)
        self.bld.increment(self.queue, global_size, None,
                           rand_buf, keys_buf, counts_buf, out_buf).wait()

        cl.enqueue_read_buffer(self.queue, out_buf, out).wait()

        self.M += out
        self.batch.clear()

Beispiel #32

0

Datei anzeigen

Datei: main.py Projekt: mastensg/mandel

 def execute(self):
     self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.b_buf, self.dest_buf)
     c = numpy.array(range(10), dtype=numpy.uint32)
     cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
     print "a", self.a
     print "b", self.b
     print "c", c

Beispiel #33

0

Datei anzeigen

    def execute(self):
        '''
        Run one propagate pass of patchMatch.

        Fresh device buffers are created for the two images and for the
        current ``self.nff``; after the kernel ran, ``self.nff`` is replaced
        by the values read back. The elapsed time is added to
        ``self.times["execute"]``.
        '''
        started = getTime()
        flags = cl.mem_flags
        read_only_copy = flags.READ_ONLY | flags.COPY_HOST_PTR

        self.inputBuf = [
            cl.Buffer(self.ctx, read_only_copy, hostbuf=self.img[index])
            for index in (0, 1)
        ]
        self.outputBuf = cl.Buffer(self.ctx,
                                   flags.READ_WRITE | flags.COPY_HOST_PTR,
                                   hostbuf=self.nff)

        # scalar kernel arguments, in kernel order:
        # patchHeight, patchWidth, height, width, iteration
        scalars = (
            numpy.int32(self.patchSize[0]),
            numpy.int32(self.patchSize[1]),
            numpy.int32(self.size[0]),
            numpy.int32(self.size[1]),
            numpy.int32(self.iteration),
        )
        self.program.propagate(
            self.queue, self.effectiveSize, None,
            scalars[0], scalars[1], scalars[2], scalars[3], scalars[4],
            self.inputBuf[0], self.inputBuf[1], self.outputBuf)

        readback = numpy.empty_like(self.nff)
        cl.enqueue_read_buffer(self.queue, self.outputBuf, readback).wait()
        self.nff = numpy.copy(readback)

        self.times["execute"] += getTime() - started

Beispiel #34

0

Datei anzeigen

def gpu_array_sum(a, b):
    """Add ``a`` and ``b`` element-wise on an OpenCL device and print timings.

    Prints the kernel time taken from the event's profiling counters, and the
    wall-clock time from just before the kernel launch until the result has
    been read back.

    :param a: first host array; also defines the launch shape and result layout
    :param b: second host array; its byte size is used for the output buffer
    :return: host array holding the kernel output
    """
    context = cl.create_some_context()
    # profiling has to be enabled for event.profile to be usable below
    queue = cl.CommandQueue(
        context, properties=cl.command_queue_properties.PROFILING_ENABLE)

    mem = cl.mem_flags
    input_flags = mem.READ_ONLY | mem.COPY_HOST_PTR
    a_buffer = cl.Buffer(context, input_flags, hostbuf=a)
    b_buffer = cl.Buffer(context, input_flags, hostbuf=b)
    c_buffer = cl.Buffer(context, mem.WRITE_ONLY, b.nbytes)

    program = cl.Program(context, """
    __kernel void sum(__global const float *a, __global const float *b, __global float *c)
    {
        int i = get_global_id(0);
        int j;
        for(j = 0; j < 1000; j++)
        {
            c[i] = a[i] + b[i];
        }
    }""").build()

    gpu_start_time = time()
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
    event.wait()
    # profiling counters are in nanoseconds
    kernel_seconds = 1e-9*(event.profile.end - event.profile.start)
    print("GPU Kernel Time: {0} s".format(kernel_seconds))

    c_gpu = np.empty_like(a)
    cl.enqueue_read_buffer(queue, c_buffer, c_gpu).wait()
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
    return c_gpu

Beispiel #35

0

Datei anzeigen

Datei: PyOpenCLTestApplication.py Projekt: lancelote/parallel_python

def gpu_array_sum(a, b):
    """Add ``a`` and ``b`` element-wise on the first device of the first platform.

    Prints the kernel time taken from the event's profiling counters, and the
    wall-clock time from just before the kernel launch until the result has
    been read back.

    :param a: first host array; also defines the launch shape and result layout
    :param b: second host array; its byte size is used for the output buffer
    :return: host array holding the kernel output
    """
    first_platform = cl.get_platforms()[0]
    first_device = first_platform.get_devices()[0]
    context = cl.Context([first_device])
    # profiling has to be enabled for event.profile to be usable below
    queue = cl.CommandQueue(
        context, properties=cl.command_queue_properties.PROFILING_ENABLE)

    mem = cl.mem_flags
    input_flags = mem.READ_ONLY | mem.COPY_HOST_PTR
    a_buffer = cl.Buffer(context, input_flags, hostbuf=a)
    b_buffer = cl.Buffer(context, input_flags, hostbuf=b)
    c_buffer = cl.Buffer(context, mem.WRITE_ONLY, b.nbytes)

    program = cl.Program(context, """
    __kernel void sum(__global const float *a, __global const float *b, __global float *c)
    {
        int i = get_global_id(0);
        int j;
        for(j = 0; j < 10000; j++)
        {
            c[i] = a[i] + b[i];
        }
    }""").build()

    gpu_start_time = time()
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
    event.wait()
    # profiling counters are in nanoseconds
    kernel_seconds = 1e-9*(event.profile.end - event.profile.start)
    print("GPU Kernel Time: {0} s".format(kernel_seconds))

    c_gpu = np.empty_like(a)
    cl.enqueue_read_buffer(queue, c_buffer, c_gpu).wait()
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
    return c_gpu

Beispiel #36

0

Datei anzeigen

Datei: locator_cl.py Projekt: athityakumar/software

    def update(self, sub_pos, angle, min_dist, max_dist, width, in_weight, out_weight):
        '''
        Apply one piece of evidence to the probability grid.

        The evidence is that the sub is at position sub_pos, the target is
        seen at an absolute heading of `angle` and is most likely between
        min_dist and max_dist away.
        in_weight is the chance of this measurement if the buoy is at a point
        inside the region, i.e. in_weight = P(this measurement | buoy at point p)
        for p in our region; out_weight is the same for points outside it.

        After the kernel ran, the probabilities are read back, normalised to
        sum to one on the host and written to the device again.
        '''
        north, east = sub_pos
        # the angle is converted to radians and both distances are squared
        # before being handed to the kernel
        scalar_args = (
            float32(north), float32(east),
            float32(radians(angle)),
            float32(min_dist**2),
            float32(max_dist**2),
            float32(width),
            float32(in_weight),
            float32(out_weight),
        )
        cl_program.evidence(cl_queue, self.norths.shape, None,
                            self.norths_buf, self.easts_buf, self.prob_buf,
                            *scalar_args)
        #TODO ?
        cl.enqueue_read_buffer(cl_queue, self.prob_buf, self.probabilities).wait()

        # Normalize in place
        self.probabilities /= numpy.sum(self.probabilities)

        cl.enqueue_write_buffer(cl_queue, self.prob_buf, self.probabilities)

Beispiel #37

0

Datei anzeigen

Datei: openclpow.py Projekt: PeterSurda/PyBitmessage

def do_opencl_pow(hash_, target):
    """Perform PoW using OpenCL.

    Repeatedly launches ``kernel_sha512`` with an increasing start offset
    until the kernel writes a non-zero value into the output or the
    module-level ``shutdown`` flag becomes non-zero.

    :param hash_: hex string; decoded after prefixing sixteen ``0`` characters
    :param target: value stored in the ``target`` field of the kernel input
    :return: the single output value (zero when no GPU is enabled)
    :raises Exception: ``"Interrupted"`` when ``shutdown`` is set
    """
    output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if not enabledGpus:
        return output[0][0]

    payload = numpy.zeros(1, dtype=hash_dt, order='C')
    payload[0]['v'] = ("0000000000000000" + hash_).decode("hex")
    payload[0]['target'] = target

    flags = cl.mem_flags
    hash_buf = cl.Buffer(ctx, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=payload)
    dest_buf = cl.Buffer(ctx, flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    # each launch covers this many work-items
    batch = worksize * 2000
    progress = 0

    while output[0][0] == 0 and shutdown == 0:
        # argument 2 is the running offset as a little-endian unsigned 64-bit value
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (batch,), (worksize,))
        try:
            cl.enqueue_read_buffer(queue, dest_buf, output)
        except AttributeError:
            # fall back when this PyOpenCL has no enqueue_read_buffer
            cl.enqueue_copy(queue, output, dest_buf)
        queue.finish()
        progress += batch

    if shutdown != 0:
        raise Exception("Interrupted")
#   logger.debug("Took %d tries.", progress)
    return output[0][0]

Beispiel #38

0

Datei anzeigen

Datei: clmandel.py Projekt: QuadmasterXLII/MandelBrotGL

 def execute(self, settings):
     """Run the ``mandel`` kernel and return its output as a ``dim`` x ``dim`` int32 array."""
     global_size = (self.c_real.shape[0], )
     self.program.mandel(self.queue, global_size, None,
                         self.real_buf, self.imag_buf, self.depth_buf,
                         self.dest_buf)
     side = settings.dim
     flat_counts = np.zeros(side**2, dtype=np.int32)
     cl.enqueue_read_buffer(self.queue, self.dest_buf, flat_counts).wait()
     return flat_counts.reshape([side, side])

Beispiel #39

0

Datei anzeigen

Datei: opencl.py Projekt: amiller/rtmodel

def reduce_flatrot():
  """Run the ``float4_sum`` kernel and return the column sums of its 8x4 result.

  The kernel is launched with a global size of 64*8 and a local size of 64;
  the first 8x4 float32 values of ``reduce_buf`` are read back and summed
  along axis 0 on the host.
  """
  global_size = (64*8,)
  local_size = (64,)
  program.float4_sum(queue, global_size, local_size,
    reduce_buf, reduce_scratch,
    qxdyqz_buf, np.int32(length))
  partial_sums = np.empty((8,4), dtype=np.float32)
  cl.enqueue_read_buffer(queue, reduce_buf, partial_sums).wait()
  return partial_sums.sum(axis=0)

Beispiel #40

0

Datei anzeigen

Datei: sparkcl_tmp2.py Projekt: sleepnezz/sparkcl

def map1(data):

    SPARKCL_PLATFORM = os.environ['CL_PLATFORM']
    SPARKCL_DEVICE = os.environ['CL_DEVICE']
    print str(SPARKCL_PLATFORM)+":"+str(SPARKCL_DEVICE)
    KERNEL_CODE="""
        __kernel void ArraySum(__global float *A,__global float *B,__global float *C){
       int i = get_global_id(0);
       C[i] = A[i]+B[i];
}

    """

    cl_device=cl.get_platforms()[int(SPARKCL_PLATFORM)].get_devices()[int(SPARKCL_DEVICE)]
    ctx = cl.Context([cl_device])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, KERNEL_CODE).build()
    kernel = prg.ArraySum
    mf = cl.mem_flags
    print "map" + str(data)

    np_data = []
    data_buf = []
    np_data.append(np.array(data[0]).astype(np.float32))
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[0]))
    np_data.append(np.array(data[1]).astype(np.float32))
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[1]))

    result = np.zeros((5,)).astype(np.float32)
    result_buf = cl.Buffer(ctx, mf.WRITE_ONLY, result.nbytes)

    kernel(queue,(5,),None,data_buf[0],data_buf[1],result_buf)
    cl.enqueue_read_buffer(queue, result_buf, result).wait()
    return [result.astype(np.float32)]

Beispiel #41

0

Datei anzeigen

Datei: __init__.py Projekt: lid/phoenix-miner

 def mineThread(self):
     """Consume work items from ``self.qr`` and run the ``search`` kernel on them.

     For every work item the kernel is launched ``data.iterations`` times,
     once per entry of ``data.base``. After each launch the output buffer is
     read back; when its flag element is set, a copy of the output is handed
     to ``self.postprocess`` on the reactor thread and the buffer is zeroed.
     """
     for data in self.qr:
         for i in range(data.iterations):
             state, state2, f = data.state, data.state2, data.f
             kernel_args = (
                 state[0], state[1], state[2], state[3],
                 state[4], state[5], state[6], state[7],
                 state2[1], state2[2], state2[3],
                 state2[5], state2[6], state2[7],
                 data.base[i],
                 f[0], f[1], f[2], f[3], f[4],
                 self.output_buf,
             )
             self.kernel.search(
                 self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                 *kernel_args)
             cl.enqueue_read_buffer(
                 self.commandQueue, self.output_buf, self.output)
             self.commandQueue.finish()

             # The OpenCL code will flag the last item in the output buffer when
             # it finds a valid nonce. Nothing to do for this pass otherwise.
             if not self.output[self.OUTPUT_SIZE]:
                 continue

             # Send the result to the main thread for postprocessing and
             # clean the buffer for the next pass.
             reactor.callFromThread(self.postprocess, self.output.copy(),
                                    data.nr)
             self.output.fill(0)
             cl.enqueue_write_buffer(
                 self.commandQueue, self.output_buf, self.output)

Beispiel #42

0

Datei anzeigen

 def plotCurrentMembraneCoordinates(self):
     """Read the membrane coordinates back from the device and plot X against Y."""
     transfers = (
         (self.dev_membraneCoordinatesX, self.host_membraneCoordinatesX),
         (self.dev_membraneCoordinatesY, self.host_membraneCoordinatesY),
     )
     # X first, then Y; each read blocks until it has completed
     for device_array, host_array in transfers:
         cl.enqueue_read_buffer(self.queue, device_array.data,
                                host_array).wait()
     plt.plot(self.host_membraneCoordinatesX,
              self.host_membraneCoordinatesY)

Beispiel #43

0

Datei anzeigen

Datei: CL.py Projekt: naojsoft/ginga

    def resize_uint32(self, data_np, scale_x, scale_y, out=None):
        """Resize ``data_np`` with the ``image_resize_uint32`` kernel.

        :param data_np: source array; its first two dimensions are height and width
        :param scale_x: factor applied to the width
        :param scale_y: factor applied to the height
        :param out: optional host array to receive the result; when omitted a
            new array with the scaled shape and the dtype of ``data_np`` is
            allocated
        :return: the array the result was read into
        """
        height, width = data_np.shape[:2]

        # scaled dimensions are truncated to integers
        new_ht, new_wd = int(height * scale_y), int(width * scale_x)
        new_shape = [new_ht, new_wd] + list(data_np.shape[2:])

        flags = cl.mem_flags

        # the source is made C-contiguous before it is copied to the device
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)

        # destination: one uint32 per output pixel
        num_bytes = new_ht * new_wd * np.uint32(0).nbytes
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, num_bytes)

        self.program.image_resize_uint32(
            self.queue, [new_ht, new_wd], None,
            src_buf, dst_buf,
            np.int32(width), np.int32(new_wd),
            np.float64(scale_x), np.float64(scale_y))

        if out is None:
            out = np.empty(new_shape, dtype=data_np.dtype)
        cl.enqueue_read_buffer(self.queue, dst_buf, out).wait()

        return out

Beispiel #44

0

Datei anzeigen

Datei: pyopencl_imp.py Projekt: thusodangersimon/lomb_scargle

def lombscarge_opencl(x, y, f):
    """Run the ``lombscargle`` OpenCL kernel in double precision.

    The inputs are converted with ``np.float64`` and uploaded to the device;
    the kernel is launched with one work-item per element of ``f``. The
    function blocks until the result has been copied back to the host.

    :param x: first input array; its length is passed to the kernel as ``Nx``
    :param y: second input array
    :param f: third input array; the output has the same shape and dtype
    :return: host array filled by the kernel
    :raises Exception: whatever ``prg.build()`` raises, re-raised after the
        build log has been printed
    """
    # start up gpu
    x = np.float64(x)
    y = np.float64(y)
    f = np.float64(f)
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags
    Nx = np.int32(x.shape[0])
    # send data to card
    x_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
    y_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=y)
    f_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
    # make output
    pgram = np.empty_like(f)
    pgram_g = cl.Buffer(ctx, mf.WRITE_ONLY, pgram.nbytes)
    prg = cl.Program(ctx, lomb_txt)

    try:
        prg.build()
    except Exception:
        # show the compiler output before propagating the failure
        print("Error:")
        print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
        raise

    prg.lombscargle(queue, pgram.shape, None, x_g, y_g, f_g, pgram_g, Nx)
    # Wait for the transfer: depending on the PyOpenCL version the read may be
    # non-blocking, and pgram must be complete before it is returned.
    cl.enqueue_read_buffer(queue, pgram_g, pgram).wait()

    return pgram

Beispiel #45

0

Datei anzeigen

Datei: test_opencl_benchmark_fp64_transmath.py Projekt: GongYiLiao/Python_Daily

def test_opencl_0(zz, a, b, c_result):
    """Benchmark a double-precision kernel on the second device of every platform.

    For each platform, device information is printed, the kernel is built
    with ``zz`` as its loop count and timed via the event profiling counters,
    and the first ``zz`` results are compared against ``c_result``.

    :param zz: loop count substituted into the kernel source; also the number
        of result elements that are checked
    :param a: first input array
    :param b: second input array
    :param c_result: expected values
    """
    heavy_rule = "==============================================================="
    light_rule = "---------------------------------------------------------------"

    for platform in cl.get_platforms():
        # only the device at index 1 of each platform is exercised
        device = platform.get_devices()[1]

        print(heavy_rule)
        print("Platform name:", platform.name)
        print("Platform profile:", platform.profile)
        print("Platform vendor:", platform.vendor)
        print("Platform version:", platform.version)
        print(light_rule)
        print("Device name:", device.name)
        print("Device type:", cl.device_type.to_string(device.type))
        print("Device memory: ", device.global_mem_size//1024//1024, 'MB')
        print("Device max clock speed:", device.max_clock_frequency, 'MHz')
        print("Device compute units:", device.max_compute_units)

        # Simple speed test
        ctx = cl.Context([device])
        queue = cl.CommandQueue(
            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

        mf = cl.mem_flags
        input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
        a_buf = cl.Buffer(ctx, input_flags, hostbuf=a)
        b_buf = cl.Buffer(ctx, input_flags, hostbuf=b)
        dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

        kernel_src = """
                __kernel void sum(__global const double *a,
                __global const double *b, __global double *c)
                {
                            int loop;
                            int gid = get_global_id(0);
                            for(loop=0; loop<%s;loop++)
                            {
                                    c[gid] = a[gid] + b[gid];
                                    c[gid] = c[gid] * (a[gid] + b[gid]);
                                    c[gid] = c[gid] * (a[gid] / 2);
                                    c[gid] = log(exp(c[gid]));
                            }
                }
            """ % (zz)
        prg = cl.Program(ctx, kernel_src).build()

        exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
        exec_evt.wait()
        # profiling counters are in nanoseconds
        elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)

        print("Execution time of test: %g s" % elapsed)

        c = numpy.empty_like(a)
        cl.enqueue_read_buffer(queue, dest_buf, c).wait()

        mismatch = False
        for i in range(zz):
            if c[i] == c_result[i]:
                continue
            print("c_i: ", c[i], " c_results_i: ", c_result[i])
            print("diff: ", numpy.abs(c[i] - c_result[i]))
            mismatch = True

        if mismatch:
            print("Results doesn't match!!")
        else:
            print("Results OK")

Beispiel #46

0

Datei anzeigen

Datei: lomb_scargle.py Projekt: drdangersimon/seismo

def lomb_scargle32(x, y, f):
    '''Single-precision version of lomb-scargle.

    The inputs are converted with ``np.float32`` and uploaded to the device;
    the ``lombscargle`` kernel is launched with one work-item per element of
    ``f``. The function blocks until the result has been copied back to the
    host.

    :param x: first input array; its length is passed to the kernel as ``Nx``
    :param y: second input array
    :param f: third input array; the output has the same shape and dtype
    :return: host array filled by the kernel
    :raises Exception: whatever ``prg.build()`` raises, re-raised after the
        build log has been printed
    '''
    x = np.float32(x)
    y = np.float32(y)
    f = np.float32(f)
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags
    Nx = np.int32(x.shape[0])
    # send data to card
    x_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
    y_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=y)
    f_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
    # make output
    pgram = np.empty_like(f)
    pgram_g = cl.Buffer(ctx, mf.WRITE_ONLY, pgram.nbytes)
    prg = cl.Program(ctx, lomb_txt32)

    try:
        prg.build()
    except Exception:
        # show the compiler output before propagating the failure
        print("Error:")
        print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
        raise

    prg.lombscargle(queue, pgram.shape, None, x_g, y_g, f_g, pgram_g, Nx)
    # Wait for the transfer: depending on the PyOpenCL version the read may be
    # non-blocking, and pgram must be complete before it is returned.
    cl.enqueue_read_buffer(queue, pgram_g, pgram).wait()

    return pgram

Beispiel #47

0

Datei anzeigen

Datei: fdtd3d_gpu_cpu_non-pinned.py Projekt: wbkifun/fdtd_accelerate

	def exchange_boundary_e(s):
		"""Pass field components 1 and 2 (ey, ez) from each GPU to the previous one.

		Every GPU except the first reads the two fields into its host staging
		arrays; every GPU except the last then writes the staging arrays of
		its successor into its own fields at the matching offset.
		"""
		# device -> host, for every GPU but the first
		for gpu_queue, fields, staging in zip(s.queues[1:], s.eh_fields_gpus[1:], s.tmpfs[1:]):
			cl.enqueue_read_buffer(gpu_queue, fields[1], staging[0])	# ey_gpu
			cl.enqueue_read_buffer(gpu_queue, fields[2], staging[1])	# ez_gpu
		# host -> device, into every GPU but the last (staging comes from the next GPU)
		receivers = zip(s.queues[:-1], s.eh_fields_gpus[:-1], s.tmpfs[1:], s.offsets[:-1])
		for gpu_queue, fields, staging, write_offset in receivers:
			cl.enqueue_write_buffer(gpu_queue, fields[1], staging[0], write_offset)
			cl.enqueue_write_buffer(gpu_queue, fields[2], staging[1], write_offset)

Beispiel #48

0

Datei anzeigen

Datei: opencl-mm.py Projekt: vdragsic/opencl-parallelization

    def execute(self):
        """
        Runs test openCL kernel and returns elapsed time.

        The kernel source is loaded and built, ``matrix_mul`` is launched over
        an ``(m, p)`` grid with ``(block, block)`` work-groups, and the result
        buffer is read back into ``self.C``.

        :return: kernel execution time in seconds, from the event's profiling
            counters (the queue must have been created with profiling enabled)
        """

        kernel = self.LoadKernelSrc(self.src)

        # build opencl kernel
        prg = cl.Program(self.ctx, kernel).build()

        exec_evt = prg.matrix_mul(
            self.queue,
            (self.m, self.p),
            self.A_buf,
            self.B_buf,
            self.C_buf,
            np.uint32(self.m),
            np.uint32(self.n),
            np.uint32(self.p),
            local_size=(self.block, self.block),
        )
        # Keep the Event itself and wait on it separately: binding the result
        # of .wait() loses the event whenever wait() returns None, and the
        # profiling counters below are read from the event.
        exec_evt.wait()

        # read result from opencl buffer
        cl.enqueue_read_buffer(self.queue, self.C_buf, self.C).wait()

        # return elapsed time in seconds (profiling counters are nanoseconds)
        return 1e-9 * (exec_evt.profile.end - exec_evt.profile.start)

Beispiel #49

0

Datei anzeigen

Datei: openclpow.py Projekt: Basti1993/PyBitmessage

def do_opencl_pow(hash, target):
    """Perform PoW using OpenCL.

    Repeatedly launches ``kernel_sha512`` with an increasing start offset
    until the kernel writes a non-zero value into the output.

    :param hash: hex string; decoded after prefixing sixteen ``0`` characters
    :param target: value stored in the ``target`` field of the kernel input
    :return: the single output value (zero when ``ctx`` equals ``False``)
    """
    global ctx, queue, program, gpus, hash_dt

    output = numpy.zeros(1, dtype=[("v", numpy.uint64, 1)])
    if ctx == False:
        return output[0][0]

    payload = numpy.zeros(1, dtype=hash_dt, order="C")
    payload[0]["v"] = ("0000000000000000" + hash).decode("hex")
    payload[0]["target"] = target

    flags = cl.mem_flags
    hash_buf = cl.Buffer(ctx, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=payload)
    dest_buf = cl.Buffer(ctx, flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    began = time.time()
    # each launch covers this many work-items
    batch = worksize * 2000
    progress = 0

    while output[0][0] == 0:
        # argument 2 is the running offset as a little-endian unsigned 64-bit value
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (batch,), (worksize,))
        cl.enqueue_read_buffer(queue, dest_buf, output)
        queue.finish()
        progress += batch
        # only consumed by the disabled debug logging below
        sofar = time.time() - began
    # 		logger.debug("Working for %.3fs, %.2f Mh/s", sofar, (progress / sofar) / 1000000)
    taken = time.time() - began
    # 	logger.debug("Took %d tries.", progress)
    return output[0][0]

Beispiel #50

0

Datei anzeigen

Datei: visualangle.py Projekt: RyanHope/gazetools_cl

 def __call__(self, ctx, x, y, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``distance_2_point`` kernel and return its output.

     The array-like arguments ``x``, ``y``, ``ez``, ``ex`` and ``ey`` are
     converted to float32 arrays and uploaded; ``rx``, ``ry``, ``sw`` and
     ``sh`` are passed as float32 scalars. All device buffers are released
     before returning.

     :param ctx: forwarded to ``self.build``
     :return: float32 array with the same shape as ``x``
     """
     self.build(ctx)

     def as_f32(values):
         return np.array(values, dtype=np.float32, copy=False)

     x, y, ez, ex, ey = as_f32(x), as_f32(y), as_f32(ez), as_f32(ex), as_f32(ey)
     rx, ry, sw, sh = np.float32(rx), np.float32(ry), np.float32(sw), np.float32(sh)

     input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     x_buf = cl.Buffer(self.ctx, input_flags, hostbuf=x)
     y_buf = cl.Buffer(self.ctx, input_flags, hostbuf=y)
     ez_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ez)
     ex_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ex)
     ey_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ey)
     # the output has the same byte size as x
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x.nbytes)

     queue = cl.CommandQueue(self.ctx)
     self.prg.distance_2_point(queue, x.shape, None,
                               x_buf, y_buf, rx, ry, sw, sh,
                               ez_buf, ex_buf, ey_buf, out_buf)
     out = np.empty_like(x)
     cl.enqueue_read_buffer(queue, out_buf, out).wait()

     for device_buffer in (x_buf, y_buf, ez_buf, ex_buf, ey_buf, out_buf):
         device_buffer.release()
     return out

Beispiel #51

0

Datei anzeigen

Datei: ode45.py Projekt: ezetl/Ligo_RungeKutta_OpenCL

 def copy_array(self, arr_like, arr_device):
     """
     Copy a device buffer into a new host array and return it.

     The host array is allocated with the shape and dtype of ``arr_like``;
     the call blocks until the read has completed.
     """
     host_copy = np.empty_like(arr_like)
     read_done = cl.enqueue_read_buffer(self.queue, arr_device, host_copy)
     read_done.wait()
     return host_copy

Beispiel #52

0

Datei anzeigen

Datei: visualangle.py Projekt: RyanHope/gazetools_cl

 def __call__(self, ctx, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``subtended_angle_naive`` kernel and return its output.

     The array-like arguments ``x1``, ``y1``, ``x2``, ``y2``, ``ez``, ``ex``
     and ``ey`` are converted to float32 arrays and uploaded; ``rx``, ``ry``,
     ``sw`` and ``sh`` are passed as float32 scalars. All device buffers are
     released before returning.

     :param ctx: forwarded to ``self.build``
     :return: float32 array with the same shape as ``x1``
     """
     self.build(ctx)

     def as_f32(values):
         return np.array(values, dtype=np.float32, copy=False)

     x1, y1, x2, y2 = as_f32(x1), as_f32(y1), as_f32(x2), as_f32(y2)
     ez, ex, ey = as_f32(ez), as_f32(ex), as_f32(ey)
     rx, ry, sw, sh = np.float32(rx), np.float32(ry), np.float32(sw), np.float32(sh)

     input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     x1_buf = cl.Buffer(self.ctx, input_flags, hostbuf=x1)
     y1_buf = cl.Buffer(self.ctx, input_flags, hostbuf=y1)
     x2_buf = cl.Buffer(self.ctx, input_flags, hostbuf=x2)
     y2_buf = cl.Buffer(self.ctx, input_flags, hostbuf=y2)
     ez_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ez)
     ex_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ex)
     ey_buf = cl.Buffer(self.ctx, input_flags, hostbuf=ey)
     # the output has the same byte size as x1
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)

     queue = cl.CommandQueue(self.ctx)
     self.prg.subtended_angle_naive(queue, x1.shape, None,
                                    x1_buf, y1_buf, x2_buf, y2_buf,
                                    rx, ry, sw, sh,
                                    ez_buf, ex_buf, ey_buf, out_buf)
     out = np.empty_like(x1)
     cl.enqueue_read_buffer(queue, out_buf, out).wait()

     for device_buffer in (x1_buf, y1_buf, x2_buf, y2_buf,
                           ez_buf, ex_buf, ey_buf, out_buf):
         device_buffer.release()
     return out

Beispiel #53

0

Datei anzeigen

    def mineThread(self):
        """Consume work items from ``self.qr`` and run the ``search`` kernel on them.

        For every work item the kernel is launched ``data.iterations`` times,
        once per entry of ``data.base``. After each launch the output buffer
        is read back; when its flag element is set, a copy of the output is
        handed to ``self.postprocess`` on the reactor thread and the buffer is
        zeroed.
        """
        for data in self.qr:
            for i in range(data.iterations):
                state, state2, f = data.state, data.state2, data.f
                kernel_args = (
                    state[0], state[1], state[2], state[3],
                    state[4], state[5], state[6], state[7],
                    state2[1], state2[2], state2[3],
                    state2[5], state2[6], state2[7],
                    data.base[i],
                    f[1], f[2], f[3], f[4],
                    f[5], f[6], f[7], f[8],
                    self.output_buf,
                )
                self.kernel.search(
                    self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                    *kernel_args)
                cl.enqueue_read_buffer(
                    self.commandQueue, self.output_buf, self.output)
                self.commandQueue.finish()

                # The OpenCL code will flag the last item in the output buffer
                # when it finds a valid nonce. Nothing to do for this pass
                # otherwise.
                if not self.output[self.OUTPUT_SIZE]:
                    continue

                # Send the result to the main thread for postprocessing and
                # clean the buffer for the next pass.
                reactor.callFromThread(self.postprocess,
                                       self.output.copy(), data.nr)
                self.output.fill(0)
                cl.enqueue_write_buffer(
                    self.commandQueue, self.output_buf, self.output)

Beispiel #54

0

Datei anzeigen

Datei: cl.py Projekt: fjarri-attic/beclab

	def fromDevice(self, buf, shape=None):
		"""Copy ``buf`` from the device into a new host array and return it.

		The host array uses the dtype of ``buf`` and the given ``shape``, or
		the shape of ``buf`` when ``shape`` is None. The call blocks until the
		read has completed.
		"""
		target_shape = buf.shape if shape is None else shape
		host_copy = numpy.empty(target_shape, dtype=buf.dtype)
		cl.enqueue_read_buffer(self.queue, buf, host_copy).wait()
		return host_copy

Beispiel #55

0

Datei anzeigen

Datei: __init__.py Projekt: BlackhatEspeed/phoenix

    def mineThread(self):
        """Consume work items from ``self.qr`` and run the ``search`` kernel on them.

        For every work item the kernel is launched ``data.iterations`` times,
        once per entry of ``data.base``. When ``self.GOFFSET`` is set, the
        base value is additionally unpacked and passed as the launch's global
        offset. After each launch the output buffer is read back; when its
        flag element is set, a copy of the output is handed to
        ``self.postprocess`` on the reactor thread and the buffer is zeroed.
        """
        for data in self.qr:
            for i in range(data.iterations):
                if self.GOFFSET:
                    offset = (unpack('I', data.base[i])[0],)
                else:
                    offset = None

                state, state2, f = data.state, data.state2, data.f
                kernel_args = (
                    state[0], state[1], state[2], state[3],
                    state[4], state[5], state[6], state[7],
                    state2[1], state2[2], state2[3],
                    state2[5], state2[6], state2[7],
                    data.base[i],
                    f[0], f[1], f[2], f[3],
                    f[4], f[5], f[6], f[7],
                    self.output_buf,
                )
                self.kernel.search(
                    self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                    *kernel_args, global_offset=offset)
                # non-blocking read; finish() below waits for it
                cl.enqueue_read_buffer(self.commandQueue, self.output_buf,
                                       self.output, is_blocking=False)
                self.commandQueue.finish()

                # The OpenCL code will flag the last item in the output buffer
                # when it finds a valid nonce. Nothing to do for this pass
                # otherwise.
                if not self.output[self.WORKSIZE]:
                    continue

                # Send the result to the main thread for postprocessing and
                # clean the buffer for the next pass.
                reactor.callFromThread(self.postprocess,
                                       self.output.copy(), data.nr)
                self.output.fill(0)
                cl.enqueue_write_buffer(self.commandQueue, self.output_buf,
                                        self.output, is_blocking=False)

Beispiel #56

0

Datei anzeigen

    def transform_uint32(self,
                         data_np,
                         flip_x=False,
                         flip_y=False,
                         swap_xy=False,
                         out=None):
        """Run the ``image_transform_uint32`` kernel over *data_np*.

        data_np -- input array; its first two dimensions are taken as
                   (height, width).
        flip_x, flip_y, swap_xy -- forwarded to the kernel as int32 values.
        out     -- optional host array to receive the result; when omitted an
                   array with the (possibly axis-swapped) shape is allocated.

        Returns the host array holding the data read back from the device.
        """
        height, width = data_np.shape[:2]

        # Swapping the axes exchanges the two leading dimensions of the result.
        leading_dims = [width, height] if swap_xy else [height, width]
        new_size = leading_dims + list(data_np.shape[2:])

        flags = cl.mem_flags

        # Create the OpenCL buffers on the device.
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx,
                            flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, data_np.nbytes)

        kernel_args = (
            src_buf, dst_buf,
            np.int32(width), np.int32(height),
            np.int32(flip_x), np.int32(flip_y),
            np.int32(swap_xy),
        )
        self.program.image_transform_uint32(self.queue, [height, width],
                                            None, *kernel_args)

        if out is None:
            out = np.empty_like(data_np).reshape(new_size)

        # Wait for the transfer so the caller receives complete data.
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Beispiel #57

0

Datei anzeigen

Datei: pyopencl-test.py Projekt: jlbribeiro/dei-hpc

 def execute(self):
     self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.b_buf, self.dest_buf)
     c = numpy.empty_like(self.a)
     cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
     print "a", self.a
     print "b", self.b
     print "c", c

Beispiel #58

0

Datei anzeigen

    def resize_uint32(self, data_np, scale_x, scale_y, out=None):
        """Run the ``image_resize_uint32`` kernel to rescale *data_np*.

        data_np -- input array; its first two dimensions are taken as
                   (height, width).
        scale_x, scale_y -- scale factors; the output dimensions are the
                   input dimensions multiplied by these and truncated to int.
        out     -- optional host array to receive the result; when omitted an
                   array of the new shape and the input dtype is allocated.

        Returns the host array holding the data read back from the device.
        """
        height, width = data_np.shape[:2]

        new_ht = int(height * scale_y)
        new_wd = int(width * scale_x)
        new_shape = [new_ht, new_wd] + list(data_np.shape[2:])

        flags = cl.mem_flags

        # Create the OpenCL buffers on the device.
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx,
                            flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)

        # The destination holds one uint32-sized element per output pixel.
        bytes_per_pixel = np.uint32(0).nbytes
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY,
                            new_ht * new_wd * bytes_per_pixel)

        kernel_args = (
            src_buf, dst_buf,
            np.int32(width), np.int32(new_wd),
            np.float64(scale_x), np.float64(scale_y),
        )
        self.program.image_resize_uint32(self.queue, [new_ht, new_wd], None,
                                         *kernel_args)

        if out is None:
            out = np.empty(new_shape, dtype=data_np.dtype)

        # Wait for the transfer so the caller receives complete data.
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Beispiel #59

0

Datei anzeigen

Datei: camera2.py Projekt: alexandrustaetu/ai

	def prepare_environment(self, filename, camera_index):
		"""Build the OpenCL program and run the webcam contour display loop.

		filename     -- path of the OpenCL kernel source file to build.
		camera_index -- index of the capture device handed to OpenCV.

		Each captured frame is processed one row at a time: the row and its
		upper and lower neighbours are uploaded, ``calculate_differences`` is
		run on them, and the result is stored in the output image, which is
		then shown in the "camera_window" window. The loop ends when key code
		27 (Escape) is read, after which the window is destroyed.
		"""
		# Build the kernel for the video card; the context manager makes sure
		# the source file is closed again.
		with open(filename, 'r') as kernel_file:
			kernel_string = kernel_file.read()
		self.program = cl.Program(self.gpu_context, kernel_string).build()

		# Get frames from the webcam.
		self.stream = cv.CaptureFromCAM(camera_index)
		self.line_cols = cv.GetMat(cv.QueryFrame(self.stream)).cols

		# Flag combination shared by all input buffers (loop invariant).
		flags = self.memory_flags
		read_only_copy = flags.READ_ONLY | flags.COPY_HOST_PTR

		while True:
			self.frame = cv.QueryFrame(self.stream)
			self.frame = cv.GetMat(self.frame)
			self.image_data = numpy.asarray(self.frame)
			self.image_data = numpy.array(self.image_data, dtype=numpy.int32)
			final = numpy.zeros(shape=(self.image_data.shape))
			last_row = self.image_data.shape[0] - 1
			for position, line in enumerate(self.image_data):
				# The first and last rows lack a neighbour above/below.
				if position == 0 or position == last_row:
					continue
				line = line.ravel()
				self.line_buffer = cl.Buffer(self.gpu_context, read_only_copy, hostbuf=line)
				self.top_line_buffer = cl.Buffer(self.gpu_context, read_only_copy, hostbuf=self.image_data[position-1])
				self.bottom_line_buffer = cl.Buffer(self.gpu_context, read_only_copy, hostbuf=self.image_data[position+1])
				self.contour_buffer = cl.Buffer(self.gpu_context, flags.WRITE_ONLY, line.nbytes)
				self.program.calculate_differences(self.command_queue, line.shape, None, self.top_line_buffer, self.line_buffer, self.bottom_line_buffer, self.contour_buffer)
				contour = numpy.empty_like(line)
				cl.enqueue_read_buffer(self.command_queue, self.contour_buffer, contour).wait()
				# Restore the (columns, 3) layout of the flattened row.
				final[position] = contour.reshape(self.line_cols, 3)
			img = numpy.uint8(final)
			img = cv.fromarray(img)
			cv.ShowImage("camera_window", img)
			# 27 is the key code for Escape: leave the capture loop.
			if cv.WaitKey(10) == 27:
				break
		# Reached only after the loop has been left via Escape.
		cv.DestroyWindow("camera_window")

Beispiel #60

0

Datei anzeigen

Datei: old_gazetools.py Projekt: RyanHope/gazetools_cl

 def subtended_angle(self, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``cl_subtended_angle`` kernel and return its output array.

     x1, y1, x2, y2, ez, ex, ey -- array-like inputs; converted to float32
         arrays and uploaded as read-only device buffers.
     rx, ry, sw, sh -- scalar inputs; passed to the kernel as float32.

     The kernel is launched with ``x1.shape`` as its global size and the
     result is read back into a float32 array shaped like ``x1``.
     """
     # np.asarray only copies when a conversion is required. Unlike
     # np.array(..., copy=False) it does not raise under NumPy 2 when a
     # copy turns out to be necessary.
     x1 = np.asarray(x1, dtype=np.float32)
     y1 = np.asarray(y1, dtype=np.float32)
     x2 = np.asarray(x2, dtype=np.float32)
     y2 = np.asarray(y2, dtype=np.float32)
     ez = np.asarray(ez, dtype=np.float32)
     ex = np.asarray(ex, dtype=np.float32)
     ey = np.asarray(ey, dtype=np.float32)
     rx = np.float32(rx)
     ry = np.float32(ry)
     sw = np.float32(sw)
     sh = np.float32(sh)

     # Flag combination shared by every input buffer.
     read_only_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     x1_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=x1)
     y1_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=y1)
     x2_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=x2)
     y2_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=y2)
     ez_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=ez)
     ex_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=ex)
     ey_buf = cl.Buffer(self.ctx, read_only_copy, hostbuf=ey)
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)

     self.cl_subtended_angle(self.queue, x1.shape, None,
                             x1_buf, y1_buf, x2_buf, y2_buf,
                             rx, ry, sw, sh,
                             ez_buf, ex_buf, ey_buf, out_buf)
     self.queue.finish()

     out = np.empty_like(x1)
     # Wait on the read event so `out` is fully populated before it is
     # returned, including on PyOpenCL versions where the read is
     # non-blocking by default.
     cl.enqueue_read_buffer(self.queue, out_buf, out).wait()
     return out