Esempi in Python per enqueue_read_buffer, esempi in Python per pyopencl.enqueue_read_buffer

Esempio n. 1

0

Mostra file

File: main.py Progetto: jweede/UC-CS668-Parallel-Computing-Labs-2011

        def filter_primes(bit_array, offset):

            if (not len(self.primes)):
                return empty_bitarray()

            b = numpy.array(self.primes, dtype=numpy.uint32)
            a = empty_bitarray()
            c = numpy.array(offset, dtype=numpy.uint32)

            b_buf = cl.Buffer(self.ctx,
                              self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                              hostbuf=b)
            a_buf = cl.Buffer(self.ctx,
                              self.mf.READ_WRITE | self.mf.COPY_HOST_PTR,
                              hostbuf=a)
            c_buf = cl.Buffer(self.ctx,
                              self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
                              hostbuf=c)

            # send integers and new bit mask to pfilter
            event2 = self.program.pfilter(self.queue, (self.block_size, ),
                                          None, b_buf, a_buf, c_buf)
            cl.enqueue_read_buffer(self.queue, a_buf, a)

            print 'Filter Duration:', 1e-9 * (event2.profile.end -
                                              event2.profile.start)

            return a

Esempio n. 2

0

Mostra file

File: test_wrapper.py Progetto: MaybeS/pyopencl

def test_that_python_args_fail(ctx_factory):
    """Check that kernel launches reject bare Python numbers as scalars.

    A small ``mult`` kernel is built and launched twice with plain Python
    ``int``/``float`` arguments; each launch must raise ``cl.LogicError``.
    It is then launched with numpy-typed scalars and the buffer is read back.
    """
    context = ctx_factory()

    kernel_source = """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """
    prg = cl.Program(context, kernel_source).build()

    a = np.random.rand(50000)
    queue = cl.CommandQueue(context)
    buffer_flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    a_buf = cl.Buffer(context, buffer_flags, hostbuf=a)

    # Case 1: launch through an explicitly constructed Kernel object.
    knl = cl.Kernel(prg, "mult")
    try:
        knl(queue, a.shape, None, a_buf, 2, 3)
    except cl.LogicError:
        pass
    else:
        assert False, "PyOpenCL should not accept bare Python types as arguments"

    # Case 2: launch through the program attribute with a Python float.
    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
    except cl.LogicError:
        pass
    else:
        assert False, "PyOpenCL should not accept bare Python types as arguments"

    # Properly typed scalars must be accepted.
    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    readback = np.empty_like(a)
    cl.enqueue_read_buffer(queue, a_buf, readback).wait()

Esempio n. 3

0

Mostra file

File: main.py Progetto: jameszhao00/lightwayrt

def test_cl():
    ctx = cl.create_some_context()  # (interactive=False)

    # print 'ctx', ctx
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    f = open("part1.cl", "r")
    fstr = "".join(f.readlines())
    program = cl.Program(ctx, fstr).build()
    mf = cl.mem_flags

    cameraPos = np.array([0, 6, -1, 0])
    invView = la.inv(look_at((0, 6, -1), (0, 1, 1), (0, 1, 0)))
    invProj = la.inv(perspective(60, 1, 1, 1000))
    print "view", invView
    print "proj", invProj
    viewParamsData = (
        cameraPos.flatten().tolist()
        + np.transpose(invView).flatten().tolist()
        + np.transpose(invProj).flatten().tolist()
    )
    # print 'vpd', viewParamsData
    viewParams = struct.pack("4f16f16f", *viewParamsData)
    viewParams_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=viewParams)
    num_pixels = 1000 * 1000
    # setup opencl
    dest = np.ndarray((1000, 1000, 4), dtype=np.float32)
    dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, dest.nbytes)
    local_shape = (8, 8)
    # run kernel
    evt = program.part1(queue, (dest.shape[0], dest.shape[1]), None, viewParams_buf, dest_buf)
    # evt = program.part1(queue, dest.shape, None, dest_buf)
    cl.enqueue_read_buffer(queue, dest_buf, dest).wait()
    print "time", (evt.profile.end - evt.profile.start) * 0.000001, "ms"
    return dest

Esempio n. 4

0

Mostra file

    def search(self, midstate):
        """Run the fill and search kernels for ``midstate``; return the output.

        ``midstate`` is passed through ``flipendian32`` and handed to the
        ``sha512_fill`` kernel in eight 4-item slices. The ``ksearch`` kernel
        then runs over the filled buffers, and the content of
        ``self.output_buf`` is returned as ``str(output)``.
        """
        msg = flipendian32(midstate)

        fill_kernel = self.sha512_fill
        for slot in xrange(8):
            begin = slot * 4
            fill_kernel.set_arg(slot, msg[begin:begin + 4])
        fill_kernel.set_arg(8, self.hashes_buf)
        fill_kernel.set_arg(9, self.keyhash_buf)
        cl.enqueue_nd_range_kernel(self.queue, fill_kernel,
                                   (HASHES_NUM, ), (self.sha512_fill_ws, ))
        self.queue.finish()

        # Upload a zero-filled host buffer so the device output starts clean.
        output = bytearray(OUTPUT_SIZE)
        cl.enqueue_write_buffer(self.queue, self.output_buf, output)
        self.queue.finish()

        search_kernel = self.ksearch
        search_args = (self.hashes_buf, self.keyhash_buf, self.output_buf)
        for position, device_buf in enumerate(search_args):
            search_kernel.set_arg(position, device_buf)
        cl.enqueue_nd_range_kernel(self.queue, search_kernel, (KEYS_NUM, ),
                                   (self.ksearch_ws, ))
        self.queue.finish()

        cl.enqueue_read_buffer(self.queue, self.output_buf, output)
        self.queue.finish()
        return str(output)

Esempio n. 5

0

Mostra file

File: broadattack.py Progetto: vanhoefm/broadkey

def test_gpu_aes():
	import pyopencl as cl
	import numpy

	# Prepare context and command queue
	ctx = cl.create_some_context(interactive=False)
	queue = cl.CommandQueue(ctx)

	print "Compiling kernel ..."
	with open_cl("ralink.cl", "r") as fp:
		code = fp.read() % { 'STARTTIME': 0, 'MACADDR1': 0, 'MACADDR2': 0,
				'NONCE1': 0, 'NONCE2': 0, 'NONCE3': 0, 'NONCE4': 0,
				'KEYSTREAM1': 0, 'KEYSTREAM2': 0,}
		program = cl.Program(ctx, code).build(options="-I %s" % get_opencl_path())

	# Prepare memory
	result = numpy.zeros(shape=(8), dtype=numpy.uint32)
	result[0] = 0xffffffff;
	result[1] = 0xffffffff;
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY| cl.mem_flags.COPY_HOST_PTR, hostbuf=result)

	# Run the program
	print "Running kernel ..."
	program.test_aes(queue, (1,), None, dest_buf)

	# Read the result
	cl.enqueue_read_buffer(queue, dest_buf, result).wait()
	print list2hex(result)
	assert result[0] == 0xD4E415CB
	assert result[1] == 0xD038A82B
	assert result[2] == 0x10A673DE
	assert result[3] == 0xEA25B206

Esempio n. 6

0

Mostra file

def get_iterations(context, complex_values, iterations):
    """Run the ``mandelbrot`` kernel from ``kernel.cl`` over ``complex_values``.

    Args:
        context: OpenCL context used for the queue, buffers and program.
        complex_values: host array uploaded as the kernel input; its shape is
            also used as the global work size.
        iterations: currently not forwarded to the kernel (the corresponding
            kernel argument is commented out).

    Returns:
        An array with the shape of ``complex_values`` and dtype
        ``clar.vec.ushort4`` holding the kernel output.

    Note:
        ``gradient`` is read from module scope, not passed in.
    """
    command_queue = cl.CommandQueue(context)
    output_array = np.zeros(complex_values.shape, dtype=clar.vec.ushort4)

    flags = cl.mem_flags
    complex_values_buffer = cl.Buffer(context,
                                      flags.READ_ONLY | flags.COPY_HOST_PTR,
                                      hostbuf=complex_values)
    gradient_array_buffer = cl.Buffer(context,
                                      flags.READ_ONLY | flags.COPY_HOST_PTR,
                                      hostbuf=gradient)
    output_array_buffer = cl.Buffer(context, flags.WRITE_ONLY,
                                    output_array.nbytes)

    # Read the source in a with-block so the handle is closed even when the
    # subsequent build() raises (the original leaked it in that case).
    with open('kernel.cl', 'r') as kernel_file:
        kernel_source = kernel_file.read()
    program = cl.Program(context, kernel_source).build()

    program.mandelbrot(
        command_queue,
        complex_values.shape,
        None,  # Local work size not specified
        complex_values_buffer,
        output_array_buffer,
        gradient_array_buffer,
        #np.uint(iterations)
    )

    cl.enqueue_read_buffer(command_queue, output_array_buffer,
                           output_array).wait()

    return output_array

Esempio n. 7

0

Mostra file

File: main.py Progetto: epitron/PyCl-Convergence

	def getData(self):
		"""Run ``GetWorld`` on the active buffer and read the result into ``self.a``."""
		# tickState selects which device buffer is fed to the kernel.
		if self.tickState == False:
			source_buf = self.a_buf
		else:
			source_buf = self.b_buf
		self.kUtil.GetWorld(self.queue, self.a.shape, None, self.ar_ySize, source_buf, self.dest_buf)
		cl.enqueue_read_buffer(self.queue, self.dest_buf, self.a).wait()

Esempio n. 8

0

Mostra file

    def getData(self, n, axis, data_D, data_H, name):
        """Read ``data_D`` from the device and plot one line of it.

        Args:
            n: index of the column (``axis == 'x'``) or row (``axis == 'y'``)
                of ``data_H`` to plot.
            axis: ``'x'`` or ``'y'``; selects ``lbm.X`` or ``lbm.Y`` as the
                node coordinates.
            data_D: device buffer to read from.
            data_H: host array that receives the device data.
            name: stored on the instance as ``self.name``.

        Raises:
            ValueError: if ``axis`` is neither ``'x'`` nor ``'y'`` (the
                original failed later with an unbound-variable error).
        """
        cl.enqueue_read_buffer(lbm.queue, data_D, data_H).wait()

        # Pick the node coordinates and the matching slice of the host data.
        if axis == 'x':
            X = lbm.X
            y = data_H[:, n]
        elif axis == 'y':
            X = lbm.Y
            y = data_H[n, :]
        else:
            raise ValueError("axis must be 'x' or 'y', got %r" % (axis,))

        # retrieve mid cell points from cell node data
        N = X.size - 1
        x = np.zeros((N))
        for i in range(1, X.size):
            x[i - 1] = (X[i] - X[i - 1]) / 2.0 + X[i - 1]

        self.x = x
        self.y = y
        self.n = n
        self.axis = axis
        self.data_D = data_D
        self.data_H = data_H
        self.name = name

        self.plotLine()

Esempio n. 9

0

Mostra file

    def getData(self, data_D, data_H, name):
        """
        Read ``data_D`` into ``data_H`` and plot it as a surface.

        The surface is drawn over the midpoints between consecutive entries
        of ``lbm.X`` and ``lbm.Y``; ``name`` is used as the scalar bar title.
        """
        # Open a new figure for the plot.
        mlab.figure(size=(512, 512))

        cl.enqueue_read_buffer(lbm.queue, data_D, data_H).wait()

        # retrieve mid cell points from cell node data
        nodes_x = lbm.X
        nodes_y = lbm.Y
        x = np.zeros((nodes_x.size - 1))
        y = np.zeros((nodes_y.size - 1))

        for k in range(1, lbm.X.size):
            x[k - 1] = (lbm.X[k] - lbm.X[k - 1]) / 2.0 + lbm.X[k - 1]

        for k in range(1, lbm.Y.size):
            y[k - 1] = (lbm.Y[k] - lbm.Y[k - 1]) / 2.0 + lbm.Y[k - 1]

        surface = mlab.surf(x, y, data_H, warp_scale='auto', colormap="jet")
        mlab.axes(surface)
        mlab.scalarbar(surface, title=name)

        self.s = surface
        self.data_D = data_D
        self.data_H = data_H

Esempio n. 10

0

Mostra file

File: visualangle.py Progetto: RyanHope/gazetools_cl

 def __call__(self, ctx, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``subtended_angle2_naive`` kernel and return its output.

     ``x1`` and ``y1`` are converted to float32 arrays and uploaded; every
     other argument is passed to the kernel as a float32 scalar.

     Returns:
         A float32 array with the shape of ``x1`` holding the kernel output.
     """
     self.build(ctx)
     # np.asarray converts only when needed. The original
     # np.array(..., copy=False) raises under NumPy 2 whenever a copy is
     # required (e.g. for list or float64 input).
     x1 = np.asarray(x1, dtype=np.float32)
     y1 = np.asarray(y1, dtype=np.float32)
     x2 = np.float32(x2)
     y2 = np.float32(y2)
     ez = np.float32(ez)
     ex = np.float32(ex)
     ey = np.float32(ey)
     rx = np.float32(rx)
     ry = np.float32(ry)
     sw = np.float32(sw)
     sh = np.float32(sh)
     x1_buf = cl.Buffer(self.ctx,
                        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                        hostbuf=x1)
     y1_buf = cl.Buffer(self.ctx,
                        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                        hostbuf=y1)
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)
     try:
         queue = cl.CommandQueue(self.ctx)
         self.prg.subtended_angle2_naive(queue, x1.shape, None, x1_buf, y1_buf,
                                         x2, y2, rx, ry, sw, sh, ez, ex, ey,
                                         out_buf)
         out = np.empty_like(x1)
         cl.enqueue_read_buffer(queue, out_buf, out).wait()
     finally:
         # Release the device buffers even if the launch or read-back fails.
         x1_buf.release()
         y1_buf.release()
         out_buf.release()
     return out

Esempio n. 11

0

Mostra file

    def clFindRoute(self, key, candidateTableList):
        """Run the ``match`` kernel over candidate tables until one matches.

        For each entry of ``candidateTableList`` the entry is assigned to
        ``key.prefixlen``, the kernel is run against table ``elem - 1`` with
        ``int(key.network)``, and the position buffer is read back. The loop
        stops at the first entry for which the position is not ``-1``.

        Returns:
            ``[elem, self.pos]``: the last entry tried and the position array.
            ``elem`` is ``None`` when ``candidateTableList`` is empty (the
            original raised an unbound-variable error in that case).
        """
        timeKernelHash = 0
        self.pos = np.array(-1, dtype=np.int32)

        # Reset the device-side position to -1 before searching.
        cl.enqueue_write_buffer(self.queue, self.pos_buf, self.pos)

        elem = None
        for elem in candidateTableList:
            key.prefixlen = (elem)
            ip = int(key.network)

            event = self.program.match(self.queue,
                                       self.tableShape[elem - 1],
                                       None,
                                       self.table_buf[elem - 1],
                                       self.pos_buf,
                                       np.int32(ip))
            event.wait()
            # Wait for the read-back so self.pos is valid when checked below,
            # whether or not the transfer is blocking by default.
            cl.enqueue_read_buffer(self.queue, self.pos_buf, self.pos).wait()
            # Accumulated kernel time (profile units); only used by the
            # optional diagnostic print below.
            timeKernelHash += event.profile.end - event.profile.start
            if self.pos != -1:
                break
        # print("Measured Time kernel Hash (eventProfiler OpenCL function): {:5.8f}"
        #       .format(1e-9*timeKernelHash))
        return [elem, self.pos]

Esempio n. 12

0

Mostra file

    def compute(self, floatimage, histogram, k):
        """Run the ``IIF`` kernel and return the transformed image.

        Args:
            floatimage: image data; flattened to ``width * height`` float32
                values before upload.
            histogram: 3-D array whose shape supplies ``(width, height,
                nbins)``; flattened to float32 before upload.
            k: scalar passed to the kernel as a float32.

        Returns:
            A ``(width, height)`` float64 array with the kernel output.
        """
        width, height, nbins = np.shape(histogram)
        numpixels = width * height

        image_linear = np.reshape(floatimage, (numpixels, )).astype(np.float32)
        histogram_linear = np.reshape(
            histogram, (np.size(histogram), )).astype(np.float32)
        # zeros_like already yields float32 here; no extra astype copy needed.
        transform = np.zeros_like(image_linear)

        mf = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=image_linear)
        self.buf_histogram = cl.Buffer(self.context,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=histogram_linear)
        self.output_buf = cl.Buffer(self.context, mf.READ_WRITE,
                                    transform.nbytes)

        kernel = self.program.IIF
        # Three typed scalars followed by three buffer arguments.
        kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.float32] +
                                     [None] * 3)
        kernel.set_arg(0, np.uintc(width))
        kernel.set_arg(1, np.uintc(height))
        kernel.set_arg(2, np.float32(k))
        kernel.set_arg(3, self.buf_image)
        kernel.set_arg(4, self.buf_histogram)
        kernel.set_arg(5, self.output_buf)

        cl.enqueue_nd_range_kernel(self.queue, kernel, image_linear.shape,
                                   None).wait()

        cl.enqueue_read_buffer(self.queue, self.output_buf, transform).wait()
        # np.float was only an alias of the builtin float (float64) and was
        # removed in NumPy 1.24; np.float64 is the same dtype.
        return np.reshape(transform, (width, height)).astype(np.float64)

Esempio n. 13

0

Mostra file

    def compute(self, image, num_bins):
        """Run the ``iif_binid`` kernel on ``image``.

        ``image`` must be two-dimensional; it is flattened to float32 before
        upload. Returns a float32 array of shape ``(width, height, num_bins)``
        holding the kernel output.
        """
        width, height = np.shape(image)
        numpixels = width * height

        image = np.reshape(image, (numpixels, )).astype(np.float32)
        result = np.zeros((numpixels * num_bins, ), dtype=np.float32)

        flags = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   flags.READ_ONLY | flags.COPY_HOST_PTR,
                                   hostbuf=image)
        self.output_buf = cl.Buffer(self.context, flags.READ_WRITE,
                                    result.nbytes)

        kernel = self.program.iif_binid
        # Three typed scalars followed by two buffer arguments.
        kernel.set_scalar_arg_dtypes(
            [np.uintc, np.uintc, np.ubyte, None, None])
        kernel_args = (
            np.uintc(width),
            np.uintc(height),
            np.ubyte(num_bins),
            self.buf_image,
            self.output_buf,
        )
        for position, value in enumerate(kernel_args):
            kernel.set_arg(position, value)

        launch = cl.enqueue_nd_range_kernel(self.queue, kernel, image.shape,
                                            None)
        launch.wait()

        cl.enqueue_read_buffer(self.queue, self.output_buf, result).wait()
        shaped = np.reshape(result, (width, height, num_bins))
        return shaped.astype(np.float32)

Esempio n. 14

0

Mostra file

    def FuseRGBD_GPU(self, Image, boneDQ, jointDQ):
        """
        Run the FuseTSDF kernel for Image and read the volume back.
        :param Image: RGBD image; its depth_image is uploaded and its Size
                      is passed to the kernel
        :param boneDQ: the dual quaternion of bone in new frame
        :param jointDQ: the dual quaternion of joint in new frame
        :return: none (self.TSDF and self.Weight are refreshed in place)
        """
        gpu = self.GPUManager
        queue = gpu.queue

        # Upload the per-frame inputs.
        cl.enqueue_write_buffer(queue, self.DepthGPU, Image.depth_image)
        cl.enqueue_write_buffer(queue, self.boneDQGPU, boneDQ)
        cl.enqueue_write_buffer(queue, self.jointDQGPU, jointDQ)

        # Fuse the data of the RGBD image with the TSDF volume 3D model.
        global_size = (self.Size[0], self.Size[1])
        gpu.programs['FuseTSDF'].FuseTSDF(
            queue, global_size, None,
            self.TSDFGPU, self.DepthGPU, self.Param, self.Size_Volume,
            self.Pose_GPU, self.boneDQGPU, self.jointDQGPU, self.planeF,
            self.Calib_GPU,
            np.int32(Image.Size[0]), np.int32(Image.Size[1]),
            self.WeightGPU)

        # Update the CPU arrays from the device buffers.
        # (Debug aid: np.count_nonzero(np.isnan(self.TSDF)) after the first
        # read counts NaN entries in the volume.)
        cl.enqueue_read_buffer(queue, self.TSDFGPU, self.TSDF).wait()
        cl.enqueue_read_buffer(queue, self.WeightGPU, self.Weight).wait()

Esempio n. 15

0

Mostra file

File: openclpow.py Progetto: Bitmessage/PyBitmessage

def do_opencl_pow(hash, target):
    """Search for a proof-of-work value with the ``kernel_sha512`` kernel.

    The kernel is launched repeatedly, each time with the running count of
    work items already tried as its third argument, until the device writes
    a non-zero value into the output or ``shutdown`` becomes non-zero.

    Returns the found value, or 0 immediately when no GPU is enabled.
    Raises ``Exception("Interrupted")`` when ``shutdown`` is set.
    """
    output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if not len(enabledGpus):
        return output[0][0]

    # Eight zero bytes (16 hex digits) are prepended to the decoded hash.
    data = numpy.zeros(1, dtype=hash_dt, order='C')
    data[0]['v'] = ("0000000000000000" + hash).decode("hex")
    data[0]['target'] = target

    input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    hash_buf = cl.Buffer(ctx, input_flags, hostbuf=data)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    start = time.time()
    progress = 0
    # Work items per launch.
    globamt = worksize * 2000

    while output[0][0] == 0 and shutdown == 0:
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
        cl.enqueue_read_buffer(queue, dest_buf, output)
        queue.finish()
        progress += globamt
        sofar = time.time() - start
#       logger.debug("Working for %.3fs, %.2f Mh/s", sofar, (progress / sofar) / 1000000)

    if shutdown != 0:
        raise Exception("Interrupted")
    taken = time.time() - start
#   logger.debug("Took %d tries.", progress)
    return output[0][0]

Esempio n. 16

0

Mostra file

File: GPU_Processor.py Progetto: erikhazzard/OpenCL-RPC-Example

    def execute(self, params):
        ''' This handles the actual execution for the processing, which would
        get executed on each request - this is where we care about the
        performance
        '''

        timing.timings.start('load')
        self.load_program(params)
        timing.timings.stop('load')
        finish = timing.timings.timings['load']['timings'][-1]
        print '<<< Loaded program in %s ms' % (finish)

        timing.timings.start('execute')
        # Start the program
        self.program.worker(self.queue, 
            self.data['income'].shape,
            None,
            self.income_buf,
            self.capGains_buf,
            self.dividendsInterest_buf,
            self.children_buf,
            self.dest_buf,
        )

        # Get an empty numpy array in the shape of the original data
        result = numpy.empty_like(self.data['income'])

        #Wait for result
        cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

        #show timing info
        timing.timings.stop('execute')
        finish = timing.timings.timings['execute']['timings'][-1]
        print '<<< Executed in %s ms' % (finish)
        return result

Esempio n. 17

0

Mostra file

    def randomfill(self):
        """Run the ``randomfill`` kernel and store its output in ``self.nff``.

        Fresh device buffers are created for both images and for the output,
        and the elapsed time is added to ``self.times["randomfill"]``.
        """
        started = getTime()
        flags = cl.mem_flags

        input_flags = flags.READ_ONLY | flags.COPY_HOST_PTR
        input_bufs = []
        for index in (0, 1):
            input_bufs.append(
                cl.Buffer(self.ctx, input_flags, hostbuf=self.img[index]))
        self.inputBuf = input_bufs

        self.outputBuf = cl.Buffer(self.ctx,
                                   flags.WRITE_ONLY | flags.COPY_HOST_PTR,
                                   hostbuf=self.nff)

        patch_height = numpy.int32(self.patchSize[0])
        patch_width = numpy.int32(self.patchSize[1])
        height = numpy.int32(self.size[0])
        width = numpy.int32(self.size[1])
        self.program.randomfill(
            self.queue, self.effectiveSize, None,
            patch_height, patch_width, height, width,
            self.inputBuf[0], self.inputBuf[1], self.outputBuf)

        # Read into a scratch array, then keep a copy as the new field.
        scratch = numpy.empty_like(self.nff)
        cl.enqueue_read_buffer(self.queue, self.outputBuf, scratch).wait()
        self.nff = numpy.copy(scratch)

        self.times["randomfill"] += getTime() - started

Esempio n. 18

0

Mostra file

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]

Esempio n. 19

0

Mostra file

File: lib_gpu.py Progetto: Jak23/modular

 def execute(self, *args, **kwargs):
     """Load the data, run the program kernel and return its output.

     All arguments are forwarded to ``self.load_data``. The returned array
     has the shape and dtype of ``self.a``.
     """
     self.load_data(*args, **kwargs)
     kernel_buffers = (self.a_buf, self.b_buf, self.dest_buf)
     self.program.program__(self.queue, self.a.shape, None, *kernel_buffers)
     result = np.empty_like(self.a)
     read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, result)
     read_event.wait()
     return result

Esempio n. 20

0

Mostra file

File: testgen.py Progetto: sleepnezz/sparkcl

def map_function(data):
    """Add ``data[0]`` and ``data[1]`` element-wise on an OpenCL device.

    The platform and device indices are taken from the output of
    ``../bin/get-host-platform-device.sh``.

    Args:
        data: pair of sequences; each is converted to a float32 array.

    Returns:
        A float32 array with the element-wise sums. Its length is the
        smaller of the two input sizes. The original hard-coded 5, which
        read out of bounds for shorter inputs and truncated longer ones;
        results for 5-element inputs are unchanged.
    """
    proc = subprocess.Popen(["../bin/get-host-platform-device.sh"], stdout=subprocess.PIPE, shell=True)
    proc_out, _ = proc.communicate()
    # The script is expected to print exactly two tokens: platform, device.
    [SPARKCL_PLATFORM, SPARKCL_DEVICE] = proc_out.split()

    KERNEL_CODE="""
        __kernel void ArraySum(__global float *A,__global float *B,__global float *C){
       int i = get_global_id(0);
       C[i] = A[i]+B[i];
}

    """

    cl_device = cl.get_platforms()[int(SPARKCL_PLATFORM)].get_devices()[int(SPARKCL_DEVICE)]
    ctx = cl.Context([cl_device])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, KERNEL_CODE).build()
    kernel = prg.ArraySum
    mf = cl.mem_flags

    np_data = [np.array(data[0]).astype(np.float32),
               np.array(data[1]).astype(np.float32)]

    data_buf = [cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=host)
                for host in np_data]

    # One work item per element both inputs can supply.
    count = min(np_data[0].size, np_data[1].size)
    result = np.zeros((count, )).astype(np.float32)
    result_buf = cl.Buffer(ctx, mf.WRITE_ONLY, result.nbytes)

    kernel(queue, (count,), None, data_buf[0], data_buf[1], result_buf)
    cl.enqueue_read_buffer(queue, result_buf, result).wait()
    return result

Esempio n. 21

0

Mostra file

File: CL.py Progetto: naojsoft/ginga

    def transform_uint32(self, data_np,
                         flip_x=False, flip_y=False, swap_xy=False,
                         out=None):
        """Run the ``image_transform_uint32`` kernel on ``data_np``.

        The three flags are passed to the kernel as int32 values. When
        ``out`` is None a new array is allocated whose first two dimensions
        are exchanged if ``swap_xy`` is set; otherwise the kernel output is
        read into the supplied ``out``.

        Returns the array that received the kernel output.
        """
        height, width = data_np.shape[:2]

        # Swapping the axes exchanges the output's first two dimensions.
        out_ht, out_wd = (width, height) if swap_xy else (height, width)
        new_size = [out_ht, out_wd] + list(data_np.shape[2:])

        flags = cl.mem_flags

        # Create OpenCL buffers on the device; the upload needs
        # contiguous memory.
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, data_np.nbytes)

        scalar_args = [np.int32(value) for value in
                       (width, height, flip_x, flip_y, swap_xy)]
        self.program.image_transform_uint32(self.queue, [height, width], None,
                                            src_buf, dst_buf, *scalar_args)

        if out is None:
            out = np.empty_like(data_np).reshape(new_size)
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Esempio n. 22

0

Mostra file

File: openclpow.py Progetto: N0U/PyBitmessage

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, cl.get_platforms()[0].get_devices()[1])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]

Esempio n. 23

0

Mostra file

File: fdtd3d_gpu_cpu_non-pinned.py Progetto: wbkifun/fdtd_accelerate

	def exchange_boundary_h(s):
		"""Copy fields 4 and 5 from each device to the next via host buffers.

		Every device except the last reads both fields (at its offset) into
		its staging pair; every device except the first then receives the
		staging pair filled by its predecessor.
		"""
		# (device field index, staging slot): 4 is hy_gpu, 5 is hz_gpu
		field_slots = ((4, 0), (5, 1))

		senders = zip(s.queues, s.eh_fields_gpus, s.tmpfs, s.offsets)[:-1]
		for queue, eh_fields, tmpf, offset in senders:
			for field, slot in field_slots:
				cl.enqueue_read_buffer(queue, eh_fields[field], tmpf[slot], offset)

		receivers = zip(s.queues[1:], s.eh_fields_gpus[1:], s.tmpfs[:-1])
		for queue, eh_fields, tmpf in receivers:
			for field, slot in field_slots:
				cl.enqueue_write_buffer(queue, eh_fields[field], tmpf[slot])

Esempio n. 24

0

Mostra file

File: test.py Progetto: roundrobin/OpenCL-Benchmark-Tests

    def execute(self):
        """ This handles the actual execution for the processing, which would
        get executed on each request - this is where we care about the
        performance
        """
        timing.timings.start("execute")

        # Start the program
        self.program.worker(self.queue, self.data1.shape, None, self.data1_buf, self.data2_buf, self.dest_buf)

        # Get an empty numpy array in the shape of the original data
        result = numpy.empty_like(self.data1)

        # Wait for result
        cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

        # show timing info
        timing.timings.stop("execute")
        finish = timing.timings.timings["execute"]["timings"][-1]
        print "<<< DONE in %s" % (finish)

        # Open data file to append to
        data_file = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../data.csv"), "a")
        data_file.write("PyOpenCl %s,%s,%s,%s\n" % (process_type, finish, num_records, num_calculations))
        data_file.close()

Esempio n. 25

0

Mostra file

File: locator_cl.py Progetto: crosslore/cuauv_software

    def update(self, sub_pos, angle, min_dist, max_dist, width, in_weight,
               out_weight):
        '''
        Apply one piece of evidence to the probability grid.

        The evidence is that the sub is at sub_pos, the target is seen at an
        absolute heading of `angle` and is most likely between min_dist and
        max_dist away. in_weight is P(this measurement | buoy at point p)
        for points p inside that region; out_weight is the same for points
        outside it.

        The `evidence` kernel updates the device-side probabilities, which
        are then read back, normalized to sum to one on the host and
        written to the device again.
        '''
        north, east = sub_pos

        # Scalar kernel arguments; angle goes through radians() and the
        # distances are passed squared.
        scalar_values = (north, east, radians(angle),
                         min_dist**2, max_dist**2,
                         width, in_weight, out_weight)
        scalar_args = [float32(value) for value in scalar_values]

        cl_program.evidence(cl_queue, self.norths.shape, None,
                            self.norths_buf, self.easts_buf, self.prob_buf,
                            *scalar_args)
        #TODO ?
        read_event = cl.enqueue_read_buffer(cl_queue, self.prob_buf,
                                            self.probabilities)
        read_event.wait()

        # Normalize in place
        self.probabilities /= numpy.sum(self.probabilities)

        cl.enqueue_write_buffer(cl_queue, self.prob_buf, self.probabilities)

Esempio n. 26

0

Mostra file

File: lib_gpu.py Progetto: ctogle/modular

	def execute(self, *args, **kwargs):
		"""Load the data, run the program kernel and return its output.

		All arguments are forwarded to ``self.load_data``. The returned array
		has the shape and dtype of ``self.a``.
		"""
		self.load_data(*args, **kwargs)
		kernel_buffers = (self.a_buf, self.b_buf, self.dest_buf)
		self.program.program__(self.queue, self.a.shape, None, *kernel_buffers)
		result = np.empty_like(self.a)
		read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, result)
		read_event.wait()
		return result

Esempio n. 27

0

Mostra file

File: OCLLingo.py Progetto: ihaque/SIML

    def getMultipleRows(self,rowbase,rowlimit): #{{{
        """Computes multiple Tanimoto rows *rowbase:rowlimit* corresponding to comparing every SMILES string
        in the query set with the reference SMILES strings having index *rowbase*, *rowbase+1*, ..., *rowlimit-1*
        in the reference set, stores this block as ``self.resultmatrix`` and returns its first ``self.nquery`` columns.

        This method is synchronous (it will not return until the block has been completely computed).

        Raises ``ValueError`` if the requested range lies outside ``[0, self.nref]``.
        """
        if rowbase < 0 or rowlimit > self.nref:
            # The original used a bare ``raise`` here, which has no active
            # exception to re-raise and fails with an unrelated error.
            raise ValueError("row range %r:%r is outside the reference set (0:%r)"
                             % (rowbase, rowlimit, self.nref))

        # Pad rows out to 64 byte pitch (16 float32 values); floor division
        # keeps the pitch an integer regardless of the division semantics.
        rowpitchInFloat = 16*((self.nquery+15)//16)

        # Using pagelocked memory and async copy seems to actually slow us down
        # on large tiled calculations
        self.resultmatrix = numpy.empty((rowlimit-rowbase,rowpitchInFloat),dtype=numpy.float32)
        self.gpu.gpumatrix = cl.Buffer(self.gpu.context,cl.mem_flags.WRITE_ONLY,size=self.resultmatrix.nbytes)

        # With precalculated magnitudes. Each of the two local-memory
        # arguments is sized from the longest reference in the range.
        lmem_bytes = int(2*4*max(self.rlengths[rowbase:rowlimit]))
        threads_per_block = 192
        self.gpu.multiRowKernel(self.gpu.queue,(threads_per_block*(rowlimit-rowbase),),
                                               self.gpu.rsmiles,self.gpu.rcounts,self.gpu.rl_gpu,self.gpu.rmag_gpu,
                                               self.refPitchInInt,
                                               self.gpu.qsmiles,self.gpu.qcounts,self.gpu.ql_gpu,self.gpu.qmag_gpu,
                                               self.qPitchTInInt,
                                               self.gpu.gpumatrix, numpy.int32(rowpitchInFloat),
                                               numpy.int32(self.qshape[0]),numpy.int32(self.qshape[1]),numpy.int32(rowbase),
                                               cl.LocalMemory(lmem_bytes),cl.LocalMemory(lmem_bytes),
                                               local_size=(threads_per_block,))

        cl.enqueue_read_buffer(self.gpu.queue,self.gpu.gpumatrix,self.resultmatrix).wait()
        # Drop the padding columns added for the pitch.
        return self.resultmatrix[:,0:self.nquery]

Esempio n. 28

0

Mostra file

    def FuseRGBD_GPU(self, Image, Pose):
        """
        Run the FuseTSDF kernel for Image and read the volume back.
        :param Image: RGBD image; its depth_image is uploaded and its Size
                      is passed to the kernel
        :param Pose: transform from the first camera pose to the last camera pose
        :return: none (self.TSDF and self.Weight are refreshed in place)
        """
        gpu = self.GPUManager
        queue = gpu.queue

        # Upload the per-frame inputs.
        cl.enqueue_write_buffer(queue, self.Pose_GPU, Pose)
        cl.enqueue_write_buffer(queue, self.DepthGPU, Image.depth_image)

        # Fuse the data of the RGBD image with the TSDF volume 3D model.
        global_size = (self.Size[0], self.Size[1])
        gpu.programs['FuseTSDF'].FuseTSDF(
            queue, global_size, None,
            self.TSDFGPU, self.DepthGPU, self.Param, self.Size_Volume,
            self.Pose_GPU, self.Calib_GPU,
            np.int32(Image.Size[0]), np.int32(Image.Size[1]),
            self.WeightGPU)

        # Update the CPU arrays from the device buffers.
        # (Debug aid: np.count_nonzero(np.isnan(self.TSDF)) after the first
        # read counts NaN entries in the volume.)
        cl.enqueue_read_buffer(queue, self.TSDFGPU, self.TSDF).wait()
        cl.enqueue_read_buffer(queue, self.WeightGPU, self.Weight).wait()

Esempio n. 29

0

Mostra file

File: test_wrapper.py Progetto: python-jheengut/pyopencl

def test_that_python_args_fail(ctx_factory):
    """Check that kernels reject bare Python scalars but accept numpy scalars.

    Both call styles are exercised: a ``cl.Kernel`` object and the program
    attribute ``prg.mult``.  Each call with plain Python numbers must raise
    ``cl.LogicError``; the call with ``np.float32``/``np.int32`` must run.
    """
    kernel_source = """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """
    failure_message = "PyOpenCL should not accept bare Python types as arguments"

    context = ctx_factory()
    prg = cl.Program(context, kernel_source).build()

    a = np.random.rand(50000)
    queue = cl.CommandQueue(context)
    flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    a_buf = cl.Buffer(context, flags, hostbuf=a)

    # Kernel object called with two bare Python ints.
    knl = cl.Kernel(prg, "mult")
    try:
        knl(queue, a.shape, None, a_buf, 2, 3)
    except cl.LogicError:
        pass
    else:
        assert False, failure_message

    # Program attribute called with a bare Python float and int.
    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
    except cl.LogicError:
        pass
    else:
        assert False, failure_message

    # Properly typed numpy scalars are accepted.
    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    a_result = np.empty_like(a)
    read_event = cl.enqueue_read_buffer(queue, a_buf, a_result)
    read_event.wait()

Esempio n. 30

0

Mostra file

File: sha1c.py Progetto: gfs/ENGS116Proj

 def transform(self):
     """Realizes the calculus"""
     # Prepare the input and output memory
     mf = cl.mem_flags
     
     msg = np.char.array(self.Word_buffer)
     len_array = np.array(self.len).astype(np.int32)
     Hexdigest_array = np.char.array(['']*41*(len(len_array)))
     
     print msg
     print msg.nbytes
     print len_array
     print len_array.nbytes
     print Hexdigest_array
     print Hexdigest_array.nbytes
     
     # Allocate device memory
     msg_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, msg.nbytes, msg)
     len_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, len_array.nbytes, len_array)
     Hexdigest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, Hexdigest_array.nbytes)
     
     # Start OpenCL operation and wait for it to finish
     time1 = datetime.datetime.now()
     self.prg.sha1(self.queue, (len(len_array),), msg_buf, len_buf, Hexdigest_buf)
     cl.enqueue_read_buffer(self.queue, Hexdigest_buf, Hexdigest_array).wait()
     time2 = datetime.datetime.now()
     print "Execution time OpenCL sha1: " + repr((time2 - time1).microseconds/1000) + "ms"
     
     # Convert the result into strings
     for j in range(0,len(Hexdigest_array)/41):
         self.result.append(''.join(Hexdigest_array[j*41 + 0:j*41 + 41]))

Esempio n. 31

0

Mostra file

File: gpu_countminsketch.py Progetto: CUTEYANSHU/SketchVis

    def dump_batch(self):
        """Flush the pending batch into the sketch matrix ``self.M``.

        The batch keys (as 32-byte strings) and counts are uploaded together
        with ``self.rand``; the ``increment`` kernel runs over a
        ``(len(keys), self.d)`` grid and its ``(d, w)`` int32 output is added
        to ``self.M``.  The batch is cleared afterwards.
        """
        mem = cl.mem_flags
        input_flags = mem.READ_ONLY | mem.COPY_HOST_PTR

        keys = np.array(self.batch.keys(), dtype='S32')
        counts = np.array(self.batch.values(), dtype=np.int32)
        out = np.zeros([self.d, self.w], dtype=np.int32)

        # Read-only input buffers, initialised from the host arrays.
        rand_buf, keys_buf, counts_buf = [
            cl.Buffer(self.ctx, input_flags, hostbuf=host_array)
            for host_array in (self.rand, keys, counts)
        ]

        # Write-only buffer that receives the kernel output.
        out_buf = cl.Buffer(self.ctx, mem.WRITE_ONLY, out.nbytes)

        # Launch the kernel and block until it has finished.
        grid = (len(keys), self.d)
        launch = self.bld.increment(self.queue, grid, None,
                                    rand_buf, keys_buf, counts_buf, out_buf)
        launch.wait()

        read_back = cl.enqueue_read_buffer(self.queue, out_buf, out)
        read_back.wait()

        self.M += out
        self.batch.clear()

Esempio n. 32

0

Mostra file

File: main.py Progetto: mastensg/mandel

 def execute(self):
     self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.b_buf, self.dest_buf)
     c = numpy.array(range(10), dtype=numpy.uint32)
     cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
     print "a", self.a
     print "b", self.b
     print "c", c

Esempio n. 33

0

Mostra file

    def execute(self):
        '''
        Execute one iteration of patchMatch.

        Uploads the two images and the current ``self.nff`` field, runs the
        ``propagate`` kernel, stores the read-back result in ``self.nff`` and
        adds the elapsed time to ``self.times["execute"]``.
        '''
        started = getTime()

        flags = cl.mem_flags
        readonly_copy = flags.READ_ONLY | flags.COPY_HOST_PTR
        readwrite_copy = flags.READ_WRITE | flags.COPY_HOST_PTR

        self.inputBuf = [
            cl.Buffer(self.ctx, readonly_copy, hostbuf=self.img[index])
            for index in (0, 1)
        ]
        self.outputBuf = cl.Buffer(self.ctx, readwrite_copy, hostbuf=self.nff)

        scalar_args = (
            numpy.int32(self.patchSize[0]),  # patchHeight
            numpy.int32(self.patchSize[1]),  # patchWidth
            numpy.int32(self.size[0]),       # height
            numpy.int32(self.size[1]),       # width
            numpy.int32(self.iteration),
        )
        buffer_args = (self.inputBuf[0], self.inputBuf[1], self.outputBuf)
        self.program.propagate(self.queue, self.effectiveSize, None,
                               *(scalar_args + buffer_args))

        # Read the updated field into a scratch array, then keep a copy of it.
        scratch = numpy.empty_like(self.nff)
        read_event = cl.enqueue_read_buffer(self.queue, self.outputBuf, scratch)
        read_event.wait()
        self.nff = numpy.copy(scratch)

        self.times["execute"] += getTime() - started

Esempio n. 34

0

Mostra file

def gpu_array_sum(a, b):
    """Add two arrays on an OpenCL device and return the result.

    Prints the kernel time taken from the event profile and the wall-clock
    time from kernel launch to the end of the read-back.
    """
    kernel_source = """
    __kernel void sum(__global const float *a, __global const float *b, __global float *c)
    {
        int i = get_global_id(0);
        int j;
        for(j = 0; j < 1000; j++)
        {
            c[i] = a[i] + b[i];
        }
    }"""

    # Context and a queue with profiling (timing) enabled.
    context = cl.create_some_context()
    profiling = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(context, properties=profiling)

    # Two input buffers copied from the host, one output buffer.
    mem = cl.mem_flags
    input_flags = mem.READ_ONLY | mem.COPY_HOST_PTR
    a_buffer = cl.Buffer(context, input_flags, hostbuf=a)
    b_buffer = cl.Buffer(context, input_flags, hostbuf=b)
    c_buffer = cl.Buffer(context, mem.WRITE_ONLY, b.nbytes)

    program = cl.Program(context, kernel_source).build()

    gpu_start_time = time()
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
    event.wait()
    # Profile timestamps are scaled by 1e-9 to give seconds.
    kernel_seconds = 1e-9*(event.profile.end - event.profile.start)
    print("GPU Kernel Time: {0} s".format(kernel_seconds))

    # Read the result back into a host array shaped like a.
    c_gpu = np.empty_like(a)
    read_event = cl.enqueue_read_buffer(queue, c_buffer, c_gpu)
    read_event.wait()
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))
    return c_gpu

Esempio n. 35

0

Mostra file

File: PyOpenCLTestApplication.py Progetto: lancelote/parallel_python

def gpu_array_sum(a, b):
    """Add two arrays on the first device of the first OpenCL platform.

    Prints the kernel time taken from the event profile and the wall-clock
    time from kernel launch to the end of the read-back, then returns the
    result array.
    """
    source = """
    __kernel void sum(__global const float *a, __global const float *b, __global float *c)
    {
        int i = get_global_id(0);
        int j;
        for(j = 0; j < 10000; j++)
        {
            c[i] = a[i] + b[i];
        }
    }"""

    # Pick the first device of the first platform.
    first_platform = cl.get_platforms()[0]
    first_device = first_platform.get_devices()[0]
    context = cl.Context([first_device])
    queue = cl.CommandQueue(
        context, properties=cl.command_queue_properties.PROFILING_ENABLE)

    flags = cl.mem_flags
    a_buffer = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=a)
    b_buffer = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=b)
    c_buffer = cl.Buffer(context, flags.WRITE_ONLY, b.nbytes)

    program = cl.Program(context, source).build()

    gpu_start_time = time()
    event = program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
    event.wait()
    profile = event.profile
    elapsed = 1e-9*(profile.end - profile.start)
    print("GPU Kernel Time: {0} s".format(elapsed))

    c_gpu = np.empty_like(a)
    cl.enqueue_read_buffer(queue, c_buffer, c_gpu).wait()
    total_seconds = time() - gpu_start_time
    print("GPU Time: {0} s".format(total_seconds))
    return c_gpu

Esempio n. 36

0

Mostra file

File: locator_cl.py Progetto: athityakumar/software

    def update(self, sub_pos, angle, min_dist, max_dist, width, in_weight, out_weight):
        '''
        Apply one piece of evidence to the probability grid.

        The evidence is: the sub is at sub_pos (north, east), the target is
        seen at an absolute heading of `angle`, most likely between min_dist
        and max_dist away.
        in_weight is P(this measurement | buoy at point p) for points p inside
        the region; out_weight is the same for points outside the region.

        After the kernel has run, the probabilities are read back, normalized
        to sum to one on the host and written to the device buffer again.
        '''
        north, east = sub_pos
        scalar_args = (
            float32(north), float32(east),
            float32(radians(angle)),
            float32(min_dist**2),   # the kernel receives squared distances
            float32(max_dist**2),
            float32(width),
            float32(in_weight),
            float32(out_weight),
        )
        cl_program.evidence(cl_queue, self.norths.shape, None,
                            self.norths_buf, self.easts_buf, self.prob_buf,
                            *scalar_args)
        #TODO ?
        read_event = cl.enqueue_read_buffer(cl_queue, self.prob_buf,
                                            self.probabilities)
        read_event.wait()

        # Normalize in place so the host array object stays the same.
        self.probabilities /= numpy.sum(self.probabilities)

        cl.enqueue_write_buffer(cl_queue, self.prob_buf, self.probabilities)

Esempio n. 37

0

Mostra file

File: openclpow.py Progetto: PeterSurda/PyBitmessage

def do_opencl_pow(hash_, target):
    """Perform PoW using OpenCL.

    Runs ``kernel_sha512`` in batches, advancing the starting value by the
    batch size each round, until the kernel writes a non-zero value to the
    output or ``shutdown`` becomes non-zero.

    Returns the value found, or 0 when no GPU is enabled.
    Raises ``Exception("Interrupted")`` when the loop ended due to shutdown.
    """
    output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if not enabledGpus:
        return output[0][0]

    # Kernel input record: 8 zero bytes followed by the decoded hash, plus target.
    payload = numpy.zeros(1, dtype=hash_dt, order='C')
    payload[0]['v'] = ("0000000000000000" + hash_).decode("hex")
    payload[0]['target'] = target

    mem = cl.mem_flags
    hash_buf = cl.Buffer(ctx, mem.READ_ONLY | mem.COPY_HOST_PTR, hostbuf=payload)
    dest_buf = cl.Buffer(ctx, mem.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    batch_size = worksize * 2000
    start_value = 0

    while output[0][0] == 0 and shutdown == 0:
        kernel.set_arg(2, pack("<Q", start_value))
        cl.enqueue_nd_range_kernel(queue, kernel, (batch_size,), (worksize,))
        # Fall back to enqueue_copy where enqueue_read_buffer is missing.
        try:
            cl.enqueue_read_buffer(queue, dest_buf, output)
        except AttributeError:
            cl.enqueue_copy(queue, output, dest_buf)
        queue.finish()
        start_value += batch_size

    if shutdown != 0:
        raise Exception("Interrupted")
    return output[0][0]

Esempio n. 38

0

Mostra file

File: clmandel.py Progetto: QuadmasterXLII/MandelBrotGL

 def execute(self, settings):
     """Run the ``mandel`` kernel and return the counts as a square array.

     The flat int32 result of length ``settings.dim**2`` is reshaped to
     ``(settings.dim, settings.dim)``.
     """
     side = settings.dim
     global_size = (self.c_real.shape[0], )
     self.program.mandel(self.queue, global_size, None,
                         self.real_buf, self.imag_buf, self.depth_buf,
                         self.dest_buf)

     counts = np.zeros(side**2, dtype=np.int32)
     read_event = cl.enqueue_read_buffer(self.queue, self.dest_buf, counts)
     read_event.wait()
     return counts.reshape([side, side])

Esempio n. 39

0

Mostra file

File: opencl.py Progetto: amiller/rtmodel

def reduce_flatrot():
  """Run the ``float4_sum`` reduction and return the column sums of its output.

  The kernel is launched with 8 work-groups of 64 items; an (8, 4) float
  array is read back from ``reduce_buf`` and summed along its first axis.
  """
  group_size = 64
  group_count = 8
  partial_sums = np.empty((group_count, 4), 'f')

  evt = program.float4_sum(queue, (group_size*group_count,), (group_size,),
                           reduce_buf, reduce_scratch,
                           qxdyqz_buf, np.int32(length))

  read_event = cl.enqueue_read_buffer(queue, reduce_buf, partial_sums)
  read_event.wait()
  return partial_sums.sum(axis=0)

Esempio n. 40

0

Mostra file

File: sparkcl_tmp2.py Progetto: sleepnezz/sparkcl

def map1(data):

    SPARKCL_PLATFORM = os.environ['CL_PLATFORM']
    SPARKCL_DEVICE = os.environ['CL_DEVICE']
    print str(SPARKCL_PLATFORM)+":"+str(SPARKCL_DEVICE)
    KERNEL_CODE="""
        __kernel void ArraySum(__global float *A,__global float *B,__global float *C){
       int i = get_global_id(0);
       C[i] = A[i]+B[i];
}

    """

    cl_device=cl.get_platforms()[int(SPARKCL_PLATFORM)].get_devices()[int(SPARKCL_DEVICE)]
    ctx = cl.Context([cl_device])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, KERNEL_CODE).build()
    kernel = prg.ArraySum
    mf = cl.mem_flags
    print "map" + str(data)

    np_data = []
    data_buf = []
    np_data.append(np.array(data[0]).astype(np.float32))
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[0]))
    np_data.append(np.array(data[1]).astype(np.float32))
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[1]))

    result = np.zeros((5,)).astype(np.float32)
    result_buf = cl.Buffer(ctx, mf.WRITE_ONLY, result.nbytes)

    kernel(queue,(5,),None,data_buf[0],data_buf[1],result_buf)
    cl.enqueue_read_buffer(queue, result_buf, result).wait()
    return [result.astype(np.float32)]

Esempio n. 41

0

Mostra file

File: __init__.py Progetto: lid/phoenix-miner

 def mineThread(self):
     """Run the ``search`` kernel over queued work and report flagged results.

     Iterates over ``self.qr``; each work item is processed in
     ``data.iterations`` kernel launches.  After every launch the output
     buffer is read back into ``self.output``, and a flagged result is handed
     to ``self.postprocess`` through ``reactor.callFromThread``.
     """
     for data in self.qr:
         for i in range(data.iterations):
             # Positional kernel arguments; the order must match the kernel
             # signature. Note state2[0] and state2[4] are not passed.
             self.kernel.search(
                 self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                 data.state[0], data.state[1], data.state[2], data.state[3],
                 data.state[4], data.state[5], data.state[6], data.state[7],
                 data.state2[1], data.state2[2], data.state2[3],
                 data.state2[5], data.state2[6], data.state2[7],
                 data.base[i],
                 data.f[0],
                 data.f[1],data.f[2],
                 data.f[3],data.f[4],
                 self.output_buf)
             cl.enqueue_read_buffer(
                 self.commandQueue, self.output_buf, self.output)
             # Block until the kernel and the read-back have completed.
             self.commandQueue.finish()

             # The OpenCL code will flag the last item in the output buffer when
             # it finds a valid nonce. If that's the case, send it to the main
             # thread for postprocessing and clean the buffer for the next pass.
             if self.output[self.OUTPUT_SIZE]:
                 # A copy is passed because self.output is zeroed right after.
                 reactor.callFromThread(self.postprocess, self.output.copy(),
                 data.nr)

                 self.output.fill(0)
                 cl.enqueue_write_buffer(
                     self.commandQueue, self.output_buf, self.output)

Esempio n. 42

0

Mostra file

 def plotCurrentMembraneCoordinates(self):
     """Read the membrane X/Y coordinates from the device and plot Y against X."""
     transfers = (
         (self.dev_membraneCoordinatesX, self.host_membraneCoordinatesX),
         (self.dev_membraneCoordinatesY, self.host_membraneCoordinatesY),
     )
     # Refresh each host array from the .data buffer of its device array.
     for device_array, host_array in transfers:
         read_event = cl.enqueue_read_buffer(self.queue, device_array.data,
                                             host_array)
         read_event.wait()

     plt.plot(self.host_membraneCoordinatesX,
              self.host_membraneCoordinatesY)

Esempio n. 43

0

Mostra file

File: CL.py Progetto: naojsoft/ginga

    def resize_uint32(self, data_np, scale_x, scale_y, out=None):
        """Resize an image on the device with the ``image_resize_uint32`` kernel.

        The output height and width are the input ones multiplied by
        ``scale_y`` and ``scale_x`` and truncated to int.  When ``out`` is
        None a new array with the resized shape and the input dtype is
        allocated; otherwise the result is read into ``out``.  Returns the
        array that received the result.
        """
        src_ht, src_wd = data_np.shape[:2]
        dst_ht = int(src_ht * scale_y)
        dst_wd = int(src_wd * scale_x)
        dst_shape = [dst_ht, dst_wd] + list(data_np.shape[2:])

        flags = cl.mem_flags

        # Source buffer is initialised from a contiguous copy of the input.
        data_np = np.ascontiguousarray(data_np)
        src_buf = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)

        # Destination buffer holds one uint32 per output pixel.
        bytes_per_pixel = np.uint32(0).nbytes
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY,
                            dst_ht * dst_wd * bytes_per_pixel)

        resize_evt = self.program.image_resize_uint32(
            self.queue, [dst_ht, dst_wd], None,
            src_buf, dst_buf,
            np.int32(src_wd), np.int32(dst_wd),
            np.float64(scale_x), np.float64(scale_y))

        if out is None:
            out = np.empty(dst_shape, dtype=data_np.dtype)
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Esempio n. 44

0

Mostra file

File: pyopencl_imp.py Progetto: thusodangersimon/lomb_scargle

def lombscarge_opencl(x, y, f):
    """Run the ``lombscargle`` kernel from ``lomb_txt`` in double precision.

    Parameters
    ----------
    x, y, f : array-like
        Converted to float64 before upload.  One work item is launched per
        element of ``f``.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``f`` holding the kernel output.

    Raises
    ------
    Re-raises any exception from the program build, after printing the
    build log of the first device in the context.
    """
    # start up gpu
    x = np.float64(x)
    y = np.float64(y)
    f = np.float64(f)
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags
    # number of samples, passed to the kernel as a 32-bit int
    Nx = np.int32(x.shape[0])
    # send data to card
    x_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
    y_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=y)
    f_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
    # make output
    pgram = np.empty_like(f)
    pgram_g = cl.Buffer(ctx, mf.WRITE_ONLY, pgram.nbytes)
    prg = cl.Program(ctx, lomb_txt)

    try:
        prg.build()
    except Exception:
        print("Error:")
        print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
        raise

    prg.lombscargle(queue, pgram.shape, None, x_g, y_g, f_g, pgram_g, Nx)
    # Wait for the read-back so the returned array is complete even when the
    # transfer is enqueued as non-blocking.
    cl.enqueue_read_buffer(queue, pgram_g, pgram).wait()

    return pgram

Esempio n. 45

0

Mostra file

File: test_opencl_benchmark_fp64_transmath.py Progetto: GongYiLiao/Python_Daily

def test_opencl_0(zz, a, b, c_result):
    """Benchmark a double-precision kernel on every platform and check the result.

    For each platform the device at index 1 is used.  Platform and device
    information is printed, the kernel is built with ``zz`` substituted as its
    loop count, timed through the event profile, and the first ``zz`` output
    elements are compared with ``c_result``.
    """
    kernel_template = """
                __kernel void sum(__global const double *a,
                __global const double *b, __global double *c)
                {
                            int loop;
                            int gid = get_global_id(0);
                            for(loop=0; loop<%s;loop++)
                            {
                                    c[gid] = a[gid] + b[gid];
                                    c[gid] = c[gid] * (a[gid] + b[gid]);
                                    c[gid] = c[gid] * (a[gid] / 2);
                                    c[gid] = log(exp(c[gid]));
                            }
                }
            """

    for platform in cl.get_platforms():
        # Only the second device of each platform is exercised.
        device = platform.get_devices()[1]

        print("===============================================================")
        print("Platform name:", platform.name)
        print("Platform profile:", platform.profile)
        print("Platform vendor:", platform.vendor)
        print("Platform version:", platform.version)
        print("---------------------------------------------------------------")
        print("Device name:", device.name)
        print("Device type:", cl.device_type.to_string(device.type))
        print("Device memory: ", device.global_mem_size//1024//1024, 'MB')
        print("Device max clock speed:", device.max_clock_frequency, 'MHz')
        print("Device compute units:", device.max_compute_units)

        # Simple speed test
        ctx = cl.Context([device])
        queue = cl.CommandQueue(
            ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

        mf = cl.mem_flags
        input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
        a_buf = cl.Buffer(ctx, input_flags, hostbuf=a)
        b_buf = cl.Buffer(ctx, input_flags, hostbuf=b)
        dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

        prg = cl.Program(ctx, kernel_template % (zz)).build()

        exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
        exec_evt.wait()
        elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)

        print("Execution time of test: %g s" % elapsed)

        c = numpy.empty_like(a)
        cl.enqueue_read_buffer(queue, dest_buf, c).wait()

        # Report every element that differs from the reference.
        mismatch_found = False
        for i in range(zz):
            if c[i] != c_result[i]:
                print("c_i: ", c[i], " c_results_i: ", c_result[i])
                print("diff: ", numpy.abs(c[i] - c_result[i]))
                mismatch_found = True

        if mismatch_found:
            print("Results doesn't match!!")
        else:
            print("Results OK")

Esempio n. 46

0

Mostra file

File: lomb_scargle.py Progetto: drdangersimon/seismo

def lomb_scargle32(x, y, f):
    '''Single-precision version of lomb-scargle.

    Converts ``x``, ``y`` and ``f`` to float32, runs the ``lombscargle``
    kernel from ``lomb_txt32`` with one work item per element of ``f`` and
    returns an array shaped like ``f`` holding the kernel output.

    Any exception from the program build is re-raised after the build log of
    the first device in the context has been printed.
    '''
    x = np.float32(x)
    y = np.float32(y)
    f = np.float32(f)
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mf = cl.mem_flags
    # number of samples, passed to the kernel as a 32-bit int
    Nx = np.int32(x.shape[0])
    # send data to card
    x_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
    y_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=y)
    f_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=f)
    # make output
    pgram = np.empty_like(f)
    pgram_g = cl.Buffer(ctx, mf.WRITE_ONLY, pgram.nbytes)
    prg = cl.Program(ctx, lomb_txt32)

    try:
        prg.build()
    except Exception:
        print("Error:")
        print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
        raise

    prg.lombscargle(queue, pgram.shape, None, x_g, y_g, f_g, pgram_g, Nx)
    # Wait for the read-back so the returned array is complete even when the
    # transfer is enqueued as non-blocking.
    cl.enqueue_read_buffer(queue, pgram_g, pgram).wait()

    return pgram

Esempio n. 47

0

Mostra file

File: fdtd3d_gpu_cpu_non-pinned.py Progetto: wbkifun/fdtd_accelerate

	def exchange_boundary_e(s):
		"""Exchange fields 1 and 2 (ey, ez) between neighbouring devices via host arrays.

		Every device except the first is read into its ``tmpfs`` entry; each
		entry is then written, at the matching ``offsets`` value, into the
		device that precedes it in the list.
		"""
		# Device -> host for every device but the first.
		downloads = zip(s.queues[1:], s.eh_fields_gpus[1:], s.tmpfs[1:])
		for queue, eh_fields, tmpf in downloads:
			for k in (0, 1):
				# k == 0: ey_gpu, k == 1: ez_gpu
				cl.enqueue_read_buffer(queue, eh_fields[k + 1], tmpf[k])

		# Host -> device: device i receives the arrays read from device i + 1.
		uploads = zip(s.queues[:-1], s.eh_fields_gpus[:-1], s.tmpfs[1:], s.offsets[:-1])
		for queue, eh_fields, tmpf, offset in uploads:
			for k in (0, 1):
				cl.enqueue_write_buffer(queue, eh_fields[k + 1], tmpf[k], offset)

Esempio n. 48

0

Mostra file

File: opencl-mm.py Progetto: vdragsic/opencl-parallelization

    def execute(self):
        """
        Run the test OpenCL kernel and return the elapsed kernel time.

        Builds the program from the source loaded via ``self.LoadKernelSrc``,
        launches ``matrix_mul`` over an ``(m, p)`` grid with
        ``(block, block)`` work-groups and reads the result into ``self.C``.

        Returns the kernel run time in seconds, taken from the event profile
        (presumably this needs a queue created with profiling enabled; the
        queue is set up outside this method).
        """

        kernel = self.LoadKernelSrc(self.src)

        # build opencl kernel
        prg = cl.Program(self.ctx, kernel).build()

        exec_evt = prg.matrix_mul(
            self.queue,
            (self.m, self.p),
            self.A_buf,
            self.B_buf,
            self.C_buf,
            np.uint32(self.m),
            np.uint32(self.n),
            np.uint32(self.p),
            local_size=(self.block, self.block),
        )
        # Keep the event itself: wait() is not guaranteed to return it, and
        # the profile below must be read from the event object.
        exec_evt.wait()

        # read result from opencl buffer
        cl.enqueue_read_buffer(self.queue, self.C_buf, self.C).wait()

        # return elapsed time in seconds
        return 1e-9 * (exec_evt.profile.end - exec_evt.profile.start)

Esempio n. 49

0

Mostra file

File: openclpow.py Progetto: Basti1993/PyBitmessage

def do_opencl_pow(hash, target):
    """Perform proof of work with the ``kernel_sha512`` OpenCL kernel.

    The kernel is launched in batches of ``worksize * 2000`` items, with the
    starting value advanced by the batch size each round, until it writes a
    non-zero value to the output.

    Returns that value, or 0 when ``ctx`` compares equal to False.
    """
    output = numpy.zeros(1, dtype=[("v", numpy.uint64, 1)])
    # Equality, not identity, is kept on purpose to preserve the original
    # check (presumably ctx is False when OpenCL is unavailable).
    if ctx == False:
        return output[0][0]

    # Kernel input record: 8 zero bytes followed by the decoded hash, plus target.
    data = numpy.zeros(1, dtype=hash_dt, order="C")
    data[0]["v"] = ("0000000000000000" + hash).decode("hex")
    data[0]["target"] = target

    hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    progress = 0
    globamt = worksize * 2000

    while output[0][0] == 0:
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
        cl.enqueue_read_buffer(queue, dest_buf, output)
        # finish() makes sure the read-back has landed before output is tested.
        queue.finish()
        progress += globamt
    return output[0][0]

Esempio n. 50

0

Mostra file

File: visualangle.py Progetto: RyanHope/gazetools_cl

 def __call__(self, ctx, x, y, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``distance_2_point`` kernel and return its float32 output.

     ``x``, ``y``, ``ez``, ``ex`` and ``ey`` are passed as float32 arrays,
     ``rx``, ``ry``, ``sw`` and ``sh`` as float32 scalars.  The output has the
     shape of ``x``.  All device buffers are released before returning.
     """
     self.build(ctx)

     def as_float_array(values):
         return np.array(values, dtype=np.float32, copy=False)

     x = as_float_array(x)
     y = as_float_array(y)
     ez = as_float_array(ez)
     ex = as_float_array(ex)
     ey = as_float_array(ey)
     rx, ry, sw, sh = np.float32(rx), np.float32(ry), np.float32(sw), np.float32(sh)

     # Input buffers, in the order x, y, ez, ex, ey.
     input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     input_bufs = [cl.Buffer(self.ctx, input_flags, hostbuf=host_array)
                   for host_array in (x, y, ez, ex, ey)]
     x_buf, y_buf, ez_buf, ex_buf, ey_buf = input_bufs
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x.nbytes)

     queue = cl.CommandQueue(self.ctx)
     self.prg.distance_2_point(queue, x.shape, None,
                               x_buf, y_buf, rx, ry, sw, sh,
                               ez_buf, ex_buf, ey_buf, out_buf)

     out = np.empty_like(x)
     cl.enqueue_read_buffer(queue, out_buf, out).wait()

     # Release inputs first, then the output buffer.
     for device_buf in input_bufs + [out_buf]:
         device_buf.release()
     return out

Esempio n. 51

0

Mostra file

File: ode45.py Progetto: ezetl/Ligo_RungeKutta_OpenCL

 def copy_array(self, arr_like, arr_device):
     """
     Copy a device buffer to the host and return the host array.

     The host array is allocated with the shape and dtype of ``arr_like``.
     """
     host_copy = np.empty_like(arr_like)
     read_event = cl.enqueue_read_buffer(self.queue, arr_device, host_copy)
     read_event.wait()
     return host_copy

Esempio n. 52

0

Mostra file

File: visualangle.py Progetto: RyanHope/gazetools_cl

 def __call__(self, ctx, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``subtended_angle_naive`` kernel and return its float32 output.

     ``x1``, ``y1``, ``x2``, ``y2``, ``ez``, ``ex`` and ``ey`` are passed as
     float32 arrays, ``rx``, ``ry``, ``sw`` and ``sh`` as float32 scalars.
     The output has the shape of ``x1``.  All device buffers are released
     before returning.
     """
     self.build(ctx)

     def as_float_array(values):
         return np.array(values, dtype=np.float32, copy=False)

     x1 = as_float_array(x1)
     y1 = as_float_array(y1)
     x2 = as_float_array(x2)
     y2 = as_float_array(y2)
     ez = as_float_array(ez)
     ex = as_float_array(ex)
     ey = as_float_array(ey)
     rx, ry, sw, sh = np.float32(rx), np.float32(ry), np.float32(sw), np.float32(sh)

     # Input buffers, in the order x1, y1, x2, y2, ez, ex, ey.
     input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     input_bufs = [cl.Buffer(self.ctx, input_flags, hostbuf=host_array)
                   for host_array in (x1, y1, x2, y2, ez, ex, ey)]
     x1_buf, y1_buf, x2_buf, y2_buf, ez_buf, ex_buf, ey_buf = input_bufs
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)

     queue = cl.CommandQueue(self.ctx)
     self.prg.subtended_angle_naive(queue, x1.shape, None,
                                    x1_buf, y1_buf, x2_buf, y2_buf,
                                    rx, ry, sw, sh,
                                    ez_buf, ex_buf, ey_buf, out_buf)

     out = np.empty_like(x1)
     cl.enqueue_read_buffer(queue, out_buf, out).wait()

     # Release inputs first, then the output buffer.
     for device_buf in input_bufs + [out_buf]:
         device_buf.release()
     return out

Esempio n. 53

0

Mostra file

    def mineThread(self):
        """Run the ``search`` kernel over queued work and report flagged results.

        Iterates over ``self.qr``; each work item is processed in
        ``data.iterations`` kernel launches.  After every launch the output
        buffer is read back into ``self.output``, and a flagged result is
        handed to ``self.postprocess`` through ``reactor.callFromThread``.
        """
        for data in self.qr:
            for i in range(data.iterations):
                # Positional kernel arguments; the order must match the kernel
                # signature. Note state2[0], state2[4] and f[0] are not passed.
                self.kernel.search(
                    self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                    data.state[0], data.state[1], data.state[2], data.state[3],
                    data.state[4], data.state[5], data.state[6], data.state[7],
                    data.state2[1], data.state2[2], data.state2[3],
                    data.state2[5], data.state2[6], data.state2[7],
                    data.base[i],
                    data.f[1],data.f[2],
                    data.f[3],data.f[4],
                    data.f[5],data.f[6],
                    data.f[7],data.f[8],
                    self.output_buf)
                cl.enqueue_read_buffer(
                    self.commandQueue, self.output_buf, self.output)
                # Block until the kernel and the read-back have completed.
                self.commandQueue.finish()

                # The OpenCL code will flag the last item in the output buffer
                # when it finds a valid nonce. If that's the case, send it to
                # the main thread for postprocessing and clean the buffer
                # for the next pass.
                if self.output[self.OUTPUT_SIZE]:
                    # A copy is passed because self.output is zeroed right after.
                    reactor.callFromThread(self.postprocess,
                    self.output.copy(), data.nr)

                    self.output.fill(0)
                    cl.enqueue_write_buffer(
                        self.commandQueue, self.output_buf, self.output)

Esempio n. 54

0

Mostra file

File: cl.py Progetto: fjarri-attic/beclab

	def fromDevice(self, buf, shape=None):
		"""Copy a device buffer into a new host array and return it.

		The host array uses ``buf.dtype`` and ``shape``; when ``shape`` is
		None, ``buf.shape`` is used.
		"""
		target_shape = buf.shape if shape is None else shape
		host_array = numpy.empty(target_shape, dtype=buf.dtype)
		read_event = cl.enqueue_read_buffer(self.queue, buf, host_array)
		read_event.wait()
		return host_array

Esempio n. 55

0

Mostra file

File: __init__.py Progetto: BlackhatEspeed/phoenix

    def mineThread(self):
        """Run the ``search`` kernel over queued work and report flagged results.

        Iterates over ``self.qr``; each work item is processed in
        ``data.iterations`` kernel launches.  After every launch the output
        buffer is read back into ``self.output``, and a flagged result is
        handed to ``self.postprocess`` through ``reactor.callFromThread``.
        """
        for data in self.qr:
            for i in range(data.iterations):
                # With GOFFSET set, the base is unpacked as a native unsigned
                # int and passed as the launch's global offset.
                offset = (unpack('I', data.base[i])[0],) if self.GOFFSET else None
                # Positional kernel arguments; the order must match the kernel
                # signature. Note state2[0] and state2[4] are not passed.
                self.kernel.search(
                    self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                    data.state[0], data.state[1], data.state[2], data.state[3],
                    data.state[4], data.state[5], data.state[6], data.state[7],
                    data.state2[1], data.state2[2], data.state2[3],
                    data.state2[5], data.state2[6], data.state2[7],
                    data.base[i],
                    data.f[0], data.f[1], data.f[2], data.f[3],
                    data.f[4], data.f[5], data.f[6], data.f[7],
                    self.output_buf, global_offset=offset)
                # Non-blocking read; the finish() below waits for it.
                cl.enqueue_read_buffer(self.commandQueue, self.output_buf,
                                       self.output, is_blocking=False)
                self.commandQueue.finish()

                # The OpenCL code will flag the last item in the output buffer
                # when it finds a valid nonce. If that's the case, send it to
                # the main thread for postprocessing and clean the buffer
                # for the next pass.
                if self.output[self.WORKSIZE]:
                    # A copy is passed because self.output is zeroed right after.
                    reactor.callFromThread(self.postprocess,
                    self.output.copy(), data.nr)

                    self.output.fill(0)
                    cl.enqueue_write_buffer(self.commandQueue, self.output_buf,
                                            self.output, is_blocking=False)

Esempio n. 56

0

Mostra file

    def transform_uint32(self,
                         data_np,
                         flip_x=False,
                         flip_y=False,
                         swap_xy=False,
                         out=None):
        """Run the ``image_transform_uint32`` kernel over an image array.

        data_np -- source array; its first two dimensions are taken as
                   (height, width).
        flip_x, flip_y, swap_xy -- flags forwarded to the kernel as int32.
        out     -- optional host array to receive the result; when omitted a
                   new array is allocated with the two leading dimensions
                   exchanged if ``swap_xy`` is set.

        Returns the array holding the data read back from the device.
        """
        height, width = data_np.shape[:2]

        if swap_xy:
            new_ht, new_wd = width, height
        else:
            new_ht, new_wd = height, width
        new_size = [new_ht, new_wd] + list(data_np.shape[2:])

        # The device upload needs a contiguous host buffer.
        data_np = np.ascontiguousarray(data_np)

        flags = cl.mem_flags
        src_buf = cl.Buffer(self.ctx,
                            flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, data_np.nbytes)

        scalar_args = [np.int32(value)
                       for value in (width, height, flip_x, flip_y, swap_xy)]
        self.program.image_transform_uint32(self.queue, [height, width],
                                            None, src_buf, dst_buf,
                                            *scalar_args)

        if out is None:
            out = np.empty_like(data_np).reshape(new_size)
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Esempio n. 57

0

Mostra file

File: pyopencl-test.py Progetto: jlbribeiro/dei-hpc

 def execute(self):
     """Run the ``part1`` kernel and print the inputs and the result.

     The kernel is launched over ``self.a.shape`` with the three device
     buffers; the destination buffer is then read back into a new host
     array shaped like ``self.a`` and printed alongside ``self.a`` and
     ``self.b``.
     """
     self.program.part1(self.queue, self.a.shape, None,
                        self.a_buf, self.b_buf, self.dest_buf)
     c = numpy.empty_like(self.a)
     cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
     # Single pre-formatted string: same output under the Python 2 print
     # statement and the Python 3 print function.
     print("a %s" % (self.a,))
     print("b %s" % (self.b,))
     print("c %s" % (c,))

Esempio n. 58

0

Mostra file

    def resize_uint32(self, data_np, scale_x, scale_y, out=None):
        """Run the ``image_resize_uint32`` kernel to scale an image array.

        data_np -- source array; its first two dimensions are taken as
                   (height, width).
        scale_x, scale_y -- scale factors for width and height; the new
                   dimensions are the scaled sizes truncated to int.
        out     -- optional host array to receive the result; when omitted a
                   new array with the scaled shape and the source dtype is
                   allocated.

        Returns the array holding the data read back from the device.
        """
        height, width = data_np.shape[:2]

        new_ht, new_wd = int(height * scale_y), int(width * scale_x)
        new_shape = [new_ht, new_wd] + list(data_np.shape[2:])

        # The device upload needs a contiguous host buffer.
        data_np = np.ascontiguousarray(data_np)

        flags = cl.mem_flags
        src_buf = cl.Buffer(self.ctx,
                            flags.READ_ONLY | flags.COPY_HOST_PTR,
                            hostbuf=data_np)

        # Destination holds one uint32 per output pixel.
        num_bytes = new_ht * new_wd * np.dtype(np.uint32).itemsize
        dst_buf = cl.Buffer(self.ctx, flags.WRITE_ONLY, num_bytes)

        kernel = self.program.image_resize_uint32
        kernel(self.queue, [new_ht, new_wd], None,
               src_buf, dst_buf,
               np.int32(width), np.int32(new_wd),
               np.float64(scale_x), np.float64(scale_y))

        if out is None:
            out = np.empty(new_shape, dtype=data_np.dtype)
        read_event = cl.enqueue_read_buffer(self.queue, dst_buf, out)
        read_event.wait()

        return out

Esempio n. 59

0

Mostra file

File: camera2.py Progetto: alexandrustaetu/ai

	def prepare_environment(self, filename,camera_index):
		"""Build the OpenCL program and run the webcam processing loop.

		filename     -- path of the OpenCL kernel source file to build.
		camera_index -- index of the capture device passed to OpenCV.

		Each captured frame is processed row by row with the
		``calculate_differences`` kernel (every row except the first and the
		last, which lack a neighbour above/below) and the result is shown in
		the "camera_window" window. The loop ends when key code 27 (Esc) is
		read, after which the window is destroyed.
		"""
		#build kernel for videocard
		# Context manager so the source file is closed even if the build fails.
		with open(filename, 'r') as kernel_file:
			kernel_string = kernel_file.read()
		self.program = cl.Program(self.gpu_context, kernel_string).build()

		#get frames from the webcam
		self.stream = cv.CaptureFromCAM(camera_index)
		self.line_cols = cv.GetMat(cv.QueryFrame(self.stream)).cols

		input_flags = self.memory_flags.READ_ONLY | self.memory_flags.COPY_HOST_PTR
		while True:
			self.frame = cv.QueryFrame(self.stream)
			self.frame = cv.GetMat(self.frame)
			self.image_data = numpy.asarray(self.frame)
			self.image_data = numpy.array(self.image_data, dtype=numpy.int32)
			final = numpy.zeros(shape=(self.image_data.shape))
			last_row = self.image_data.shape[0]-1
			for position,line in enumerate(self.image_data):
				# Border rows have no row above/below to compare against.
				if position == 0 or position == last_row:
					continue
				line = line.ravel()
				self.line_buffer = cl.Buffer(self.gpu_context, input_flags, hostbuf=line)
				self.top_line_buffer = cl.Buffer(self.gpu_context, input_flags, hostbuf=self.image_data[position-1])
				self.bottom_line_buffer = cl.Buffer(self.gpu_context, input_flags, hostbuf=self.image_data[position+1])
				self.contour_buffer = cl.Buffer(self.gpu_context, self.memory_flags.WRITE_ONLY, line.nbytes)
				self.program.calculate_differences(self.command_queue, line.shape, None,self.top_line_buffer,self.line_buffer,self.bottom_line_buffer, self.contour_buffer)
				contour = numpy.empty_like(line)
				cl.enqueue_read_buffer(self.command_queue, self.contour_buffer, contour).wait()
				# Restore the (columns, 3 channels) layout of the flattened row.
				final[position] = contour.reshape(self.line_cols,3)
			img = numpy.uint8(final)
			img = cv.fromarray(img)
			cv.ShowImage("camera_window", img)
			if cv.WaitKey(10) == 27:
				break
		# The original fused this call with ``break`` into the undefined name
		# ``breakcv``; it belongs after the loop as clean-up.
		cv.DestroyWindow("camera_window")

Esempio n. 60

0

Mostra file

File: old_gazetools.py Progetto: RyanHope/gazetools_cl

 def subtended_angle(self, x1, y1, x2, y2, rx, ry, sw, sh, ez, ex, ey):
     """Run the ``cl_subtended_angle`` kernel and return its output.

     x1, y1, x2, y2, ez, ex, ey -- array-like inputs, converted to float32
         arrays and uploaded as read-only device buffers.
     rx, ry, sw, sh -- scalars, passed to the kernel as float32 values.

     Returns a float32 array with the same shape as ``x1`` holding the
     kernel output.
     """
     # asarray converts without copying when possible; it still copies when
     # a conversion needs it, where np.array(..., copy=False) raises on
     # NumPy 2.
     x1, y1, x2, y2, ez, ex, ey = [
         np.asarray(value, dtype=np.float32)
         for value in (x1, y1, x2, y2, ez, ex, ey)
     ]
     rx = np.float32(rx)
     ry = np.float32(ry)
     sw = np.float32(sw)
     sh = np.float32(sh)

     in_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
     x1_buf = cl.Buffer(self.ctx, in_flags, hostbuf=x1)
     y1_buf = cl.Buffer(self.ctx, in_flags, hostbuf=y1)
     x2_buf = cl.Buffer(self.ctx, in_flags, hostbuf=x2)
     y2_buf = cl.Buffer(self.ctx, in_flags, hostbuf=y2)
     ez_buf = cl.Buffer(self.ctx, in_flags, hostbuf=ez)
     ex_buf = cl.Buffer(self.ctx, in_flags, hostbuf=ex)
     ey_buf = cl.Buffer(self.ctx, in_flags, hostbuf=ey)
     out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, x1.nbytes)

     self.cl_subtended_angle(self.queue, x1.shape, None, x1_buf, y1_buf, x2_buf, y2_buf, rx, ry, sw, sh, ez_buf, ex_buf, ey_buf, out_buf)
     self.queue.finish()

     out = np.empty_like(x1)
     # Wait for the read-back: it is enqueued after finish(), so without
     # this a non-blocking read could leave ``out`` unfilled on return.
     cl.enqueue_read_buffer(self.queue, out_buf, out).wait()
     return out