def setup_opencl(data, cube_size):
    """Run the ``score_matrix_to_rms`` OpenCL kernel and return its scores.

    Builds the program from the module-level ``SOURCE``, uploads the input
    arrays found in *data*, enqueues the kernel with a global size of
    ``cube_size ** 3`` and reads the score buffer back to the host.  Each
    phase is wrapped in ``timeify`` with a descriptive label.

    Args:
        data: Mapping that provides the entries ``'in_r'``, ``'in_g'``,
            ``'in_b'`` and ``'out_r'``; each one is passed to
            ``pycl.buffer_from_pyarray`` (presumably ``array.array``
            instances -- confirm against the caller).
        cube_size: Integer forwarded to the kernel; ``cube_size ** 3``
            float scores are allocated and computed.

    Returns:
        The host array produced by ``pycl.buffer_to_pyarray`` for the score
        buffer (created ``like`` the zero-initialised score array), after
        the read-back has completed.
    """
    import pycl

    # Uploads are requested as blocking; their events are still passed to
    # the kernel launch so the dependency is explicit.
    blocking = True

    with timeify("Making context, loading kernel"):
        devices = pycl.clGetDeviceIDs()
        ctx = pycl.clCreateContext(devices=devices)
        queue = pycl.clCreateCommandQueue(ctx)

        program = pycl.clCreateProgramWithSource(ctx, SOURCE).build()

        score_matrix = program['score_matrix_to_rms']
        # Kernel signature: five buffers followed by two integers.
        score_matrix.argtypes = (pycl.cl_mem,) * 5 + (pycl.cl_int,) * 2

    sub_divisions = cube_size ** 3

    with timeify("Creating buffers"):
        in_r_buf, in_evt1 = pycl.buffer_from_pyarray(
            queue, data['in_r'], blocking=blocking)
        in_g_buf, in_evt2 = pycl.buffer_from_pyarray(
            queue, data['in_g'], blocking=blocking)
        in_b_buf, in_evt3 = pycl.buffer_from_pyarray(
            queue, data['in_b'], blocking=blocking)

        # NOTE(review): out_r is uploaded, but the launch below passes
        # in_r_buf in the fourth slot instead of out_r_buf.  Kept as in the
        # original; confirm whether that substitution is intentional.
        out_r = data['out_r']
        out_r_buf, in_evt4 = pycl.buffer_from_pyarray(
            queue, out_r, blocking=blocking)

        # One zero-initialised float score per sub-division.
        score = array.array('f', [0] * sub_divisions)
        score_buf, in_evt5 = pycl.buffer_from_pyarray(
            queue, score, blocking=blocking)

    with timeify("Run kernel r"):
        upload_events = [in_evt1, in_evt2, in_evt3, in_evt4, in_evt5]
        run_evt = score_matrix(
            in_r_buf,
            in_g_buf,
            in_b_buf,
            in_r_buf,
            score_buf,
            len(data['in_r']),
            cube_size,
            wait_for=upload_events).on(queue, sub_divisions)

    with timeify("Retrive data"):
        score_from_gpu, read_evt = pycl.buffer_to_pyarray(queue,
                                                          score_buf,
                                                          wait_for=run_evt,
                                                          like=score)
        # buffer_to_pyarray hands back an event for the read; wait on it
        # (as the other read-backs in this file do) so the returned array
        # is fully populated and the timing covers the transfer.
        read_evt.wait()

    return score_from_gpu
Example #2
0
def run_conv(prog, queue):
    """Launch the convolution kernel from *prog* on *queue* repeatedly.

    Looks the kernel up by name, binds four equally sized buffers to it,
    runs it once and reads the output buffer back, then re-enqueues the
    same kernel 1000 times, waiting for each launch to finish.  Progress
    messages and the first ten values of the initial read-back are printed.

    Args:
        prog: Built program object; indexed by kernel name.
        queue: Command queue on which all work is enqueued.
    """
    func = prog['conv__num_imgs_20__in_pad_3__in_dim_0_227__in_dim_1_227__conv_has_relu_1__kern_sz_7__stride_2__out_chans_64__in_chans_3']
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(func)
    func.argtypes = (cl.cl_mem, cl.cl_mem, cl.cl_mem, cl.cl_mem)

    # NOTE(review): 100 million floats per buffer, four buffers in total --
    # a deliberately large workload; shrink it if memory is tight.
    in_ar = cl.array('f', range(100 * 1000 * 1000))
    in_buf, in_evt = cl.buffer_from_pyarray(queue, in_ar, blocking=False)
    filt_buf = in_buf.empty_like_this()
    bias_buf = in_buf.empty_like_this()
    out_buf = in_buf.empty_like_this()

    # Arguments are bound one by one so the kernel can be re-enqueued
    # below without passing them again.
    func.setarg(0, filt_buf)
    func.setarg(1, bias_buf)
    func.setarg(2, in_buf)
    func.setarg(3, out_buf)

    # Launch geometry shared by the first run and the repeat loop.
    gsize = (120 * 2166,)
    lsize = (120,)

    run_evt = cl.clEnqueueNDRangeKernel(queue, func, gsize=gsize,
                                        lsize=lsize, wait_for=in_evt)
    out, evt = cl.buffer_to_pyarray(queue, out_buf, wait_for=run_evt,
                                    like=in_ar)

    print("start wait")
    evt.wait()
    print("end wait, start loop")
    for _ in range(1000):
        run_evt = cl.clEnqueueNDRangeKernel(queue, func, gsize=gsize,
                                            lsize=lsize)
        run_evt.wait()
    print("end loop")
    # `out` holds the read-back from the first launch only.
    print(out[0:10])
Example #3
0
def run_conv(prog, queue):
    """Run the convolution kernel once, read it back, then loop it 1000 times.

    The kernel is fetched from *prog* by name and given four buffers of the
    same size (filter, bias, input, output).  After the first launch the
    output buffer is copied to the host; the kernel is then enqueued 1000
    more times with a wait after every launch.  Status lines and the first
    ten values of the host copy are printed.

    Args:
        prog: Built program object; indexed by kernel name.
        queue: Command queue on which all work is enqueued.
    """
    func = prog[
        "conv__num_imgs_20__in_pad_3__in_dim_0_227__in_dim_1_227__conv_has_relu_1__kern_sz_7__stride_2__out_chans_64__in_chans_3"
    ]
    # print(x) with one argument is valid and equivalent on Python 2 and 3.
    print(func)
    func.argtypes = (cl.cl_mem, cl.cl_mem, cl.cl_mem, cl.cl_mem)

    # NOTE(review): each of the four buffers holds 100 million floats.
    in_ar = cl.array("f", range(100 * 1000 * 1000))
    in_buf, in_evt = cl.buffer_from_pyarray(queue, in_ar, blocking=False)
    filt_buf = in_buf.empty_like_this()
    bias_buf = in_buf.empty_like_this()
    out_buf = in_buf.empty_like_this()

    # Bind arguments explicitly; they stay set for every later enqueue.
    func.setarg(0, filt_buf)
    func.setarg(1, bias_buf)
    func.setarg(2, in_buf)
    func.setarg(3, out_buf)

    # Same launch configuration for the initial run and the loop.
    launch = dict(gsize=(120 * 2166,), lsize=(120,))

    run_evt = cl.clEnqueueNDRangeKernel(queue, func, wait_for=in_evt, **launch)
    out, evt = cl.buffer_to_pyarray(queue, out_buf, wait_for=run_evt, like=in_ar)

    print("start wait")
    evt.wait()
    print("end wait, start loop")
    for _ in range(1000):
        run_evt = cl.clEnqueueNDRangeKernel(queue, func, **launch)
        run_evt.wait()
    print("end loop")
    # Values come from the read-back issued before the loop.
    print(out[0:10])
def setup_opencl(data, cube_size):
    """Compute scores on the OpenCL device via ``score_matrix_to_rms``.

    Creates a context and command queue, builds the module-level ``SOURCE``
    program, uploads the arrays in *data*, enqueues the kernel with a
    global size of ``cube_size ** 3`` and copies the score buffer back.
    Every phase runs inside a labelled ``timeify`` block.

    Args:
        data: Mapping with the entries ``'in_r'``, ``'in_g'``, ``'in_b'``
            and ``'out_r'``, each handed to ``pycl.buffer_from_pyarray``
            (presumably ``array.array`` objects -- confirm with caller).
        cube_size: Integer passed to the kernel; ``cube_size ** 3`` float
            scores are allocated.

    Returns:
        The host array returned by ``pycl.buffer_to_pyarray`` for the score
        buffer, once the read-back event has completed.
    """
    import pycl

    blocking = True

    with timeify("Making context, loading kernel"):
        devices = pycl.clGetDeviceIDs()
        ctx = pycl.clCreateContext(devices=devices)
        queue = pycl.clCreateCommandQueue(ctx)

        program = pycl.clCreateProgramWithSource(ctx, SOURCE).build()

        score_matrix = program['score_matrix_to_rms']
        score_matrix.argtypes = (pycl.cl_mem, pycl.cl_mem, pycl.cl_mem,
                                 pycl.cl_mem, pycl.cl_mem,
                                 pycl.cl_int, pycl.cl_int)

    sub_divisions = cube_size ** 3

    with timeify("Creating buffers"):
        in_r_buf, in_evt1 = pycl.buffer_from_pyarray(queue, data['in_r'],
                                                     blocking=blocking)
        in_g_buf, in_evt2 = pycl.buffer_from_pyarray(queue, data['in_g'],
                                                     blocking=blocking)
        in_b_buf, in_evt3 = pycl.buffer_from_pyarray(queue, data['in_b'],
                                                     blocking=blocking)

        # NOTE(review): this buffer is uploaded but never given to the
        # kernel -- the launch below passes in_r_buf in its place.  Left
        # unchanged; verify whether that is a deliberate sanity check.
        out_r = data['out_r']
        out_r_buf, in_evt4 = pycl.buffer_from_pyarray(queue, out_r,
                                                      blocking=blocking)

        # Zero-filled float array with one slot per sub-division.
        score = array.array('f', [0] * sub_divisions)
        score_buf, in_evt5 = pycl.buffer_from_pyarray(queue, score,
                                                      blocking=blocking)

    with timeify("Run kernel r"):
        configured = score_matrix(
            in_r_buf, in_g_buf, in_b_buf, in_r_buf, score_buf,
            len(data['in_r']), cube_size,
            wait_for=[in_evt1, in_evt2, in_evt3, in_evt4, in_evt5])
        run_evt = configured.on(queue, sub_divisions)

    with timeify("Retrive data"):
        score_from_gpu, evt = pycl.buffer_to_pyarray(queue, score_buf,
                                                     wait_for=run_evt,
                                                     like=score)
        # Wait for the read to finish before handing the array to the
        # caller, matching the other read-backs in this file; this also
        # makes the timed section include the actual transfer.
        evt.wait()

    return score_from_gpu
Example #5
0
def run_mxplusb(prog, queue):
    """Run the ``mxplusb`` kernel over the values 0..99 and print a sample.

    The kernel is called with the scalars 2 and 5 around an input and an
    output buffer (presumably computing ``y = 2 * x + 5`` -- the kernel
    source is not visible here).  The kernel object and the first ten
    output values are printed.

    Args:
        prog: Built program object; indexed by kernel name.
        queue: Command queue on which all work is enqueued.
    """
    func = prog['mxplusb']
    # Single-argument parenthesised print works on both Python 2 and 3.
    print(func)
    func.argtypes = (cl.cl_float, cl.cl_mem, cl.cl_float, cl.cl_mem)

    x = cl.array('f', range(100))
    x_buf, in_evt = cl.buffer_from_pyarray(queue, x, blocking=False)
    y_buf = x_buf.empty_like_this()

    # Chain: upload -> kernel -> read-back, each waiting on the previous event.
    run_evt = func(2, x_buf, 5, y_buf).on(queue, len(x), wait_for=in_evt)
    y, evt = cl.buffer_to_pyarray(queue, y_buf, wait_for=run_evt, like=x)

    # Block until the read-back has finished before touching `y`.
    evt.wait()
    print(y[0:10])
Example #6
0
def run_mxplusb(prog, queue):
    """Execute the ``mxplusb`` kernel on a 100-element input and show results.

    Uploads the floats 0..99, invokes the kernel with the arguments
    ``(2, input, 5, output)`` over ``len(x)`` work-items' worth of global
    size, copies the output buffer back and prints its first ten values.
    (Presumably the kernel evaluates ``m * x + b`` with m=2 and b=5; the
    kernel source is not part of this file view.)

    Args:
        prog: Built program object; indexed by kernel name.
        queue: Command queue on which all work is enqueued.
    """
    func = prog["mxplusb"]
    # print(x) with one argument is equivalent on Python 2 and Python 3.
    print(func)
    func.argtypes = (cl.cl_float, cl.cl_mem, cl.cl_float, cl.cl_mem)

    x = cl.array("f", range(100))
    x_buf, in_evt = cl.buffer_from_pyarray(queue, x, blocking=False)
    # Output buffer with the same size as the input buffer.
    y_buf = x_buf.empty_like_this()

    run_evt = func(2, x_buf, 5, y_buf).on(queue, len(x), wait_for=in_evt)
    y, evt = cl.buffer_to_pyarray(queue, y_buf, wait_for=run_evt, like=x)

    # The read is asynchronous; wait for it before printing.
    evt.wait()
    print(y[0:10])