import VkInline as vki
import numpy as np

# Example: reduce a device vector of ints in work groups of BLOCK_SIZE.
#
# The input holds the integers 1..1024.
darr = vki.device_vector_from_list(range(1, 1025), 'int')

# Work-group size; also the length of the shader's `shared` scratch array.
BLOCK_SIZE = 256

# GLSL template for the reduction kernel.  {0} is the element type and {1}
# the work-group size; literal braces are doubled because the text goes
# through str.format().  Each invocation copies src[i] into s_buf[tid]
# (when i < n), then the group adds s_buf[tid + s] into s_buf[tid] with a
# stride s that is halved every round, with a barrier() after each step.
# Invocation 0 finally stores s_buf[0] into dst[gl_WorkGroupID.x].
# The pieces below concatenate to a single line of shader text.
_REDUCE_TEMPLATE = (
    ' shared {0} s_buf[{1}];'
    ' void main() {{'
    ' uint tid = gl_LocalInvocationID.x;'
    ' uint i = gl_GlobalInvocationID.x;'
    ' if (i<n) s_buf[tid] = get_value(src, i);'
    ' barrier();'
    ' for (uint s = {1}/2; s>0; s>>=1) {{'
    ' if (tid < s && i+s<n) s_buf[tid] += s_buf[tid + s];'
    ' barrier();'
    ' }}'
    ' if (tid==0) set_value(dst, gl_WorkGroupID.x, s_buf[tid]);'
    ' }} '
)

kernel = vki.Computer(
    ['dst', 'src', 'n'],
    _REDUCE_TEMPLATE.format('int', str(BLOCK_SIZE)),
)

# Shrink the vector pass by pass: every pass allocates one output slot per
# work group needed to cover the current input, until one element remains.
# NOTE(review): no kernel.launch(...) call is visible inside this loop in the
# excerpt; as written it only allocates the output buffers.  Confirm against
# the complete example before relying on the contents of `dst`.
dst = darr
while dst.size() > 1:
    src = dst
    n = src.size()
    # Number of work groups needed to cover n elements (rounded up).
    blocks = int((n + BLOCK_SIZE - 1) / BLOCK_SIZE)
    dst = vki.SVVector("int", blocks)
# Example: fill the camera's film with colours looked up from a cubemap sky.
#
# Relies on names defined outside this snippet: np, Image, vki, fri, camera,
# width, height and VK_FORMAT_R8G8B8A8_SRGB.

# Read the cubemap picture as RGBA and copy it into a 512x512 GPU cubemap.
cubemap_rgba = Image.open('cubemap.png').convert('RGBA')
sky_cube = np.array(cubemap_rgba)
gpu_sky_cube = vki.Cubemap(512, 512, VK_FORMAT_R8G8B8A8_SRGB)
gpu_sky_cube.upload(sky_cube)

# NOTE(review): the 0 is presumably the index of the cubemap in the
# `cubemaps` list passed to launch() below -- confirm against FeiRaysInline.
sky = fri.TexturedSky(0)

# One invocation per film pixel: skip pixels outside the film, generate a ray
# through the pixel centre (x + 0.5, y + 0.5), look up the sky colour for the
# ray direction and add it to that pixel of the film.
# The pieces below concatenate to a single line of shader text.
_SKY_KERNEL_SRC = (
    ' void main() {'
    ' int x = int(gl_GlobalInvocationID.x);'
    ' int y = int(gl_GlobalInvocationID.y);'
    ' if (x >= camera.film.width || y>=camera.film.height) return;'
    ' float fx = float(x)+0.5;'
    ' float fy = float(y)+0.5;'
    ' vec3 origin, dir;'
    ' generate_ray(camera, fx, fy, 0.0, 0.0, origin, dir);'
    ' Spectrum col = get_sky_color(sky, dir);'
    ' incr_pixel(camera.film, x, y, col);'
    ' } '
)

kernel = vki.Computer(['camera', 'sky'], _SKY_KERNEL_SRC)

# 8x8 invocations per work group; enough groups to cover width x height.
blockSize = (8, 8)
gridSize = (
    int((width + 7) / 8),
    int((height + 7) / 8),
)

kernel.launch(
    gridSize,
    blockSize,
    [camera, sky],
    cubemaps=[gpu_sky_cube],
)
camera.m_film.inc_times_exposure()
# GLSL helpers attached to the RNGInitializer shader view.
#   matvec_i   - for every set bit j of v_i, XORs the five table words starting
#                at index (i*32 + j)*5 + offset into `result`.
#   matvec     - zeroes `result`, then applies matvec_i to each of the five
#                words of `vector`.
#   state_init - clamps `subsequence` to (1<<18)-1, derives the state words
#                from the two 32-bit halves of `seed`, then repeatedly applies
#                matvec with table offset i_mat*800: (p & 3) times per 2-bit
#                digit of the subsequence for up to 7 digits, then (p & 0xF)
#                times for what remains.
# The pieces below concatenate to one line of shader text, except for the
# newline after the `//` comment.  That newline is required: a GLSL line
# comment runs to the end of the line, so without it the remainder of
# state_init (including its closing braces) would be commented out.
_RNG_STATE_INIT_SRC = (
    ' void matvec_i(int i, uint v_i, in Comb_#hash# initializer, int offset, inout V5 result) {'
    ' for (int j = 0; j < 32; j++)'
    ' if ((v_i & (1 << j))!=0) {'
    ' int k = (i * 32 + j)*5 + offset;'
    ' result.v0 ^= get_value(initializer.data, k);'
    ' result.v1 ^= get_value(initializer.data, k + 1);'
    ' result.v2 ^= get_value(initializer.data, k + 2);'
    ' result.v3 ^= get_value(initializer.data, k + 3);'
    ' result.v4 ^= get_value(initializer.data, k + 4);'
    ' }'
    ' }'
    ' void matvec(in V5 vector, in Comb_#hash# initializer, int offset, inout V5 result) {'
    ' result.v0 = result.v1 = result.v2 = result.v3 = result.v4 = 0;'
    ' matvec_i(0, vector.v0, initializer, offset, result);'
    ' matvec_i(1, vector.v1, initializer, offset, result);'
    ' matvec_i(2, vector.v2, initializer, offset, result);'
    ' matvec_i(3, vector.v3, initializer, offset, result);'
    ' matvec_i(4, vector.v4, initializer, offset, result);'
    ' }'
    ' void state_init(in Comb_#hash# initializer, uint64_t seed, uint64_t subsequence, inout RNGState state) {'
    ' if (subsequence>= (1<<18) ) subsequence= (1<<18) -1;'
    ' uint s0 = uint(seed) ^ 0xaad26b49U;'
    ' uint s1 = uint(seed >> 32) ^ 0xf7dcefddU;'
    ' uint t0 = 1099087573U * s0;'
    ' uint t1 = 2591861531U * s1;'
    ' state.d = 6615241 + t1 + t0;'
    ' state.v.v0 = 123456789U + t0;'
    ' state.v.v1 = 362436069U ^ t0;'
    ' state.v.v2 = 521288629U + t1;'
    ' state.v.v3 = 88675123U ^ t1;'
    ' state.v.v4 = 5783321U + t0;'
    ' // apply sequence matrix\n'
    'V5 result;'
    ' uint64_t p = subsequence;'
    ' int i_mat = 0;'
    ' while (p!=0 && i_mat<7) {'
    ' for (uint t = 0; t < (p & 3); t++) {'
    ' matvec(state.v, initializer, i_mat*800, result);'
    ' state.v = result;'
    ' }'
    ' p >>= 2;'
    ' i_mat++;'
    ' }'
    ' for (uint t = 0; t < (p & 0xF); t++) {'
    ' matvec(state.v, initializer, i_mat*800, result);'
    ' state.v = result;'
    ' }'
    ' } '
)

# Kernel body: invocation `id` (when id < get_size(arr_out)) initialises an
# RNGState with seed 1234 and subsequence `id` and stores it in arr_out[id].
_RAND_INIT_SRC = (
    ' void \n'
    'main() {'
    ' uint id = gl_GlobalInvocationID.x;'
    ' if (id >= get_size(arr_out)) return;'
    ' RNGState state;'
    ' state_init(initializer, 1234, uint64_t(id), state);'
    ' set_value(arr_out, id, state);'
    ' } '
)


class RNGInitializer(vki.ShaderViewable):
    """Shader-viewable object that initialises per-element RNG states.

    The constructor loads the ``uint32`` table stored in ``xor_wow_data.bin``
    next to this module, uploads it to the device and combines it with the
    GLSL helper functions ``matvec_i``, ``matvec`` and ``state_init``.

    NOTE(review): judging by the data file name the table presumably holds
    XORWOW skip-ahead matrices -- confirm against the generator of that file.
    """

    def __init__(self):
        # os.path.join keeps the path relative when dirname(__file__) is
        # empty; plain '/' concatenation would yield '/xor_wow_data.bin',
        # i.e. a file in the filesystem root.
        data_path = os.path.join(os.path.dirname(__file__), 'xor_wow_data.bin')
        xorwow_data = np.fromfile(data_path, dtype=np.uint32)
        self.d_xorwow_data = vki.device_vector_from_numpy(xorwow_data)
        self.m_cptr = SVCombine_Create(
            {'data': self.d_xorwow_data},
            _RNG_STATE_INIT_SRC,
        )

    # Class-level kernel, reached as ``self.rand_init`` from InitRNGVector.
    rand_init = vki.Computer(
        ['initializer', 'arr_out'],
        _RAND_INIT_SRC,
        type_locked=True,
    )

    def InitRNGVector(self, arr_out):
        """Fill ``arr_out`` with one initialised RNG state per element.

        Launches ``rand_init`` with 128 invocations per work group and enough
        groups to cover ``arr_out.size()`` elements.

        Args:
            arr_out: device vector receiving the states; must provide
                ``size()``.
        """
        # Integer ceil-division: exact for any size, no float round trip.
        blocks = (arr_out.size() + 127) // 128
        self.rand_init.launch(blocks, 128, [self, arr_out])
# Example: moving data between NumPy / Python lists and device vectors, and
# reusing one kernel with arguments of different element types.

# NumPy -> device -> host round trip with float32 data.
harr = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype='float32')
darr = vki.device_vector_from_numpy(harr)
print(darr.to_host())

# Show the GLSL data type used to view the vector.
print(darr.name_view_type())

# A second input vector, this time with int32 data.
harr2 = np.array([6, 7, 8, 9, 10], dtype='int32')
darr2 = vki.device_vector_from_numpy(harr2)

# Kernel whose parameter types are not spelled out in the source; it is
# launched twice below with different argument types.  Each invocation with
# id < get_size(arr_in) writes arr_in[id] * k to arr_out[id].
# The pieces below concatenate to a single line of shader text.
_SCALE_KERNEL_SRC = (
    ' void main() {'
    ' uint id = gl_GlobalInvocationID.x;'
    ' if (id >= get_size(arr_in)) return;'
    ' set_value(arr_out, id, get_value(arr_in, id)*k);'
    ' } '
)

kernel = vki.Computer(['arr_in', 'arr_out', 'k'], _SCALE_KERNEL_SRC)

# First launch: float input, float output, float factor.
darr_out = vki.SVVector('float', 5)
kernel.launch(
    1,
    128,
    [darr, darr_out, vki.SVFloat(10.0)],
)
print(darr_out.to_host())

# Second launch: int input, int output, int factor.
darr_out = vki.SVVector('int', 5)
kernel.launch(
    1,
    128,
    [darr2, darr_out, vki.SVInt32(5)],
)
print(darr_out.to_host())

# Build a device vector from a Python list, naming the GLSL element type.
darr3 = vki.device_vector_from_list(
    [3.0, 5.0, 7.0, 9.0, 11.0],
    'float',
)
print(darr3.to_host())
width = image_in.shape[1] height = image_in.shape[0] tex2d = vki.Texture2D(width, height, VK_FORMAT_R8G8B8A8_SRGB) tex2d.upload(image_in) darr = vki.SVVector('vec4', width * height) kernel = vki.Computer(['width', 'height', 'arr'], ''' void main() { uint x = gl_GlobalInvocationID.x; uint y = gl_GlobalInvocationID.y; if (x >= width || y>=height) return; float u = (float(x)+0.5)/float(width); float v = (float(y)+0.5)/float(height); vec4 rgba = texture(arr_tex2d[0], vec2(u,v)); set_value(arr, x+y*width, rgba); } ''') blockSize = (8, 8) gridSize = (int((width + 7) / 8), int((height + 7) / 8)) kernel.launch( gridSize, blockSize, [vki.SVInt32(width), vki.SVInt32(height), darr],
import FeiRaysInline as fri
from PIL import Image

# Example: paint a colour gradient into a film and save it as a PNG.
#
# `vki` is expected to be imported elsewhere in the file.

width = 640
height = 480

film = fri.Film(width, height)

# One invocation per pixel: skip pixels outside the film, compute the pixel
# centre in normalised coordinates (u, v), convert the RGB triple (u, v, 0)
# to a Spectrum and add it to that pixel.
# The pieces below concatenate to a single line of shader text.
_GRADIENT_KERNEL_SRC = (
    ' void main() {'
    ' int x = int(gl_GlobalInvocationID.x);'
    ' int y = int(gl_GlobalInvocationID.y);'
    ' if (x >= film.width || y>=film.height) return;'
    ' float u = (float(x)+0.5)/float(film.width);'
    ' float v = (float(y)+0.5)/float(film.height);'
    ' Spectrum color;'
    ' from_rgb(color, vec3(u, v, 0.0));'
    ' incr_pixel(film, x, y, color);'
    ' } '
)

kernel = vki.Computer(['film'], _GRADIENT_KERNEL_SRC)

# 8x8 invocations per work group; enough groups to cover width x height.
blockSize = (8, 8)
gridSize = (
    int((width + 7) / 8),
    int((height + 7) / 8),
)

kernel.launch(gridSize, blockSize, [film])
film.inc_times_exposure()

# Fetch the film contents as sRGB and write them out as an RGBA image.
img_out = film.download_srgb()
Image.fromarray(img_out, 'RGBA').save('output.png')