def test_mix():
    """Compare scalar and vec2 ``mix`` in a compute shader with a CPU reference.

    Every input vec4 holds (x, y, weight, 0). The shader stores the scalar
    mix and both components of the vec2 mix; all three must be close to
    ``x * (1 - w) + y * w``.
    """

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec4)),
        data2: ("buffer", 1, Array(vec4)),
    ):
        index = index_xyz.x
        v = data1[index]
        v1 = mix(v.x, v.y, v.z)
        v2 = mix(vec2(v.x, v.x), vec2(v.y, v.y), v.z)
        data2[index] = vec4(v1, v2.x, v2.y, 0.0)

    skip_if_no_wgpu()

    starts = [-4, -3, -2, -1, +0, +0, +1, +2, +3, +4]
    stops = [-2, -5, -5, +2, +2, -1, +3, +1, +1, -6]
    weights = [0.1 * i for i in range(10)]

    # Interleave as (start, stop, weight, 0): one vec4 per invocation.
    packed = []
    for start, stop, weight in zip(starts, stops, weights):
        packed.extend((start, stop, weight, 0))

    float40 = ctypes.c_float * 40
    out = compute_with_buffers(
        {0: float40(*packed)}, {1: float40}, compute_shader, n=10
    )
    res = list(out[1])

    expected = [
        start * (1 - weight) + stop * weight
        for start, stop, weight in zip(starts, stops, weights)
    ]
    # Components 0..2 of each output vec4 all carry the same mixed value.
    for component in range(3):
        assert iters_close(res[component::4], expected)
def test_pow():
    """Check ``**`` with exponents 2, 0.5, 3.0 and 3.1 inside a compute shader."""

    # note that a**2 is converted to a*a and a**0.5 to sqrt(a)
    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(vec4)),
    ):
        index = index_xyz.x
        a = data1[index]
        data2[index] = vec4(a**2, a**0.5, a**3.0, a**3.1)

    skip_if_no_wgpu()

    bases = [i - 5 for i in range(10)]
    inputs = {0: (ctypes.c_float * 10)(*bases)}
    outputs = {1: ctypes.c_float * 40}
    out = compute_with_buffers(inputs, outputs, compute_shader)
    res = list(out[1])

    # One vec4 per base: (a**2, a**0.5, a**3.0, a**3.1).
    squares, roots, cubes, odd_powers = (res[k::4] for k in range(4))
    assert squares == [i**2 for i in bases]
    assert iters_close(roots, [i**0.5 for i in bases])
    assert cubes == [i**3 for i in bases]
    assert iters_close(odd_powers, [i**3.1 for i in bases])
def test_tuple_unpacking2():
    """Exercise swaps and 3-, 4- and 5-element tuple unpacking in a shader."""

    # Python implementations deal with tuple packing/unpacking differently.
    # Python 3.8+ has rot_four, pypy3 resolves by changing the order of the
    # store ops in the bytecode itself, and seems to even ditch unused variables.
    @python2shader_and_validate_nochecks
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 1, "Array(vec2)"),
    ):
        i = f32(index.x)
        a, b = 1.0, 2.0  # Cover Python storing this as a tuple const
        c, d = a + i, b + 1.0
        c, d = d, c
        c += 100.0
        c, d = d, c
        c += 200.0
        c, d, _ = c, d, 0.0  # 3-tuple
        c, d, _, _ = c, d, 0.0, 0.0  # 4-tuple
        c, d, _, _, _ = c, d, 0.0, 0.0, 0.0  # 5-tuple
        data2[index.x] = vec2(c, d)

    skip_if_no_wgpu()

    out = compute_with_buffers({}, {1: ctypes.c_float * 20}, compute_shader, n=10)
    flat = list(out[1])

    # After two swaps: c == (1 + i) + 200 and d == (2 + 1) + 100.
    first_components = flat[0::2]
    second_components = flat[1::2]
    assert first_components == [200 + i + 1 for i in range(10)]
    assert second_components == [100 + 3] * 10
def test_compute_1_3():
    """Run a shader with one input buffer and request three buffers back.

    Binding 0 is both the input and a requested output, binding 1 receives
    a copy of the input, and binding 2 receives the invocation index.
    """

    @python2shader
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        in1: ("buffer", 0, Array(i32)),
        out1: ("buffer", 1, Array(i32)),
        out2: ("buffer", 2, Array(i32)),
    ):
        i = index.x
        out1[i] = in1[i]
        out2[i] = i

    # Create an array of 100 random int32
    int32x100 = c_int32 * 100
    in1 = int32x100(*[int(random.uniform(0, 100)) for _ in range(100)])

    outspecs = {binding: int32x100 for binding in (0, 1, 2)}
    out = compute_with_buffers({0: in1}, outspecs, compute_shader)

    assert isinstance(out, dict) and len(out) == 3
    for binding in (0, 1, 2):
        assert isinstance(out[binding], ctypes.Array)

    assert iters_equal(out[0], in1)  # because it's the same buffer
    assert iters_equal(out[1], in1)  # because the shader copied the data
    assert iters_equal(out[2], range(100))  # because this is the index
def test_cast_vec_ivec3_vec3():
    """Disabled ivec3 -> vec3 cast test; returns before doing anything."""
    return  # raise pytest.skip(msg="Cannot do vec3 storage buffers")

    # Exception: SpirV invalid:
    # error: line 23: Structure id 10 decorated as BufferBlock for
    # variable in Uniform storage class must follow standard storage
    # buffer layout rules: member 0 contains an array with stride 12
    # not satisfying alignment to 16
    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(ivec3)),
        data2: ("buffer", 1, Array(vec3)),
    ):
        i = index.x
        data2[i] = vec3(data1[i])

    skip_if_no_wgpu()

    # vec3's are padded to 16 bytes! I guess it's a "feature"
    # https://stackoverflow.com/questions/38172696
    # ... so now I updated my driver and then it works ... sigh
    tight = [-999999, -100, -4, 1, 4, 100, 32767, 32760, 999999]
    padded = [-999999, -100, -4, 0, 4, 100, 32767, 0, 999999]
    count = len(tight)

    inputs = {0: (ctypes.c_int32 * count)(*tight)}
    outputs = {1: ctypes.c_float * count}
    out = compute_with_buffers(inputs, outputs, compute_shader, n=3)

    it_works = iters_equal(out[1], tight)
    it_fails = iters_equal(out[1], padded)
    assert it_works or it_fails  # ah well ...
def test_abs():
    """Check ``abs`` on a float buffer and an int buffer inside a shader."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(i32)),
        data3: ("buffer", 2, Array(vec2)),
    ):
        index = index_xyz.x
        v1 = abs(data1[index])  # float
        v2 = abs(data2[index])  # int
        data3[index] = vec2(f32(v1), v2)

    skip_if_no_wgpu()

    floats = [random.uniform(-2, 2) for _ in range(10)]
    ints = [random.randint(-100, 100) for _ in range(10)]

    float_buf = (ctypes.c_float * 10)(*floats)
    int_buf = (ctypes.c_int * 10)(*ints)
    out = compute_with_buffers(
        {0: float_buf, 1: int_buf},
        {2: ctypes.c_float * 20},
        compute_shader,
        n=10,
    )

    # Each output vec2 is (abs(float), abs(int)).
    flat = list(out[2])
    assert iters_close(flat[0::2], [abs(v) for v in floats])
    assert flat[1::2] == [abs(v) for v in ints]
def test_add_sub3():
    """Check in-place ``-=`` on a scalar and ``+=`` of a scalar onto a vec2."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(vec2)),
    ):
        index = index_xyz.x
        a = data1[index]
        a -= -1.0
        b = vec2(a, a)
        b += 2.0
        data2[index] = b

    skip_if_no_wgpu()

    source = [i - 5 for i in range(10)]
    out = compute_with_buffers(
        {0: (ctypes.c_float * 10)(*source)},
        {1: ctypes.c_float * 20},
        compute_shader,
    )
    flat = list(out[1])

    # a - (-1) + 2 == a + 3, for both vec2 components.
    expected = [i + 3 for i in source]
    assert flat[0::2] == expected
    assert flat[1::2] == expected
def test_mul_modulo():
    """Compare ``%`` and ``math.fmod`` in a shader with their Python results."""

    # There are two modulo functions, one in which the result takes the sign
    # of the divisor and one in which it takes the sign of the dividend.
    # In Python these are `%` and math.fmod respectively. Here we test that
    # the SpirV code matches that (fmod and frem).
    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec2)),
        data2: ("buffer", 1, Array(vec2)),
    ):
        index = index_xyz.x
        a = data1[index]
        data2[index] = vec2(a.x % a.y, math.fmod(a.x, a.y))

    skip_if_no_wgpu()

    numerators = [i - 5 for i in range(10)]
    divisors = [-2 if i % 2 else 2 for i in range(10)]
    pairs = list(zip(numerators, divisors))

    # Flatten to (numerator, divisor, numerator, divisor, ...): one vec2 each.
    interleaved = [value for pair in pairs for value in pair]

    float20 = ctypes.c_float * 20
    out = compute_with_buffers({0: float20(*interleaved)}, {1: float20}, compute_shader)
    flat = list(out[1])

    assert flat[0::2] == [i % j for i, j in pairs]
    assert flat[1::2] == [math.fmod(i, j) for i, j in pairs]
def test_copy_vec3():
    """Disabled vec3 buffer copy test; returns before doing anything."""
    return  # raise pytest.skip(msg="Cannot do vec3 storage buffers")

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec3)),
        data2: ("buffer", 1, Array(vec3)),
        data3: ("buffer", 2, Array(ivec3)),
    ):
        i = index.x
        data2[i] = data1[i].xyz
        data3[i] = ivec3(i, i, i)

    # # Equivalent shader in GLSL
    # compute_shader = pyshader.dev.glsl2spirv("""
    #     #version 450
    #     layout(std430 , set=0, binding=0) buffer Foo1 { vec3[] data1; };
    #     layout(std430 , set=0, binding=1) buffer Foo2 { vec3[] data2; };
    #     layout(std430 , set=0, binding=2) buffer Foo3 { ivec3[] data3; };
    #
    #     void main() {
    #         uint index = gl_GlobalInvocationID.x;
    #         data2[index] = data1[index];
    #         data3[index] = ivec3(index, index, index);
    #     }
    # """, "compute")

    skip_if_no_wgpu()

    inputs = {0: (ctypes.c_float * 60)(*range(60))}
    outputs = {1: ctypes.c_float * 60, 2: ctypes.c_int32 * 60}
    out = compute_with_buffers(inputs, outputs, compute_shader, n=20)
    copied, indices = out[1], out[2]

    # NOPE, buffers alignments are rounded to vec4 ...
    # https://stackoverflow.com/questions/38172696/
    # assert iters_equal(out[1], range(60))
    for lane in range(3):
        assert iters_equal(copied[lane::4], range(lane, 60, 4))

    # Depending on your driver, this might or might not work
    fourth_lane = copied[3::4]
    align_ok = iters_equal(fourth_lane, range(3, 60, 4))
    align_fail = iters_equal(fourth_lane, [0] * 15)
    assert align_ok or align_fail

    if align_ok:
        # Tightly packed: 20 ivec3 values with stride 3.
        for lane in range(3):
            assert iters_equal(indices[lane::3], range(20))
    if align_fail:
        # Padded to stride 4: only 15 elements fit, and the 4th lane is zero.
        for lane in range(3):
            assert iters_equal(indices[lane::4], range(15))
        assert iters_equal(indices[3::4], [0] * 15)
def test_compute_0_1_ctype():
    """Run an output-only shader with the output given as a ctypes array type."""

    @python2shader
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        out: ("buffer", 0, Array(i32)),
    ):
        out[index.x] = index.x

    # Create some ints!
    result = compute_with_buffers({}, {0: c_int32 * 100}, compute_shader)
    assert isinstance(result, dict) and len(result) == 1
    produced = result[0]
    assert isinstance(produced, ctypes.Array)
    assert iters_equal(produced, range(100))

    # Same, but specify in bytes
    result = compute_with_buffers({}, {0: c_ubyte * 80}, compute_shader, n=20)
    assert isinstance(result, dict) and len(result) == 1
    raw_bytes = result[0]
    assert isinstance(raw_bytes, ctypes.Array)
    as_int32 = (c_int32 * 20).from_buffer(raw_bytes)  # cast (a view in np)
    assert iters_equal(as_int32, range(20))
def test_compute_0_1_int():
    """Run an output-only shader with the output buffer given as a plain int."""

    @python2shader
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        out: ("buffer", 0, Array(i32)),
    ):
        out[index.x] = index.x

    result = compute_with_buffers({}, {0: 400}, compute_shader)
    assert isinstance(result, dict) and len(result) == 1

    view = result[0]
    assert isinstance(view, memoryview)
    # Reinterpreted with format "i", the result must hold the indices 0..99.
    as_ints = view.cast("i").tolist()
    assert as_ints == list(range(100))
def test_array3():
    """Index into a list literal inside a shader and double the element."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 0, Array(i32)),
    ):
        i = index.x
        data2[i] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9][i] * 2

    skip_if_no_wgpu()

    output_spec = {0: (10, "i")}
    out = compute_with_buffers({}, output_spec, compute_shader, n=10)

    doubled = list(out[0])
    assert doubled == list(range(0, 20, 2))
def test_index():
    """Write each invocation's x index into an int32 buffer and check 0..19."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 1, Array(i32)),
    ):
        data2[index.x] = index.x

    skip_if_no_wgpu()

    # No input buffers; a single int32 output at binding 1.
    out = compute_with_buffers({}, {1: ctypes.c_int32 * 20}, compute_shader)
    assert iters_equal(out[1], range(20))
def test_cast_f32_f64():
    """Cast f32 buffer elements to f64 in a shader and read them back."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(f64)),
    ):
        i = index.x
        data2[i] = f64(data1[i])

    skip_if_no_wgpu()

    count = 20
    singles = (ctypes.c_float * count)(*range(count))
    out = compute_with_buffers(
        {0: singles}, {1: ctypes.c_double * count}, compute_shader
    )
    assert iters_equal(out[1], range(count))
def test_cast_vec_any_vec4():
    """Build a vec4 from a float, an int and an ivec2 of mixed-type arguments."""

    # Look how all args in a vector are converted :)
    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 1, Array(vec4)),
    ):
        data2[index.x] = vec4(7.0, 3, ivec2(False, 2.7))

    skip_if_no_wgpu()

    # Two invocations, each writing the same vec4.
    expected = [7.0, 3.0, 0.0, 2.0] * 2
    out = compute_with_buffers(
        {}, {1: ctypes.c_float * len(expected)}, compute_shader, n=2
    )
    assert iters_equal(out[1], expected)
def test_cast_u8_f32():
    """Cast u8 buffer elements to f32 in a shader; values must be unchanged."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(u8)),
        data2: ("buffer", 1, Array(f32)),
    ):
        i = index.x
        data2[i] = f32(data1[i])

    skip_if_no_wgpu()

    samples = [0, 1, 4, 127, 128, 255]
    count = len(samples)
    byte_buf = (ctypes.c_ubyte * count)(*samples)

    out = compute_with_buffers({0: byte_buf}, {1: ctypes.c_float * count}, compute_shader)
    assert iters_equal(out[1], samples)
def test_cast_i32_f32():
    """Cast i32 buffer elements to f32, with shader types given as strings."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", "ivec3"),
        data1: ("buffer", 0, "Array(i32)"),
        data2: ("buffer", 1, "Array(f32)"),
    ):
        i = index.x
        data2[i] = f32(data1[i])

    skip_if_no_wgpu()

    samples = [-999999, -100, -4, 0, 4, 100, 32767, 32768, 999999]
    count = len(samples)
    int_buf = (ctypes.c_int32 * count)(*samples)

    out = compute_with_buffers({0: int_buf}, {1: ctypes.c_float * count}, compute_shader)
    assert iters_equal(out[1], samples)
def test_tuple_unpacking1():
    """Check basic two-element tuple unpacking inside a shader."""

    @python2shader_and_validate_nochecks
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 1, "Array(vec2)"),
    ):
        i = f32(index.x)
        a, b = 1.0, 2.0  # Cover Python storing this as a tuple const
        c, d = a + i, b + 1.0
        data2[index.x] = vec2(c, d)

    skip_if_no_wgpu()

    out = compute_with_buffers({}, {1: ctypes.c_float * 20}, compute_shader, n=10)
    flat = list(out[1])

    # c == 1 + i varies per invocation; d == 2 + 1 is constant.
    assert flat[0::2] == [i + 1 for i in range(10)]
    assert flat[1::2] == [3] * 10
def test_cast_i64_i16():
    """Cast i64 to i16 in a shader; out-of-range inputs wrap to 16 bits."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(i64)),
        data2: ("buffer", 1, Array(i16)),
    ):
        i = index.x
        data2[i] = i16(data1[i])

    skip_if_no_wgpu()

    wide = [-999999, -100, -4, 0, 4, 100, 32767, 32768, 999999]
    narrowed = [-16959, -100, -4, 0, 4, 100, 32767, -32768, 16959]
    count = len(wide)

    out = compute_with_buffers(
        {0: (ctypes.c_longlong * count)(*wide)},
        {1: ctypes.c_short * count},
        compute_shader,
    )
    assert iters_equal(out[1], narrowed)
def test_cast_vec_ivec2_vec2():
    """Convert whole ivec2 values to vec2 in a shader; values must be unchanged."""

    # This triggers the direct number-vector conversion
    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(ivec2)),
        data2: ("buffer", 1, Array(vec2)),
    ):
        i = index.x
        data2[i] = vec2(data1[i])

    skip_if_no_wgpu()

    # Ten scalars form five ivec2 elements, hence n=5.
    samples = [-999999, -100, -4, 1, 4, 100, 32767, 32760, 0, 999999]
    count = len(samples)

    out = compute_with_buffers(
        {0: (ctypes.c_int32 * count)(*samples)},
        {1: ctypes.c_float * count},
        compute_shader,
        n=5,
    )
    assert iters_equal(out[1], samples)
def test_cast_i16_u8():
    """Cast i16 to u8 in a shader; the expected values keep the low 8 bits."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(i16)),
        data2: ("buffer", 1, Array(u8)),
    ):
        i = index.x
        data2[i] = u8(data1[i])

    skip_if_no_wgpu()

    shorts = [-3, -2, -1, 0, 1, 2, 3, 127, 128, 255, 256, 300]
    as_bytes = [253, 254, 255, 0, 1, 2, 3, 127, 128, 255, 0, 44]
    count = len(shorts)

    out = compute_with_buffers(
        {0: (ctypes.c_short * count)(*shorts)},
        {1: ctypes.c_ubyte * count},
        compute_shader,
    )
    assert iters_equal(out[1], as_bytes)
def test_min_max_clamp():
    """Check min/max/clamp and nmin/nmax/nclamp against Python references.

    Every input vec4 holds (value, lower, upper, 0). Buffer 1 receives the
    plain variants and buffer 2 the ``n``-prefixed variants; both must match
    the same reference values.
    """

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec4)),
        data2: ("buffer", 1, Array(vec4)),
        data3: ("buffer", 2, Array(vec4)),
    ):
        index = index_xyz.x
        v = data1[index].x
        mi = data1[index].y
        ma = data1[index].z
        data2[index] = vec4(min(v, ma), max(v, mi), clamp(v, mi, ma), 0.0)
        data3[index] = vec4(nmin(v, ma), nmax(v, mi), nclamp(v, mi, ma), 0.0)

    skip_if_no_wgpu()

    the_vals = [-4, -3, -2, -1, +0, +0, +1, +2, +3, +4]
    min_vals = [-2, -5, -5, +2, +2, -1, +3, +1, +1, -6]
    max_vals = [+2, -1, -3, +3, +3, +1, +9, +9, +2, -3]
    triples = list(zip(the_vals, min_vals, max_vals))

    # Pad each (value, lower, upper) triple with a zero to fill a vec4.
    packed = []
    for triple in triples:
        packed.extend(triple)
        packed.append(0)

    float40 = ctypes.c_float * 40
    out = compute_with_buffers(
        {0: float40(*packed)}, {1: float40, 2: float40}, compute_shader, n=10
    )

    ref_min = [min(v, hi) for v, _lo, hi in triples]
    ref_max = [max(v, lo) for v, lo, _hi in triples]
    ref_clamp = [min(max(lo, v), hi) for v, lo, hi in triples]

    # Binding 1 holds the normal variant, binding 2 the NaN-safe variant.
    for binding in (1, 2):
        flat = list(out[binding])
        assert flat[0::4] == ref_min
        assert flat[1::4] == ref_max
        assert flat[2::4] == ref_clamp
def test_math_constants():
    """Write ``math.pi`` at even indices and ``math.e`` at odd ones in a shader."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data2: ("buffer", 1, Array(f32)),
    ):
        index = index_xyz.x
        if index % 2 == 0:
            data2[index] = math.pi
        else:
            data2[index] = math.e

    skip_if_no_wgpu()

    output_spec = {1: ctypes.c_float * 10}
    out = compute_with_buffers({}, output_spec, compute_shader, n=10)

    expected = [math.pi, math.e] * 5
    assert iters_close(list(out[1]), expected)
def test_copy_vec2():
    """Copy a vec2 buffer and fill an ivec2 buffer with the invocation index."""

    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec2)),
        data2: ("buffer", 1, Array(vec2)),
        data3: ("buffer", 2, Array(ivec2)),
    ):
        i = index.x
        data2[i] = data1[i].xy
        data3[i] = ivec2(i, i)

    skip_if_no_wgpu()

    # 60 scalars form 30 vec2 elements, hence n=30.
    source = (ctypes.c_float * 60)(*range(60))
    out = compute_with_buffers(
        {0: source},
        {1: ctypes.c_float * 60, 2: ctypes.c_int32 * 60},
        compute_shader,
        n=30,
    )

    assert iters_equal(out[1], range(60))
    for component in (0, 1):
        assert iters_equal(out[2][component::2], range(30))
def test_cast_ivec2_bvec2():
    """Round-trip ivec2 -> bvec2 -> ivec2; zeros stay 0, non-zeros compare True."""

    # This triggers the per-element vector conversion
    @python2shader_and_validate
    def compute_shader(
        index: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(ivec2)),
        data2: ("buffer", 1, Array(ivec2)),
    ):
        i = index.x
        tmp = bvec2(data1[i])
        data2[i] = ivec2(tmp)  # ext visible storage cannot be bool

    skip_if_no_wgpu()

    numbers = [-999999, -100, 0, 1, 4, 100, 32767, 32760, 0, 999999]
    truthiness = [True, True, False, True, True, True, True, True, False, True]
    int_array = ctypes.c_int32 * len(numbers)

    out = compute_with_buffers(
        {0: int_array(*numbers)}, {1: int_array}, compute_shader, n=5
    )
    assert iters_equal(out[1], truthiness)
def test_length():
    """Check ``length`` of random vec2 values against the Euclidean norm."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(vec2)),
        data2: ("buffer", 1, Array(f32)),
    ):
        index = index_xyz.x
        data2[index] = length(data1[index])

    skip_if_no_wgpu()

    # 20 random scalars, read by the shader as 10 vec2 elements.
    coords = [random.uniform(-2, 2) for _ in range(20)]
    out = compute_with_buffers(
        {0: (ctypes.c_float * 20)(*coords)},
        {1: ctypes.c_float * 10},
        compute_shader,
        n=10,
    )
    lengths = list(out[1])

    expected = [(x**2 + y**2) ** 0.5 for x, y in zip(coords[0::2], coords[1::2])]
    assert iters_close(lengths, expected)
def test_mul_dot():
    """Check the ``@`` operator on a vec2 with itself inside a shader."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(f32)),
    ):
        index = index_xyz.x
        a = vec2(data1[index], data1[index])
        data2[index] = a @ a

    skip_if_no_wgpu()

    source = [i - 5 for i in range(10)]
    float10 = ctypes.c_float * 10
    out = compute_with_buffers({0: float10(*source)}, {1: float10}, compute_shader)

    # vec2(a, a) @ vec2(a, a) == a*a + a*a.
    dots = list(out[1])
    assert dots == [i**2 * 2 for i in source]
def test_mul_div2():
    """Check scalar * vec2 * scalar with multiplied and divided components."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(vec2)),
    ):
        index = index_xyz.x
        a = data1[index]
        data2[index] = 2.0 * vec2(a * 2.0, a / 2.0) * 3.0

    skip_if_no_wgpu()

    source = [i - 5 for i in range(10)]
    out = compute_with_buffers(
        {0: (ctypes.c_float * 10)(*source)},
        {1: ctypes.c_float * 20},
        compute_shader,
    )
    flat = list(out[1])

    # The surrounding 2.0 * ... * 3.0 contributes a factor of 6 to each part.
    times_two, halved = flat[0::2], flat[1::2]
    assert times_two == [6 * i * 2 for i in source]
    assert halved == [6 * i / 2 for i in source]
def test_integer_div():
    """Check ``12 // a`` on int32 values against truncating division."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(i32)),
        data2: ("buffer", 1, Array(i32)),
    ):
        index = index_xyz.x
        a = data1[index]
        data2[index] = 12 // a

    skip_if_no_wgpu()

    # -5..4 with the zero swapped for 12, so there is no division by zero.
    divisors = [(i - 5) or 12 for i in range(10)]
    int10 = ctypes.c_int * 10
    out = compute_with_buffers({0: int10(*divisors)}, {1: int10}, compute_shader)

    # NOTE: the shader // truncates, not floor like Python
    quotients = list(out[1])
    assert quotients == [math.trunc(12 / i) for i in divisors]
def test_normalize():
    """Check ``normalize`` of ``vec2(v, v)`` for v in -5..4, including v == 0."""

    @python2shader_and_validate
    def compute_shader(
        index_xyz: ("input", "GlobalInvocationId", ivec3),
        data1: ("buffer", 0, Array(f32)),
        data2: ("buffer", 1, Array(vec2)),
    ):
        index = index_xyz.x
        v = data1[index]
        data2[index] = normalize(vec2(v, v))

    skip_if_no_wgpu()

    source = [i - 5 for i in range(10)]
    out = compute_with_buffers(
        {0: (ctypes.c_float * 10)(*source)},
        {1: ctypes.c_float * 20},
        compute_shader,
        n=10,
    )
    flat = list(out[1])

    half_sqrt2 = (2**0.5) / 2
    # Five negative inputs -> ten components at -sqrt(2)/2.
    assert iters_close(flat[:10], [-half_sqrt2] * 10)
    # Four positive inputs -> eight components at +sqrt(2)/2.
    assert iters_close(flat[-8:], [+half_sqrt2] * 8)
    # The zero vector in between has no direction.
    zero_x, zero_y = flat[10], flat[11]
    assert math.isnan(zero_x) and math.isnan(zero_y)  # or can this also be inf?