def convert_rgba_to_r(op: ConvertRGBAtoR) -> List[Kernel]:
    """Generate the kernel that unpacks an RGBA-mode texture into an R-mode one.

    For each output fragment the shader converts ``gl_FragCoord`` from ``y``'s
    texture layout into a 3-component position in ``x``'s texture layout,
    samples that texel of ``x`` and writes the component selected by the
    position's ``z`` value (0 -> r, 1 -> g, 2 -> b, anything else -> a).

    Args:
        op: operator with input ``"x0"`` in RGBA channel mode and output
            ``"y"`` in R channel mode; both variables must have equal orders.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if the channel modes or the orders do not match.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.RGBA
    assert ChannelMode.get(y) == ChannelModeEnum.R
    assert x.order == y.order

    # Reciprocals of the first two texture_shape entries of x, reversed.
    # They are converted to builtin floats so that plain Python numbers (not
    # NumPy scalars, whose repr is not a bare number in NumPy >= 2) are what
    # gets rendered into the shader source.
    # noinspection PyUnresolvedReferences
    inv_x_shape = [
        float(np.double(1) / np.double(v))
        for v in texture_shape(x)[:2][::-1]
    ]

    code = KernelCode([
        " void main() { ivec3 texture_position_x = ",
        convert_position("gl_FragCoord.yx",
                         texture_shape(y)[:2], texture_stride(y)[:2],
                         texture_shape(x), texture_stride(x)),
        "; vec2 texture_coord_x = (vec2(texture_position_x.yx) + 0.5) * ",
        vec2(inv_x_shape),
        "; vec4 x = texture2D(",
        x,
        (", texture_coord_x);"
         " gl_FragColor.r = texture_position_x.z == 0 ? x.r"
         " : texture_position_x.z == 1 ? x.g"
         " : texture_position_x.z == 2 ? x.b"
         " : x.a; } "),
    ], name=op.__class__.__name__)

    source = code.generate()
    return [Kernel(source, code.name, code.samplers, code.uniforms, y)]
def depth2space(op: Depth2Space) -> List[Kernel]:
    """Generate the Depth2Space kernel.

    The shader reads the output position as (n, h2, w2, c2) in NHWC order,
    derives ``h1 = h2 / r`` and ``w1 = w2 / r`` and folds the remainders of
    those divisions into the input channel index ``c1``, then copies the
    input texel at (n, h1, w1, c1).

    Args:
        op: operator with input ``"x"``, output ``"y"`` and parameter ``'r'``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if either variable does not use the NHWC axes or is
            not in R channel mode.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    out_channels = dst.shape_dict[Axis.C]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    output_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    index_math = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c2 = variable_position_y.w;"
        f" int h1 = h2 / {block};"
        f" int w1 = w2 / {block};"
        f" int c1 = c2 + (w2-w1*{block})*{out_channels} + (h2-h1*{block})*{out_channels}*{block};"
        " gl_FragColor.r = "
    )
    input_texel = texel_fetch(
        src, change_order("vec4(n, h1, w1, c1)", OrderNHWC, src.order))

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        output_position,
        index_math,
        input_texel,
        ".r; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def col2im(op: Col2Im) -> List[Kernel]:
    """Generate the Col2Im kernel.

    For each image position (n, h1, w1, c1) the shader loops over the kernel
    window (kh, kw), skips the entries whose column coordinate would not be an
    exact multiple of the stride or would fall outside ``col``'s H/W extent,
    and accumulates the matching ``col`` texels into ``sum``.

    ``col`` is addressed as a 4-axis variable: its first three axes are kept
    and its last three (KH, KW, C) are flattened into a single axis.

    Args:
        op: operator with input ``"col"`` and output ``"im"``.

    Returns:
        A one-element list containing the kernel that writes ``im``.

    Raises:
        AssertionError: if ``col`` does not have the N/H/W/KH/KW/C axes with
            KH, KW, C as its last three axes in that order, if ``im`` does
            not use the NHWC axes, or if either is not in R channel mode.
    """
    col = op.inputs["col"]
    im = op.outputs["im"]

    six_axes = Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C])
    assert col.order.check_same_axes(six_axes)
    assert col.order.axes_dict[Axis.KH] + 2 == col.order.axes_dict[
        Axis.KW] + 1 == col.order.axes_dict[Axis.C] == 5
    assert im.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(col) == ChannelModeEnum.R
    assert ChannelMode.get(im) == ChannelModeEnum.R

    # View of col with the trailing three axes merged into one.
    flat_shape = col.shape[0:3] + (mul(col.shape[3:6]), )
    flat_stride = [mul(flat_shape[d:]) for d in range(1, len(flat_shape) + 1)]
    flat_order = Order(col.order.axes[0:3] + (Axis.C, ))

    image_position = change_order(get_output_position(im), im.order, OrderNHWC)
    window_loop = (
        "; int n = variable_position_im.x;"
        " int h1 = variable_position_im.y;"
        " int w1 = variable_position_im.z;"
        " int c1 = variable_position_im.w;"
        " float sum = 0.0;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h2 = (h1 + {op.PH} - kh) / {op.SH};"
        f" if (mod(h1 + {op.PH} - kh, {op.SH}) != 0 || h2 < 0 || h2 >= {col.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w2 = (w1 + {op.PW} - kw) / {op.SW};"
        f" if (mod(w1 + {op.PW} - kw, {op.SW}) != 0 || w2 < 0 || w2 >= {col.shape_dict[Axis.W]}) continue;"
        f" int khkwc1 = (kh * {op.KW} + kw) * {im.shape_dict[Axis.C]} + c1;"
        " sum += texture2D("
    )
    col_coord = convert_coord(
        change_order("vec4(n, h2, w2, khkwc1)", OrderNHWC, flat_order),
        flat_shape, flat_stride,
        texture_shape(col)[:2][::-1], texture_stride(col)[:2][::-1])

    code = KernelCode([
        " void main() { ivec4 variable_position_im = ",
        image_position,
        window_loop,
        col,
        ",",
        col_coord,
        ").r; } } gl_FragColor.r = sum; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, im)]
def average_pooling_2d(op: AveragePooling2D) -> List[Kernel]:
    """Generate the AveragePooling2D kernel.

    The shader sums the input texels of the pooling window that lie inside
    the input's H/W extent and divides the sum by a divider:

    * when ``op.parameters["divide_without_padding"]`` is truthy, a counter
      that starts at ``1e-8`` and grows by one per accumulated texel;
    * otherwise the constant ``op.ksize[0] * op.ksize[1]``.

    Args:
        op: operator with input ``"x"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if either variable does not use the NHWC axes or is
            not in R channel mode.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    # GLSL snippets: declaration, per-texel update and final value of the divider.
    divider_init, divider_add, divider_get = (
        ("float divider = 1e-8;", "divider += 1.0;", "divider")
        if op.parameters["divide_without_padding"]
        else ("", "", str(float(op.ksize[0] * op.ksize[1])))
    )

    output_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    window_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float sum = 0.0;"
        f" {divider_init}"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue;"
        " sum += "
    )
    input_texel = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))
    epilogue = (
        f".r; {divider_add} }} }}"
        f" gl_FragColor.r = sum / {divider_get}; }} "
    )

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        output_position,
        window_loop,
        input_texel,
        epilogue,
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def split_axis(op: SplitAxis) -> List[Kernel]:
    """Generate one copy kernel per output of a SplitAxis operator.

    Each kernel converts the output fragment position into a position in
    ``x``'s axis order, shifts it along ``op.axis`` by that output's entry of
    ``[0] + op.sections`` and copies the texel of ``x`` found there.

    When ``x`` has more than four axes, the orders of ``x`` and the output are
    first reduced with ``simplify_orders`` (keeping the split axis), and
    shapes/strides are recomputed for the reduced orders.

    Args:
        op: operator with input ``"x"`` and outputs ``"y0"``, ``"y1"``, ...

    Returns:
        The kernels, in output-index order.

    Raises:
        AssertionError: if an output's axes differ from ``x``'s or either
            variable is not in R channel mode.
    """
    x = op.inputs["x"]
    outputs = [op.outputs[f"y{n}"] for n in range(len(op.outputs))]
    offsets = [0] + op.sections
    axis = op.axis
    result = []

    for index, y in enumerate(outputs):
        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y) == ChannelModeEnum.R

        if x.ndim > 4:
            # simplify orders
            orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])
            shapes = {}
            for v, order in orders.items():
                shapes[v] = [shape_dicts[v][a] for a in order.axes]
            strides = {}
            for v, order in orders.items():
                strides[v] = [mul(shapes[v][d:]) for d in range(1, order.ndim + 1)]
        else:
            orders = {v: v.order for v in (y, x)}
            shapes = {v: v.shape for v in (y, x)}
            strides = {v: v.stride for v in (y, x)}

        position_type = Type.Ivec.get_name(shapes[x])
        position_in_x = change_order(
            convert_position("gl_FragCoord.yx",
                             texture_shape(y)[:2], texture_stride(y)[:2],
                             shapes[y], strides[y]),
            orders[y], orders[x])
        shift_and_fetch = (
            f"; variable_position_x[{orders[x].axes_dict[axis]}] += {offsets[index]};"
            " gl_FragColor.r = texture2D("
        )
        x_coord = convert_coord("variable_position_x", shapes[x], strides[x],
                                texture_shape(x)[:2][::-1],
                                texture_stride(x)[:2][::-1])

        code = KernelCode([
            " void main() { ",
            position_type,
            " variable_position_x = ",
            position_in_x,
            shift_and_fetch,
            x,
            ",",
            x_coord,
            ").r; } ",
        ], name=op.__class__.__name__)

        result.append(
            Kernel(code.generate(), code.name, code.samplers, code.uniforms, y))

    return result
def concat(op: Concat) -> List[Kernel]:
    """Generate the kernel that concatenates exactly two inputs along ``op.axis``.

    The shader computes the output position twice, once in ``x0``'s order and
    once in ``x1``'s order; the ``x1`` position is shifted back along the
    concat axis by ``x0``'s size on that axis. If the ``x0`` position is at or
    beyond ``x0``'s size on the axis the texel is read from ``x1``, otherwise
    from ``x0``.

    When either input has more than four axes, the orders of all three
    variables are first reduced with ``simplify_orders`` (keeping the concat
    axis).

    Args:
        op: operator with inputs ``"x0"`` and ``"x1"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if there are not exactly two inputs, if an input's
            axes differ from ``y``'s, or if the channel modes are not equal.
    """
    assert len(op.inputs) == 2
    x0 = op.inputs["x0"]
    x1 = op.inputs["x1"]
    y = op.outputs["y"]
    axis = op.axis

    assert x0.order.check_same_axes(y.order)
    assert x1.order.check_same_axes(y.order)
    assert ChannelMode.get(x0) == ChannelMode.get(x1) == ChannelMode.get(y)

    if x0.ndim > 4 or x1.ndim > 4:
        # simplify orders
        orders, shape_dicts = simplify_orders([x0, x1, y], keep_axes=[axis])
        shapes = {}
        for v, order in orders.items():
            shapes[v] = [shape_dicts[v][a] for a in order.axes]
        strides = {}
        for v, order in orders.items():
            strides[v] = [mul(shapes[v][d:]) for d in range(1, order.ndim + 1)]
    else:
        variables = (y, x0, x1)
        orders = {v: v.order for v in variables}
        shape_dicts = {v: v.shape_dict for v in variables}
        shapes = {v: v.shape for v in variables}
        strides = {v: v.stride for v in variables}

    def output_position_in(variable):
        # Output fragment position expressed in `variable`'s axis order.
        return change_order(
            convert_position("gl_FragCoord.yx",
                             texture_shape(y)[:2], texture_stride(y)[:2],
                             shapes[y], strides[y]),
            orders[y], orders[variable])

    def texture_coord(position_name, variable):
        # Texture coordinate of `variable` for the named GLSL position.
        return convert_coord(position_name, shapes[variable], strides[variable],
                             texture_shape(variable)[:2][::-1],
                             texture_stride(variable)[:2][::-1])

    fragments = [
        " void main() { ",
        Type.Ivec.get_name(shapes[x0]),
        " variable_position_x0 = ",
        output_position_in(x0),
        "; ",
        Type.Ivec.get_name(shapes[x1]),
        " variable_position_x1 = ",
        output_position_in(x1),
        (f"; variable_position_x1[{orders[x1].axes_dict[axis]}] -= {x0.shape_dict[axis]};"
         " gl_FragColor.r = ("
         f" (variable_position_x0[{orders[x0].axes_dict[axis]}] >= {shape_dicts[x0][axis]})"
         " ? texture2D("),
        x1,
        ",",
        texture_coord("variable_position_x1", x1),
        ") : texture2D(",
        x0,
        ",",
        texture_coord("variable_position_x0", x0),
        ") ).r; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, y)]
def _generate_template(op: Reduce, reduction_size: int, shapes: Dict[Variable, Sequence[int]], strides: Dict[Variable, Sequence[int]]):
    """Build the ``KernelCode`` of a reduction operator from its registered snippets.

    The generated shader computes the output position, wraps it by ``x``'s
    shape (``mod``) to obtain the input position, then loops ``reduction_size``
    times, each time setting the ``w`` component of the input position to the
    loop index, loading that texel of ``x`` into ``float x`` and running the
    registered body snippet. The registered pre/post reduction snippets run
    before and after the loop, and ``y`` is written to ``gl_FragColor.r``.

    Args:
        op: the reduce operator; its class is the key into ``_registered_items``.
        reduction_size: loop trip count, also emitted as the GLSL constant ``n_x``.
        shapes: shape to use for each variable (indexed with ``x`` and ``y``).
        strides: stride to use for each variable (indexed with ``x`` and ``y``).

    Returns:
        The assembled ``KernelCode`` (not yet generated).
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    # One registry lookup instead of repeating it for every snippet.
    item = _registered_items[op.__class__]

    # Each registered parameter is a getter applied to the operator; the value
    # is declared as a GLSL float when it is a Python float, as int otherwise.
    params = []
    for key, getter in item.parameters.items():
        value = getter(op)
        value_type = VType.Float if isinstance(value, float) else VType.Int
        params.append(GlobalDeclarationNode(value_type, key, value=value, with_value=True))

    return KernelCode([
        " void main() { ivec4 variable_position_y = ",
        convert_position("gl_FragCoord.yx",
                         texture_shape(y)[:2], texture_stride(y)[:2],
                         shapes[y], strides[y]),
        "; ivec4 variable_position_x = mod(variable_position_y, ",
        ivec(shapes[x]),
        f"); const int n_x = {reduction_size}; float y; ",
        params,
        " ",
        item.pre_reduction_snippet,
        (f" for (int i_x = 0; i_x < {reduction_size}; i_x++) {{"
         " variable_position_x.w = i_x;"
         " float x = texture2D("),
        x,
        ", ",
        convert_coord("variable_position_x", shapes[x], strides[x],
                      texture_shape(x)[:2][::-1], texture_stride(x)[:2][::-1]),
        ").r; { ",
        item.body_snippet,
        " } } ",
        item.post_reduction_snippet,
        " gl_FragColor.r = y; } ",
    ], name=op.__class__.__name__)
def _generate_template_no_convert_position(op: Elementwise):
    """Build the ``KernelCode`` of an elementwise operator that needs no position conversion.

    Every input is sampled at ``gl_FragCoord.xy`` divided by the reversed
    first two entries of that input's texture shape, and bound to a GLSL float
    named after the input key. The registered parameters are then declared,
    the registered code runs, and ``y`` is written to the red channel.

    Args:
        op: the elementwise operator; its class is the key into
            ``_registered_items``.

    Returns:
        The assembled ``KernelCode`` (not yet generated).
    """
    load_nodes = []
    for k, v in op.inputs.items():
        load_nodes += [
            f"float {k} = texture2D(",
            v,
            ", gl_FragCoord.xy / ",
            vec2(texture_shape(v)[:2][::-1]),
            ").r;\n",
        ]

    # One registry lookup serves both the parameters and the code snippet.
    item = _registered_items[op.__class__]

    # Each registered parameter is a getter applied to the operator; the value
    # is declared as a GLSL float when it is a Python float, as int otherwise.
    for key, getter in item.parameters.items():
        value = getter(op)
        value_type = VType.Float if isinstance(value, float) else VType.Int
        load_nodes.append(
            GlobalDeclarationNode(value_type, key, value=value, with_value=True))

    return KernelCode([
        " void main() { float y; ",
        load_nodes,
        item.code,
        " gl_FragColor = vec4(y, 0, 0, 0); } ",
    ], name=op.__class__.__name__)
def average_pooling_2d(op: Unpooling2D) -> List[Kernel]:
    """Generate the kernel for an ``Unpooling2D`` operator.

    For every output position the shader walks the kernel window, computes
    ``h1 = h2 + PH - kh`` and ``w1 = w2 + PW - kw``, keeps only values that are
    non-negative, below the input extent times the stride and exact multiples
    of the stride, divides them by the stride and accumulates the input texel
    found there.

    NOTE(review): the function name does not match the operator type in its
    signature; it is kept because callers may depend on it.

    Args:
        op: operator with input ``"x"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if either variable does not use the NHWC axes.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)

    output_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    window_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float sum = 0.0;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 + {op.PH} - kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]} * {op.SH}) continue;"
        f" if (mod(h1, {op.SH}) != 0) continue;"
        f" h1 /= {op.SH};"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 + {op.PW} - kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]} * {op.SW}) continue;"
        f" if (mod(w1, {op.SW}) != 0) continue;"
        f" w1 /= {op.SW};"
        " sum += "
    )
    input_texel = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        output_position,
        window_loop,
        input_texel,
        ".r; } } gl_FragColor.r = sum; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def max_pooling_2d(op: MaxPooling2D) -> List[Kernel]:
    """Generate the MaxPooling2D kernel.

    The shader keeps a running maximum ``v`` over the input texels of the
    pooling window that lie inside the input's H/W extent.

    NOTE: ``v`` starts at ``-1e5``, so that value is the result when no window
    element is in range, and results can never be smaller than ``-1e5``.

    Args:
        op: operator with input ``"x"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if either variable does not use the NHWC axes or is
            not in R channel mode.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    output_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    window_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float v = -1e5;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue;"
        " v = max("
    )
    input_texel = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        output_position,
        window_loop,
        input_texel,
        ".r, v); } } gl_FragColor.r = v; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def _generate_template_convert_position(op: Elementwise, shapes: Dict[Variable, Sequence[int]], strides: Dict[Variable, Sequence[int]]):
    """Build the ``KernelCode`` of an elementwise operator with position conversion.

    The shader first computes ``variable_position_y`` from ``gl_FragCoord``.
    Each input is then loaded into a GLSL float named after its key:

    * an input whose shape equals ``y``'s is sampled at ``variable_position_y``;
    * any other input gets its own position, ``variable_position_y`` wrapped
      by that input's shape with ``mod``, and is sampled there.

    The registered parameters are declared, the registered code runs, and
    ``y`` is written to the red channel.

    Args:
        op: the elementwise operator; its class is the key into
            ``_registered_items``.
        shapes: shape to use for each variable.
        strides: stride to use for each variable.

    Returns:
        The assembled ``KernelCode`` (not yet generated).
    """
    load_nodes = []
    y = op.outputs["y"]

    for k, v in op.inputs.items():
        if shapes[v] == shapes[y]:
            position_name = "variable_position_y"
        else:
            # Shapes differ: wrap the output position into this input's shape.
            position_name = f"variable_position_{k}"
            load_nodes += [
                f"ivec4 variable_position_{k} = mod(variable_position_y, ",
                ivec(shapes[v]),
                ");\n",
            ]

        load_nodes += [
            f"float {k} = texture2D(",
            v,
            ", ",
            convert_coord(position_name, shapes[v], strides[v],
                          texture_shape(v)[:2][::-1], texture_stride(v)[:2][::-1]),
            ").r;\n",
        ]

    # One registry lookup serves both the parameters and the code snippet.
    item = _registered_items[op.__class__]

    # Each registered parameter is a getter applied to the operator; the value
    # is declared as a GLSL float when it is a Python float, as int otherwise.
    for key, getter in item.parameters.items():
        value = getter(op)
        value_type = VType.Float if isinstance(value, float) else VType.Int
        load_nodes.append(
            GlobalDeclarationNode(value_type, key, value=value, with_value=True))

    return KernelCode([
        " void main() { float y; ivec4 variable_position_y = ",
        convert_position("gl_FragCoord.yx",
                         texture_shape(y)[:2], texture_stride(y)[:2],
                         shapes[y], strides[y]),
        "; ",
        load_nodes,
        item.code,
        " gl_FragColor = vec4(y, 0, 0, 0); } ",
    ], name=op.__class__.__name__)
def space2depth(op: Space2Depth) -> List[Kernel]:
    """Generate the Space2Depth kernel.

    The shader reads n/h2/w2/c2 from the output position using ``y``'s own
    axis indices, derives the input channel ``c1 = mod(c2, C1)`` and the input
    row/column from ``h2``/``w2`` scaled by ``r`` plus the offsets encoded in
    ``c2 / C1``, assembles the input position using ``x``'s axis indices and
    copies that texel.

    Args:
        op: operator with input ``"x"``, output ``"y"`` and parameter ``'r'``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if either variable does not use the NHWC axes or is
            not in R channel mode.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    in_channels = src.shape_dict[Axis.C]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    output_position = get_output_position(dst)
    index_math = (
        f"; int n = variable_position_y[{dst.order.axes_dict[Axis.N]}];"
        f" int h2 = variable_position_y[{dst.order.axes_dict[Axis.H]}];"
        f" int w2 = variable_position_y[{dst.order.axes_dict[Axis.W]}];"
        f" int c2 = variable_position_y[{dst.order.axes_dict[Axis.C]}];"
        f" int c1 = mod(c2, {in_channels});"
        f" int h1 = h2 * {block} + c2 / {in_channels} / {block};"
        f" int w1 = w2 * {block} + mod(c2 / {in_channels}, {block});"
        " ivec4 variable_position_x;"
        f" variable_position_x[{src.order.axes_dict[Axis.N]}] = n;"
        f" variable_position_x[{src.order.axes_dict[Axis.H]}] = h1;"
        f" variable_position_x[{src.order.axes_dict[Axis.W]}] = w1;"
        f" variable_position_x[{src.order.axes_dict[Axis.C]}] = c1;"
        " gl_FragColor.r = "
    )
    input_texel = texel_fetch(src, "variable_position_x")

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        output_position,
        index_math,
        input_texel,
        ".r; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def reshape(op: Tile) -> List[Kernel]:
    """Generate the kernel for a ``Tile`` operator.

    The output position is wrapped by ``x``'s per-axis sizes (listed in
    ``y``'s axis order) with ``mod``, reordered from ``y``'s order to ``x``'s
    order, and the texel of ``x`` at that position is copied.

    NOTE(review): the function name does not match the operator type in its
    signature; it is kept because callers may depend on it.

    Args:
        op: operator with input ``"x"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # y -{broadcast}-> x_position_in_y_order -{change_order}-> x
    output_position = get_output_position(dst)
    src_extent = ivec([src.shape_dict[axis] for axis in dst.order.axes])
    wrapped_position = ExpressionNode(
        ["mod(", output_position, ", ", src_extent, ")"])
    input_texel = texel_fetch(
        src, change_order(wrapped_position, dst.order, src.order))

    code = KernelCode([
        " void main() { gl_FragColor.r = ",
        input_texel,
        ".r; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def reinterpret_axis(op: ReinterpretAxis) -> List[Kernel]:
    """Generate the copy kernel for a ReinterpretAxis operator.

    For each axis of ``x`` (in ``x``'s order) the axis at the same index of
    ``op.out_order`` as that axis has in ``op.in_order`` is looked up; the
    resulting order is used as the target when reordering the output
    position, and the texel of ``x`` at the reordered position is copied.

    Args:
        op: operator with input ``"x"``, output ``"y"`` and the
            ``in_order`` / ``out_order`` attributes.

    Returns:
        A one-element list containing the kernel that writes ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    renamed_axes = []
    for axis in src.order.axes:
        renamed_axes.append(op.out_order.axes[op.in_order.axes_dict[axis]])
    y_axes_order_in_x_order = Order(renamed_axes)

    # FIXME: optimize
    input_texel = texel_fetch(
        src,
        change_order(get_output_position(dst), dst.order, y_axes_order_in_x_order))

    code = KernelCode([
        " void main() { gl_FragColor.r = ",
        input_texel,
        ".r; } ",
    ], name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)]
def convert_r_to_rgba(op: ConvertRtoRGBA) -> List[Kernel]:
    """Generate the kernel that packs an R-mode texture into an RGBA-mode one.

    For each output fragment the shader performs four lookups into ``x``, one
    per third-component value 0..3 of the position
    ``(gl_FragCoord.y, gl_FragCoord.x, k)`` converted from ``y``'s texture
    layout to ``x``'s, and stores the four red values as the output's
    r, g, b and a components.

    Args:
        op: operator with input ``"x0"`` in R channel mode and output ``"y"``
            in RGBA channel mode; both variables must have equal orders.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if the channel modes or the orders do not match.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.RGBA
    assert x.order == y.order

    shape_x = texture_shape(x)
    stride_x = texture_stride(x)
    shape_y = texture_shape(y)
    stride_y = texture_stride(y)

    # Each lookup is "<lead> float yK = texture2D(" x ", " <coord>; the lead is
    # the shader opening for the first one and the previous lookup's tail after.
    fragments = []
    lead = " void main() {"
    for channel in range(4):
        fragments += [
            f"{lead} float y{channel} = texture2D(",
            x,
            ", ",
            convert_coord(f"ivec3(gl_FragCoord.y, gl_FragCoord.x, {channel})",
                          shape_y, stride_y, shape_x, stride_x),
        ]
        lead = ".yx).r;"
    fragments.append(".yx).r; gl_FragColor = vec4(y0, y1, y2, y3); } ")

    code = KernelCode(fragments, name=op.__class__.__name__)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, y)]
def concat(op: Concat) -> List[Kernel]:
    """Generate the kernels of a Concat operator that uses a workspace texture.

    The inputs are ``"x0"``, ``"x1"``, ... plus one ``"workspace"``. For every
    ``xi`` two kernels are emitted, in this order:

    1. ``Concat_copy_to_y`` writes ``y``: a fragment whose position along the
       concat axis, after subtracting ``xi``'s starting offset, lies inside
       ``xi`` is read from ``xi``; every other fragment is read from
       ``workspace`` at the same fragment coordinate.
    2. ``Concat_escape_to_ws`` copies ``y`` into ``workspace``.

    When ``xi`` has more than four axes, the orders of ``xi`` and ``y`` are
    first reduced with ``simplify_orders`` (keeping the concat axis).

    Args:
        op: the concat operator.

    Returns:
        The kernels, two per input, in input order.

    Raises:
        AssertionError: if an input's axes differ from ``y``'s or its channel
            mode differs from ``y``'s.
    """
    xs = [op.inputs[f"x{n}"] for n in range(len(op.inputs) - 1)]
    workspace = op.inputs["workspace"]
    y = op.outputs["y"]
    axis = op.axis

    kernels = []

    def reciprocal_texture_size(variable):
        # Reciprocals of the first two texture_shape entries, reversed.
        # noinspection PyUnresolvedReferences
        return [
            float(np.double(1.0) / np.double(v))
            for v in texture_shape(variable)[:2][::-1]
        ]

    inv_texture_shape_y = reciprocal_texture_size(y)
    inv_texture_shape_workspace = reciprocal_texture_size(workspace)

    # offsets[k] is where xs[k] starts along the concat axis.
    offsets = [0]
    running = 0
    for x in xs:
        running = running + x.shape_dict[axis]
        offsets.append(running)

    for index, x in enumerate(xs):
        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y)

        if x.ndim > 4:
            # simplify orders
            orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])
            shapes = {}
            for v, order in orders.items():
                shapes[v] = [shape_dicts[v][a] for a in order.axes]
            strides = {}
            for v, order in orders.items():
                strides[v] = [mul(shapes[v][d:]) for d in range(1, order.ndim + 1)]
        else:
            orders = {v: v.order for v in (y, x)}
            shape_dicts = {v: v.shape_dict for v in (y, x)}
            shapes = {v: v.shape for v in (y, x)}
            strides = {v: v.stride for v in (y, x)}

        axis_index = orders[x].axes_dict[axis]

        # copy xs[index] or workspace's value into y
        copy_code = KernelCode([
            " void main() { ",
            Type.Ivec.get_name(shapes[x]),
            " variable_position_x = ",
            change_order(
                convert_position("gl_FragCoord.yx",
                                 texture_shape(y)[:2], texture_stride(y)[:2],
                                 shapes[y], strides[y]),
                orders[y], orders[x]),
            (f"; variable_position_x[{axis_index}] -= {offsets[index]};"
             " gl_FragColor.r = ("
             f" variable_position_x[{axis_index}] < 0 ||"
             f" variable_position_x[{axis_index}] >= {shape_dicts[x][axis]}"
             " ) ? texture2D("),
            workspace,
            ", gl_FragCoord.xy * ",
            inv_texture_shape_workspace,
            ").r : texture2D(",
            x,
            ",",
            convert_coord("variable_position_x", shapes[x], strides[x],
                          texture_shape(x)[:2][::-1],
                          texture_stride(x)[:2][::-1]),
            ").r; } ",
        ], name="Concat_copy_to_y")

        # copy y's value into workspace
        escape_code = KernelCode([
            " void main() { gl_FragColor = texture2D(",
            y,
            ", gl_FragCoord.xy * ",
            inv_texture_shape_y,
            "); } ",
        ], name="Concat_escape_to_ws")

        copy_source = copy_code.generate()
        escape_source = escape_code.generate()

        kernels.append(Kernel(copy_source, copy_code.name,
                              copy_code.samplers, copy_code.uniforms, y))
        kernels.append(Kernel(escape_source, escape_code.name,
                              escape_code.samplers, escape_code.uniforms, workspace))

    return kernels
def reshape(op: Reshape) -> List[Kernel]:
    """Generate the copy kernel for a Reshape operator.

    The shader maps each output fragment through the chain

        y -> dummy_y -> dummy_x -> x

    where ``dummy_y`` is a variable with ``y``'s shape moved to the operator's
    ``out_order`` and ``dummy_x`` is one with ``x``'s shape moved to the
    operator's ``in_order``. The step from ``dummy_y`` to ``dummy_x`` is a
    ``convert_position`` between their shapes/strides; the outer steps are
    ``change_order`` calls.

    Args:
        op: operator with input ``"x"``, output ``"y"`` and parameters
            ``"in_order"`` / ``"out_order"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    in_order = op.parameters["in_order"]
    out_order = op.parameters["out_order"]

    # --- output side: y and its counterpart in out_order -------------------
    dummy_y = Variable(y.shape, y.order).change_order(out_order)
    orders_y_dy, shapes_y_dy = simplify_orders([y, dummy_y])
    if orders_y_dy[y] == orders_y_dy[dummy_y]:
        # Both simplified orders are equal: describe y and dummy_y with one
        # shared 4-entry shape from factorize(y.size), its strides, and an
        # order of four None axes.
        # NOTE(review): presumably factorize returns 4 factors whose product
        # is y.size -- confirm against its definition.
        order = Order([None] * 4)
        shape = factorize(y.size)
        stride = [mul(shape[i + 1:]) for i in range(4)]
        dummy_y = Variable(y.shape, y.order)
        shapes_y_dy = {y: shape, dummy_y: shape}
        strides_y_dy = {y: stride, dummy_y: stride}
        orders_y_dy = {y: order, dummy_y: order}
    else:
        # Turn the simplified shape dicts into lists following each simplified
        # order, and derive the strides from those lists.
        shapes_y_dy = {v: [shapes_y_dy[v][a] for a in orders_y_dy[v].axes] for v in [y, dummy_y]}
        strides_y_dy = {v: [mul(shapes_y_dy[v][i + 1:]) for i in range(orders_y_dy[v].ndim)] for v in [y, dummy_y]}

    # --- input side: same treatment for x and its counterpart in in_order ---
    dummy_x = Variable(x.shape, x.order).change_order(in_order)
    orders_x_dx, shapes_x_dx = simplify_orders([x, dummy_x])
    if orders_x_dx[x] == orders_x_dx[dummy_x]:
        order = Order([None] * 4)
        shape = factorize(x.size)
        stride = [mul(shape[i + 1:]) for i in range(4)]
        dummy_x = Variable(x.shape, x.order)
        shapes_x_dx = {x: shape, dummy_x: shape}
        strides_x_dx = {x: stride, dummy_x: stride}
        orders_x_dx = {x: order, dummy_x: order}
    else:
        shapes_x_dx = {v: [shapes_x_dx[v][a] for a in orders_x_dx[v].axes] for v in [x, dummy_x]}
        strides_x_dx = {v: [mul(shapes_x_dx[v][i + 1:]) for i in range(orders_x_dx[v].ndim)] for v in [x, dummy_x]}

    # FIXME: optimize
    # y -{change_order}-> dummy_y -{convert_position}-> dummy_x -{change_order}-> x
    code = KernelCode([f""" void main() {{ gl_FragColor.r = texture2D(""", x, """,""", convert_coord(
        change_order(
            convert_position(
                change_order(
                    # fragment coordinate -> position in y's (simplified) shape
                    convert_position("gl_FragCoord.yx", texture_shape(y)[:2], texture_stride(y)[:2], shapes_y_dy[y], strides_y_dy[y]),
                    orders_y_dy[y], orders_y_dy[dummy_y]
                ),
                shapes_y_dy[dummy_y], strides_y_dy[dummy_y],
                shapes_x_dx[dummy_x], strides_x_dx[dummy_x]
            ),
            orders_x_dx[dummy_x], orders_x_dx[x]
        ),
        # position in x's (simplified) shape -> coordinate in x's texture
        shapes_x_dx[x], strides_x_dx[x],
        texture_shape(x)[:2][::-1], texture_stride(x)[:2][::-1]
    ), f""").r; }} """], name=op.__class__.__name__)

    source = code.generate()
    return [Kernel(
        source,
        code.name,
        code.samplers,
        code.uniforms,
        y
    )]
def slice_handler(op: Slice) -> List[Kernel]:
    """Generate the copy kernel for a Slice operator.

    The kernel converts each output fragment into a position in ``y``,
    re-addresses it in ``x`` using strides scaled by each slice's step plus a
    constant element offset (the sum of ``stride * start`` over sliced axes
    and ``stride * index`` over integer-indexed axes), and copies that texel.

    Before that, runs of adjacent axes that are taken whole (start 0, stop
    equal to the axis size, step 1) are merged into the innermost axis of the
    run, which reduces the number of axes the shader has to handle.

    Args:
        op: operator with input ``"x"``, output ``"y"`` and an ``indices``
            mapping from axis to a ``slice`` or an ``int`` (other index kinds
            contribute nothing to the kernel).

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: if ``x`` and ``y`` are not both in R channel mode.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelMode.get(y) == ChannelModeEnum.R

    x_shape = []
    x_stride = []
    x_index_offset = 0
    y_shape = []
    y_stride = []

    # NOTE(review): these dicts are mutated below; this relies on the
    # properties returning objects that are safe to modify -- confirm.
    x_stride_dict = x.stride_dict
    y_shape_dict = y.shape_dict
    y_stride_dict = y.stride_dict
    x_axes = list(x.order.axes)

    # reduce number of axis
    # Walk from the innermost axis outwards. `flag_removed` is True while the
    # previously visited axis was taken whole; `merge_target` is the innermost
    # axis of the current run of such axes.
    flag_removed = False
    merge_target = None  # type: Axis
    for axis in reversed(x.order.axes):
        if not isinstance(op.indices[axis], slice):
            flag_removed = False
            merge_target = None
            continue

        index = normalize_slice(op.indices[axis], x.shape_dict[axis])
        if index.start != 0 or index.stop != x.shape_dict[axis] or index.step != 1:
            flag_removed = False
            merge_target = None
            continue

        # This axis is not changed, so it can be simplified
        if flag_removed:
            # Fold this axis into the run's innermost axis: drop its entries
            # and multiply its size into the merge target.
            del x_stride_dict[axis]
            x_axes.remove(axis)
            del y_stride_dict[axis]
            y_shape_dict[merge_target] *= y_shape_dict[axis]
            del y_shape_dict[axis]
        else:
            flag_removed = True
            merge_target = axis

    for axis in x_axes:
        if isinstance(op.indices[axis], slice):
            index = normalize_slice(op.indices[axis], x.shape_dict[axis])
            x_shape.append(y_shape_dict[axis])
            x_stride.append(x_stride_dict[axis] * index.step)
            x_index_offset += x_stride_dict[axis] * index.start
            y_shape.append(y_shape_dict[axis])
            y_stride.append(y_stride_dict[axis])
        elif isinstance(op.indices[axis], int):
            x_index_offset += x_stride_dict[axis] * op.indices[axis]

    if len(y_shape) == 1:
        # Pad a single remaining axis to two entries.
        # NOTE(review): presumably the position helpers need at least two
        # axes -- confirm, including the choice of 0 for the padded sizes.
        y_shape.append(0)
        y_stride.append(1)
        x_stride.append(0)
        x_shape.append(0)

    code = KernelCode([
        """ void main() { gl_FragColor.r = texture2D(""",
        x,
        ", (",
        convert_coord(
            ExpressionNode([
                convert_position("gl_FragCoord.yx",
                                 texture_shape(y)[:2], texture_stride(y)[:2],
                                 y_shape, y_stride)
            ]),
            x_shape, x_stride,
            texture_shape(x)[:2], texture_stride(x)[:2],
            x_index_offset),
        """).yx).r; } """
    ], name="Slice")

    source = code.generate()
    return [Kernel(source, code.name, code.samplers, code.uniforms, y)]
def tensordot(op: Tensordot) -> List[Kernel]:
    """Generate the Tensordot kernel.

    ``A`` is read as an ``M x K`` matrix and ``B`` as an ``N x K`` matrix,
    where ``K`` is the product of ``A``'s reduced (trailing) axis sizes; the
    output fragment at ``(m, n)`` receives the sum over ``k`` of the products
    of the two sampled texels.

    Two shader variants exist, chosen by ``A``'s channel mode:

    * R: ``K`` iterations, each multiplying the two red channels;
    * RGBA: ``K // 4`` iterations, each taking the ``dot`` of two ``vec4``
      texels.

    Args:
        op: operator with inputs ``"A"`` and ``"B"``, output ``"C"`` and
            ``axes`` (the reduced axes of ``A`` and of ``B``).

    Returns:
        A one-element list containing the kernel that writes ``C``.

    Raises:
        AssertionError: if the channel modes of ``A`` and ``B`` differ, ``C``
            is not in R mode, or the axis layout requirements are not met.
        NotImplementedError: if ``A``'s channel mode is neither R nor RGBA.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]
    reduced_axes = op.axes

    assert ChannelMode.get(lhs) == ChannelMode.get(rhs)
    assert ChannelMode.get(out) == ChannelModeEnum.R

    n_lhs = len(reduced_axes[0])
    n_rhs = len(reduced_axes[1])

    # Reduced axes must be located on inside of input variables.
    assert lhs.order.axes[-n_lhs:] == reduced_axes[0]
    assert rhs.order.axes[-n_rhs:] == reduced_axes[1]

    # output variable's axes order must be as [*a_remained_axes, *b_remained_axes]
    assert out.order.axes[:lhs.ndim - n_lhs] == lhs.order.axes[:-n_lhs]
    assert out.order.axes[-(rhs.ndim - n_rhs):] == rhs.order.axes[:-n_rhs]
    assert out.ndim == lhs.ndim - n_lhs + rhs.ndim - n_rhs

    K = mul(lhs.shape[-n_lhs:])
    M = lhs.size // K
    N = rhs.size // K

    # Per-variant pieces: loop trip count, GLSL type of a sampled texel,
    # swizzle applied to the sample, accumulation expression, kernel name.
    if ChannelMode.get(lhs) == ChannelModeEnum.R:
        steps, texel, swizzle, product, name = (
            K, "float", ".r", "v_a * v_b", "Tensordot_R")
    elif ChannelMode.get(lhs) == ChannelModeEnum.RGBA:
        steps, texel, swizzle, product, name = (
            K // 4, "vec4", "", "dot(v_a, v_b)", "Tensordot_RGBA")
    else:
        raise NotImplementedError

    code = KernelCode([
        " void main() { ivec2 variable_position_c = ",
        convert_position("gl_FragCoord.yx",
                         texture_shape(out)[:2], texture_stride(out)[:2],
                         [M, N], [N, 1]),
        ("; int m = variable_position_c.x;"
         " int n = variable_position_c.y;"
         " float v = 0.0;"
         f" for (int k = 0; k < {int(steps)}; k++) {{"
         f" {texel} v_a = texture2D("),
        lhs,
        ", (vec2(k, m) + 0.5) * ",
        vec([1.0 / steps, 1.0 / M]),
        f"){swizzle}; {texel} v_b = texture2D(",
        rhs,
        ", (vec2(k, n) + 0.5) * ",
        vec([1.0 / steps, 1.0 / N]),
        f"){swizzle}; v += {product}; }} gl_FragColor.r = v; }} ",
    ], name=name)

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, out)]
def im2col(op: Im2Col) -> List[Kernel]:
    """Generate the Im2Col kernel.

    ``col`` is addressed as a 4-axis variable whose last axis is the flattened
    (KH, KW, C) triple. The shader splits that flattened index into
    ``kh``, ``kw`` and ``c1``, computes the image row/column from the output
    position, stride, padding and dilation, and writes zero when the image
    position is out of range.

    Two shader variants exist, chosen by ``col``'s channel mode:

    * R: one image texel is copied to the red channel;
    * RGBA: the texels at channels ``c1 + 0`` .. ``c1 + 3`` are copied to the
      r, g, b and a channels.

    Args:
        op: operator with input ``"im"`` and output ``"col"``.

    Returns:
        A one-element list containing the kernel that writes ``col``.

    Raises:
        AssertionError: if ``col`` does not have the N/H/W/KH/KW/C axes with
            KH, KW, C as its last three axes in that order, if ``im`` does
            not use the NHWC axes, or if ``im`` is not in R channel mode.
        NotImplementedError: if ``col``'s channel mode is neither R nor RGBA.
    """
    image = op.inputs["im"]
    column = op.outputs["col"]

    height = image.shape_dict[Axis.H]
    width = image.shape_dict[Axis.W]
    channels = image.shape_dict[Axis.C]

    six_axes = Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C])
    assert column.order.check_same_axes(six_axes)
    assert column.order.axes_dict[Axis.KH] + 2 == column.order.axes_dict[
        Axis.KW] + 1 == column.order.axes_dict[Axis.C] == 5
    assert image.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(image) == ChannelModeEnum.R

    # View of col with the trailing three axes merged into one.
    flat_shape = column.shape[0:3] + (mul(column.shape[3:6]), )
    flat_stride = [mul(flat_shape[d:]) for d in range(1, len(flat_shape) + 1)]
    flat_order = Order(column.order.axes[0:3] + (Axis.C, ))

    def preamble(zero_fill):
        # Shader head shared by both variants, up to the first assignment of
        # the in-range branch; `zero_fill` is the out-of-range statement.
        return [
            " void main() { ivec4 variable_position_col = ",
            change_order(
                convert_position("gl_FragCoord.yx",
                                 texture_shape(column)[:2],
                                 texture_stride(column)[:2],
                                 flat_shape, flat_stride),
                flat_order, OrderNHWC),
            ("; int n = variable_position_col.x;"
             " int h2 = variable_position_col.y;"
             " int w2 = variable_position_col.z;"
             " int khkwc1 = variable_position_col.w;"
             f" int kh = khkwc1 / {channels} / {op.KW};"
             f" int kw = khkwc1 / {channels} - kh * {op.KW};"
             f" int c1 = khkwc1 - (kh * {op.KW} + kw) * {channels};"
             f" int h1 = h2 * {op.SH} - {op.PH} + kh * {op.DH};"
             f" int w1 = w2 * {op.SW} - {op.PW} + kw * {op.DW};"
             f" if (h1 < 0 || h1 >= {height} || w1 < 0 || w1 >= {width}) {{"
             f" {zero_fill}"
             " } else { gl_FragColor.r = "),
        ]

    def image_texel(channel_expr):
        # Fetch of the image texel at (n, h1, w1, <channel_expr>).
        return texel_fetch(
            image,
            change_order(f"vec4(n, h1, w1, {channel_expr})", OrderNHWC, image.order))

    if ChannelMode.get(column) == ChannelModeEnum.R:
        fragments = preamble("gl_FragColor.r = 0.0;")
        fragments.append(image_texel("c1"))
        fragments.append(".r; } } ")
        code = KernelCode(fragments, name="Im2Col_R")

    elif ChannelMode.get(column) == ChannelModeEnum.RGBA:
        fragments = preamble("gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);")
        fragments.append(image_texel("c1 + 0"))
        for offset, component in enumerate("gba", start=1):
            fragments += [
                f".r; gl_FragColor.{component} = ",
                image_texel(f"c1 + {offset}"),
            ]
        fragments.append(".r; } } ")
        code = KernelCode(fragments, name="Im2Col_RGBA")

    else:
        raise NotImplementedError

    return [Kernel(code.generate(), code.name, code.samplers, code.uniforms, column)]