def elementwise_add(op: Sgemm) -> List[Kernel]:
    """Generate the kernel for an ``Sgemm`` operator.

    Reads inputs ``"A"``/``"B"`` and output ``"C"`` from ``op``, registers the
    texture and matrix layout uniforms, injects them and the kernel name into
    the generated template, and wraps the resulting source in one ``Kernel``.

    NOTE(review): the function is named ``elementwise_add`` although it is
    annotated for ``Sgemm`` - looks like a leftover name; confirm with callers.

    Args:
        op: operator exposing ``inputs``, ``outputs``, ``M``, ``N``, ``K``,
            ``transpose_A`` and ``transpose_B``.

    Returns:
        A one-element list containing the generated kernel.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]

    # Both operands are required to use the same channel mode.
    assert ChannelMode.get_mode(lhs) == ChannelMode.get_mode(rhs)

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    # Key order is kept exactly as before; the injector may depend on it.
    bindings = {
        "A": lhs,
        "B": rhs,

        "s_c": texture_stride(out),
        "d_C": [op.M, op.N],
        "s_C": [op.N, 1],

        "d_a": texture_shape(lhs),
        "s_a": texture_stride(lhs),
        "s_A": [op.K, 1] if op.transpose_A else [1, op.M],

        "d_b": texture_shape(rhs),
        "s_b": texture_stride(rhs),
        "s_B": [op.N, 1] if op.transpose_B else [1, op.K],

        "K": op.K
    }
    uniforms.register(bindings)

    shader = generate_template(mode=ChannelMode.get_mode(lhs), K=op.K)
    shader = names.inject(uniforms.inject(shader))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, out)]
def convert_rgba_to_r(op: ConvertRGBAtoR) -> List[Kernel]:
    """Generate the kernel that unpacks an RGBA variable into an R variable.

    The shader maps the output fragment position to a position in the input
    texture and writes the r/g/b/a component selected by that position's
    ``z`` value into ``gl_FragColor.r``.

    Args:
        op: operator with input ``"x0"`` (asserted RGBA) and output ``"y"``
            (asserted R); both must have the same order.

    Returns:
        A one-element list containing the generated kernel.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    assert ChannelMode.get(src) == ChannelModeEnum.RGBA
    assert ChannelMode.get(dst) == ChannelModeEnum.R
    assert src.order == dst.order

    # Reciprocal of the first two texture-shape entries, in reversed order.
    reversed_xy = texture_shape(src)[:2][::-1]
    # noinspection PyUnresolvedReferences
    inv_x_shape = [np.double(1) / np.double(extent) for extent in reversed_xy]

    code = KernelCode([
        " void main() { ivec3 texture_position_x = ",
        convert_position("gl_FragCoord.yx",
                         texture_shape(dst)[:2], texture_stride(dst)[:2],
                         texture_shape(src), texture_stride(src)),
        "; vec2 texture_coord_x = (vec2(texture_position_x.yx) + 0.5) * ",
        vec2(inv_x_shape),
        "; vec4 x = texture2D(",
        src,
        (
            ", texture_coord_x);"
            " gl_FragColor.r = texture_position_x.z == 0 ? x.r :"
            " texture_position_x.z == 1 ? x.g :"
            " texture_position_x.z == 2 ? x.b :"
            " x.a; } "
        ),
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, dst)]
def fold_constance(self):
    """Remove this operator and let its input stand in for its output.

    After the operator is detached, output ``"y"`` is replaced by input
    ``"x0"``; the input is then marked as RGBA and re-ordered to the
    output's order.
    """
    constant = self.inputs["x0"]  # type: ConstantVariable
    result = self.outputs["y"]  # type: Variable

    # Detach first, then substitute the constant for the old output.
    self.remove_all()
    result.replace(constant)

    ChannelMode.set(constant, ChannelModeEnum.RGBA)
    constant.change_order(result.order)
def depth2space(op: Depth2Space) -> List[Kernel]:
    """Generate the kernel for a ``Depth2Space`` operator.

    For each output position ``(n, h2, w2, c2)`` the shader computes the
    source position ``(n, h1, w1, c1)`` from the block size ``r`` and the
    output channel count, and copies that texel's ``r`` component.

    Args:
        op: operator with input ``"x"``, output ``"y"`` (both asserted to have
            NHWC axes and R channel mode) and parameter ``'r'``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    block = op.parameters['r']
    out_channels = y.shape_dict[Axis.C]

    assert x.order.check_same_axes(OrderNHWC)
    assert y.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.R

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(y), y.order, OrderNHWC),
        (
            "; int n = variable_position_y.x;"
            " int h2 = variable_position_y.y;"
            " int w2 = variable_position_y.z;"
            " int c2 = variable_position_y.w;"
            f" int h1 = h2 / {block};"
            f" int w1 = w2 / {block};"
            f" int c1 = c2 + (w2-w1*{block})*{out_channels} + (h2-h1*{block})*{out_channels}*{block};"
            " gl_FragColor.r = "
        ),
        texel_fetch(x, change_order("vec4(n, h1, w1, c1)", OrderNHWC, x.order)),
        ".r; } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def __call__(self, *xs: "variable.Variable"):
    """Connect the inputs to this operator and create the output variable.

    The output's axes are the union of the inputs' axes; an axis whose size
    is 1 so far takes the size of a later input (broadcast). Resolved sizes
    are asserted to agree unless the input's size is 1. The output is marked
    as R channel mode.

    Args:
        *xs: input variables; registered as ``"x0"``, ``"x1"``, ... in the
            order they were passed.

    Returns:
        A one-element tuple holding the output variable.
    """
    out_axes = []
    out_shape = AxisKeyDict()

    # Check variable in descent order of the number of dimensions.
    # Without this procedure, in case that x0.order=C and x1.order=NC,
    # the output order is CN. Expected result is NC.
    ranked_inputs = sorted(enumerate(xs), key=lambda pair: pair[1].ndim, reverse=True)

    for index, x in ranked_inputs:
        for axis in x.order.axes:
            if axis not in out_axes:
                out_axes.append(axis)
                out_shape[axis] = x.shape_dict[axis]
            elif out_shape[axis] == 1:
                # broadcast
                out_shape[axis] = x.shape_dict[axis]

            if not Placeholder.check_resolved(x.shape_dict[axis]):
                continue

            if Placeholder.check_resolved(out_shape[axis]):
                assert out_shape[axis] == x.shape_dict[axis] or x.shape_dict[axis] == 1, \
                    "All input variables of elementwise operator should be same shape: " \
                    f"y.shape_dict[{axis}]={out_shape[axis]}, " \
                    f"x{index}.shape_dict[{axis}]={x.shape_dict[axis]}"
            else:
                # The output size was unresolved; adopt the resolved input size.
                out_shape[axis] = x.shape_dict[axis]

    y = variable.Variable([out_shape[axis] for axis in out_axes], Order(out_axes))
    ChannelMode.set(y, ChannelModeEnum.R)

    # Inputs are registered in call order, not in the sorted order used above.
    for index, x in enumerate(xs):
        self.append_input(f"x{index}", x)
    self.append_output("y", y)

    return (y,)
def convert_rgba_to_r(op: ConvertRGBAtoR) -> List[Kernel]:
    """Generate the template-based kernel converting an RGBA variable to R.

    Args:
        op: operator with input ``"x0"`` (asserted RGBA) and output ``"y"``
            (asserted R).

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        NotImplementedError: if input and output orders differ.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    assert ChannelMode.get(src) == ChannelModeEnum.RGBA
    assert ChannelMode.get(dst) == ChannelModeEnum.R

    if src.order != dst.order:
        raise NotImplementedError

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "X0": src,

        "s_y": texture_stride(dst),

        "d_x0": texture_shape(src),
        "s_x0": texture_stride(src),
    }
    uniforms.register(bindings)

    # Fill in the uniforms first, then the kernel name.
    shader = names.inject(uniforms.inject(template))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, dst)]
def optimize(self, graph: Graph):
    """Rewrite channel-mode conversions whose two ends already share a mode.

    Two patterns are handled, each found with one sub-structure search and
    processed from the last match to the first.

    Args:
        graph: graph to rewrite in place.

    Returns:
        ``(graph, flag_changed)`` where ``flag_changed`` tells whether any
        conversion operator was replaced.
    """
    flag_changed = False

    # Rule 1:
    #   before) v0[RGBA] -{ConvertRtoRGBA}- v1[RGBA]
    #   after)  v0[RGBA] -{ConvertRGBAtoR}- v2[Order=v0.order][R] -{Transpose}-
    #           v3[Order=v1.order][R] -{ConvertRtoRGBA}- v1[RGBA]
    rgba_matches = traverse.search_sub_structure(graph, [Variable, ConvertRtoRGBA, Variable])
    for v0, r2rgba, v1 in reversed(rgba_matches):  # type: Variable, ConvertRtoRGBA, Variable
        if not (ChannelMode.get(v0) == ChannelMode.get(v1) == ChannelModeEnum.RGBA):
            continue

        flag_changed = True
        r2rgba.remove_all()

        v2 = convert_rgba_to_r(v0)
        v2.change_order(v0.order)

        v3 = v2.transpose(v1.order)

        v1_new = convert_r_to_rgba(v3)
        v1_new.change_order(v1.order)

        OptimizeRule.replace_variable(graph, v1_new, v1)

    # Rule 2:
    #   before) v0[R] -{ConvertRGBAtoR}- v1[R]
    #   after)  v0[R] -{Transpose}- v1[R]
    r_matches = traverse.search_sub_structure(graph, [Variable, ConvertRGBAtoR, Variable])
    for v0, rgba2r, v1 in reversed(r_matches):  # type: Variable, ConvertRGBAtoR, Variable
        if not (ChannelMode.get(v0) == ChannelMode.get(v1) == ChannelModeEnum.R):
            continue

        flag_changed = True
        rgba2r.remove_all()

        OptimizeRule.replace_variable(graph, v0.transpose(v1.order), v1)

    return graph, flag_changed
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """Replace every ``Convolution2D`` with ``Im2Col`` followed by ``Sgemm``.

    The weight constant is flattened to ``(N, w.size // N)`` and, when the
    im2col channel count ``K`` is larger, right-padded with zeros to ``K``
    columns. Both the im2col output and the new weight are marked RGBA.

    Args:
        graph: graph to rewrite in place.

    Returns:
        ``(graph, flag_changed)``; ``flag_changed`` is True when at least one
        convolution was found.
    """
    flag_changed = False

    convolutions = traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D)
    for op in convolutions:  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]  # type: ConstantVariable
        old_y = op.outputs["y"]

        flag_changed = True
        op.remove_all()

        assert x.order == OrderNHWC
        assert isinstance(w, ConstantVariable)
        assert old_y.order == OrderNHWC

        w.change_order(OrderNHWC)

        im2col = Im2Col(None, ksize=op.ksize, stride=op.stride,
                        padding=op.padding, dilation_rate=op.dilation_rate)
        col, = im2col(x)
        col.change_order(OrderNHWC)
        ChannelMode.set_mode(col, ChannelModeEnum.RGBA)

        M = col.shape_dict[Axis.N] * col.shape_dict[Axis.H] * col.shape_dict[Axis.W]
        N = w.shape_dict[Axis.N]
        K = col.shape_dict[Axis.C]

        # Flatten the weight; pad with zero columns when K exceeds its width.
        w2_data = w.data.reshape(N, w.size // N)
        if K > (w.size // N):
            w2_data = np.hstack([w2_data, np.zeros([N, K - w.size // N])])

        w = ConstantVariable(w2_data, OrderNC)
        ChannelMode.set_mode(w, ChannelModeEnum.RGBA)

        out_shape = [
            col.shape_dict[Axis.N],
            col.shape_dict[Axis.H],
            col.shape_dict[Axis.W],
            w.shape_dict[Axis.N],
        ]
        sgemm = Sgemm(None, M=M, N=N, K=K,
                      out_shape=out_shape, out_order=OrderNHWC,
                      transpose_A=True, transpose_B=False)
        new_y, = sgemm(col, w)

        sgemm.replace_output(new_y, old_y)

    return graph, flag_changed
def convert_r_to_rgba(op: ConvertRtoRGBA) -> List[Kernel]:
    """Generate the template-based kernel converting an R variable to RGBA.

    Orders are simplified first; the input's shape and stride lists are then
    re-expressed along the output's axes (1 for axes the input lacks) and
    both variables' lists are padded with 1s up to four entries.

    Args:
        op: operator with input ``"x0"`` (asserted R) and output ``"y"``
            (asserted RGBA).

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        NotImplementedError: if the simplified output order has more than
            four dimensions.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.RGBA

    orders, shape_dicts = simplify_orders([x, y])

    shapes = {}
    strides = {}
    stride_dicts = {}
    for v in (x, y):
        order = orders[v]
        shapes[v] = [shape_dicts[v][a] for a in order.axes]
        strides[v] = [mul(shapes[v][order.axes_dict[a] + 1:]) for a in order.axes]
        stride_dicts[v] = AxisKeyDict(order.axes, strides[v])

    # Change x's shapes and strides order to same as y's order
    x_axes = orders[x].axes
    y_axes = orders[y].axes
    shapes[x] = [shape_dicts[x][a] if a in x_axes else 1 for a in y_axes]
    strides[x] = [stride_dicts[x][a] if a in x_axes else 1 for a in y_axes]

    # Padding shapes and strides to 4D
    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    for v in (x, y):
        # The pad count is driven by the shape list, as before.
        missing = max(0, 4 - len(shapes[v]))
        strides[v].extend([1] * missing)
        shapes[v].extend([1] * missing)

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "sampler_x": x,

        "texture_stride_y": texture_stride(y),
        "variable_shape_y": shapes[y],
        "variable_stride_y": strides[y],

        "texture_shape_x": texture_shape(x),
        "texture_stride_x": texture_stride(x),
        "variable_shape_x": shapes[x],
        "variable_stride_x": strides[x],
    }
    uniforms.register(bindings)

    shader = names.inject(uniforms.inject(template))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, y)]
def partial_im2col(op: PartialIm2Col) -> List[Kernel]:
    """Generate one kernel per output of a ``PartialIm2Col`` operator.

    Output ``col{i}`` gets an offset vector that is ``([0] + op.sections)[i]``
    on ``op.axis`` and 0 elsewhere. The R or RGBA template is chosen from the
    output's channel mode.

    Args:
        op: operator with input ``"im"`` and outputs ``"col0"``, ``"col1"``,
            ...; input and every output are asserted to be NHWC, and the
            input to be in R channel mode.

    Returns:
        The generated kernels, one per output, in output-index order.
    """
    im = op.inputs["im"]
    section_starts = [0] + op.sections
    axis = op.axis

    kernels = []
    for index in range(len(op.outputs)):
        col = op.outputs[f"col{index}"]

        assert im.order == col.order == OrderNHWC
        assert ChannelMode.get(im) == ChannelModeEnum.R

        names = KernelNameInjector(op)
        uniforms = UniformInjector()

        # Only the split axis carries a non-zero offset.
        offset = [section_starts[index] if a == axis else 0 for a in col.order.axes]

        uniforms.register({
            "sampler_im": im,

            "texture_stride_col": texture_stride(col),
            "variable_shape_col": col.shape,
            "variable_stride_col": col.stride,
            "offset_col": offset,

            "texture_shape_im": texture_shape(im),
            "texture_stride_im": texture_stride(im),
            "variable_shape_im": im.shape,
            "variable_stride_im": im.stride,

            "C1": im.shape_dict[Axis.C],
            "H1": im.shape_dict[Axis.H],
            "W1": im.shape_dict[Axis.W],
            "KH": op.KH,
            "KW": op.KW,
            "DH": op.DH,
            "DW": op.DW,
            "SH": op.SH,
            "SW": op.SW,
            "PH": op.PH,
            "PW": op.PW,
        })

        if ChannelMode.get(col) == ChannelModeEnum.R:
            shader = template_R
        else:
            shader = template_RGBA
        shader = names.inject(uniforms.inject(shader))

        kernels.append(Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, col))

    return kernels
def col2im(op: Col2Im) -> List[Kernel]:
    """Generate the ``KernelCode``-based kernel for a ``Col2Im`` operator.

    The six-axis ``col`` input is addressed as a four-axis variable whose
    last axis merges the original last three (KH, KW, C). For every output
    position the shader loops over the kernel window and sums the texels
    whose position divides evenly by the stride and lies in range.

    Args:
        op: operator with input ``"col"`` and output ``"im"`` (both asserted
            to be in R channel mode) and attributes ``KH``, ``KW``, ``SH``,
            ``SW``, ``PH``, ``PW``.

    Returns:
        A one-element list containing the generated kernel.
    """
    col = op.inputs["col"]
    im = op.outputs["im"]

    assert col.order.check_same_axes(Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]))
    # KH, KW and C must be the last three axes, in that order.
    assert col.order.axes_dict[Axis.KH] + 2 == col.order.axes_dict[Axis.KW] + 1 == col.order.axes_dict[Axis.C] == 5
    assert im.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(col) == ChannelModeEnum.R
    assert ChannelMode.get(im) == ChannelModeEnum.R

    # Collapse the trailing three axes of col into one.
    col_shape = col.shape[0:3] + (mul(col.shape[3:6]), )
    col_stride = [mul(col_shape[d + 1:]) for d in range(len(col_shape))]
    col_order = Order(col.order.axes[0:3] + (Axis.C, ))

    code = KernelCode([
        " void main() { ivec4 variable_position_im = ",
        change_order(get_output_position(im), im.order, OrderNHWC),
        (
            "; int n = variable_position_im.x;"
            " int h1 = variable_position_im.y;"
            " int w1 = variable_position_im.z;"
            " int c1 = variable_position_im.w;"
            " float sum = 0.0;"
            f" for (int kh = 0; kh < {op.KH}; kh++) {{"
            f" int h2 = (h1 + {op.PH} - kh) / {op.SH};"
            f" if (mod(h1 + {op.PH} - kh, {op.SH}) != 0 || h2 < 0 || h2 >= {col.shape_dict[Axis.H]}) continue;"
            f" for (int kw = 0; kw < {op.KW}; kw++) {{"
            f" int w2 = (w1 + {op.PW} - kw) / {op.SW};"
            f" if (mod(w1 + {op.PW} - kw, {op.SW}) != 0 || w2 < 0 || w2 >= {col.shape_dict[Axis.W]}) continue;"
            f" int khkwc1 = (kh * {op.KW} + kw) * {im.shape_dict[Axis.C]} + c1;"
            " sum += texture2D("
        ),
        col,
        ",",
        convert_coord(
            change_order("vec4(n, h2, w2, khkwc1)", OrderNHWC, col_order),
            col_shape, col_stride,
            texture_shape(col)[:2][::-1], texture_stride(col)[:2][::-1]),
        ").r; } } gl_FragColor.r = sum; } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, im)]
def average_pooling_2d(op: AveragePooling2D) -> List[Kernel]:
    """Generate the kernel for an ``AveragePooling2D`` operator.

    The shader sums the in-range texels of each pooling window. When
    ``divide_without_padding`` is set, the divisor is counted in the shader
    (starting from ``1e-8``); otherwise it is the constant
    ``float(ksize[0] * ksize[1])``.

    Args:
        op: operator with input ``"x"`` and output ``"y"`` (both asserted to
            have NHWC axes and R channel mode).

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    assert x.order.check_same_axes(OrderNHWC)
    assert y.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.R

    # GLSL fragments for: declaring, incrementing and reading the divisor.
    if op.parameters["divide_without_padding"]:
        divider_init, divider_add, divider_get = "float divider = 1e-8;", "divider += 1.0;", "divider"
    else:
        divider_init, divider_add = "", ""
        divider_get = str(float(op.ksize[0] * op.ksize[1]))

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(y), y.order, OrderNHWC),
        (
            "; int n = variable_position_y.x;"
            " int h2 = variable_position_y.y;"
            " int w2 = variable_position_y.z;"
            " int c = variable_position_y.w;"
            " float sum = 0.0;"
            f" {divider_init}"
            f" for (int kh = 0; kh < {op.KH}; kh++) {{"
            f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
            f" if (h1 < 0 || h1 >= {x.shape_dict[Axis.H]}) continue;"
            f" for (int kw = 0; kw < {op.KW}; kw++) {{"
            f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
            f" if (w1 < 0 || w1 >= {x.shape_dict[Axis.W]}) continue;"
            " sum += "
        ),
        texel_fetch(x, change_order("vec4(n, h1, w1, c)", OrderNHWC, x.order)),
        (
            f".r; {divider_add}"
            " } }"
            f" gl_FragColor.r = sum / {divider_get};"
            " } "
        ),
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def get_output_position(output_variable: Variable):
    """Return the expression for the variable position of the current fragment.

    The result is whatever ``convert_position`` produces when mapping the
    fragment coordinate in ``output_variable``'s texture to a position in
    the variable's own shape and stride.

    Args:
        output_variable: variable the kernel writes to.

    Returns:
        The value returned by ``convert_position`` for this variable.

    Raises:
        NotImplementedError: if the variable's channel mode is neither R nor
            RGBA. Previously this case fell through and returned ``None``.
    """
    channel_mode = ChannelMode.get(output_variable)

    if channel_mode == ChannelModeEnum.R:
        # Only the first two texture dimensions are used in R mode.
        return convert_position("gl_FragCoord.yx",
                                texture_shape(output_variable)[:2], texture_stride(output_variable)[:2],
                                output_variable.shape, output_variable.stride)

    if channel_mode == ChannelModeEnum.RGBA:
        return convert_position("vec3(gl_FragCoord.y, gl_FragCoord.x, 0)",
                                texture_shape(output_variable), texture_stride(output_variable),
                                output_variable.shape, output_variable.stride)

    # Fail loudly instead of handing None to the kernel generator.
    raise NotImplementedError(f"Unknown channel mode: {channel_mode}")
def split_axis(op: SplitAxis) -> List[Kernel]:
    """Generate one ``KernelCode``-based kernel per output of ``SplitAxis``.

    Each shader converts the output fragment position to a position in the
    input, adds ``([0] + op.sections)[i]`` on the split axis and copies the
    input texel found there. Inputs with more than four dimensions have
    their orders simplified first, keeping the split axis.

    Args:
        op: operator with input ``"x"`` and outputs ``"y0"``, ``"y1"``, ...;
            all are asserted to share axes and to be in R channel mode.

    Returns:
        The generated kernels, one per output, in output-index order.
    """
    x = op.inputs["x"]
    section_starts = [0] + op.sections
    axis = op.axis

    kernels = []
    for index in range(len(op.outputs)):
        y = op.outputs[f"y{index}"]

        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y) == ChannelModeEnum.R

        if x.ndim > 4:
            # simplify orders
            orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])
            shapes = {}
            strides = {}
            for v, order in orders.items():
                shapes[v] = [shape_dicts[v][a] for a in order.axes]
            for v, order in orders.items():
                strides[v] = [mul(shapes[v][d + 1:]) for d in range(order.ndim)]
        else:
            orders = {y: y.order, x: x.order}
            shapes = {y: y.shape, x: x.shape}
            strides = {y: y.stride, x: x.stride}

        code = KernelCode([
            " void main() { ",
            Type.Ivec.get_name(shapes[x]),
            " variable_position_x = ",
            change_order(
                convert_position("gl_FragCoord.yx",
                                 texture_shape(y)[:2], texture_stride(y)[:2],
                                 shapes[y], strides[y]),
                orders[y], orders[x]),
            (
                f"; variable_position_x[{orders[x].axes_dict[axis]}] += {section_starts[index]};"
                " gl_FragColor.r = texture2D("
            ),
            x,
            ",",
            convert_coord("variable_position_x", shapes[x], strides[x],
                          texture_shape(x)[:2][::-1], texture_stride(x)[:2][::-1]),
            ").r; } ",
        ], name=op.__class__.__name__)

        shader = code.generate()
        kernels.append(Kernel(shader, code.name, code.samplers, code.uniforms, y))

    return kernels
def concat(op: Concat) -> List[Kernel]:
    """Generate the kernel for a two-input ``Concat`` operator.

    The shader computes the fragment's position in both inputs (the second
    one shifted back by the first input's size on the concat axis) and reads
    from ``x1`` when the position on that axis is at or beyond ``x0``'s size,
    otherwise from ``x0``.

    Args:
        op: operator with exactly two inputs ``"x0"``/``"x1"`` and output
            ``"y"``; all three are asserted to share axes and channel mode.

    Returns:
        A one-element list containing the generated kernel.
    """
    assert len(op.inputs) == 2
    x0 = op.inputs["x0"]
    x1 = op.inputs["x1"]
    y = op.outputs["y"]
    axis = op.axis

    assert x0.order.check_same_axes(y.order)
    assert x1.order.check_same_axes(y.order)
    assert ChannelMode.get(x0) == ChannelMode.get(x1) == ChannelMode.get(y)

    if x0.ndim > 4 or x1.ndim > 4:
        # simplify orders
        orders, shape_dicts = simplify_orders([x0, x1, y], keep_axes=[axis])
        shapes = {}
        strides = {}
        for v, order in orders.items():
            shapes[v] = [shape_dicts[v][a] for a in order.axes]
        for v, order in orders.items():
            strides[v] = [mul(shapes[v][d + 1:]) for d in range(order.ndim)]
    else:
        orders = {y: y.order, x0: x0.order, x1: x1.order}
        shape_dicts = {y: y.shape_dict, x0: x0.shape_dict, x1: x1.shape_dict}
        shapes = {y: y.shape, x0: x0.shape, x1: x1.shape}
        strides = {y: y.stride, x0: x0.stride, x1: x1.stride}

    def _position_in(v):
        # Output fragment position re-expressed in the axis order of `v`.
        return change_order(
            convert_position("gl_FragCoord.yx",
                             texture_shape(y)[:2], texture_stride(y)[:2],
                             shapes[y], strides[y]),
            orders[y], orders[v])

    def _coord_of(v, position_name):
        # Texture coordinate of `v` at the named GLSL position variable.
        return convert_coord(position_name, shapes[v], strides[v],
                             texture_shape(v)[:2][::-1], texture_stride(v)[:2][::-1])

    code = KernelCode([
        " void main() { ",
        Type.Ivec.get_name(shapes[x0]),
        " variable_position_x0 = ",
        _position_in(x0),
        "; ",
        Type.Ivec.get_name(shapes[x1]),
        " variable_position_x1 = ",
        _position_in(x1),
        (
            f"; variable_position_x1[{orders[x1].axes_dict[axis]}] -= {x0.shape_dict[axis]};"
            " gl_FragColor.r = ("
            f" (variable_position_x0[{orders[x0].axes_dict[axis]}] >= {shape_dicts[x0][axis]}) ?"
            " texture2D("
        ),
        x1,
        ",",
        _coord_of(x1, "variable_position_x1"),
        ") : texture2D(",
        x0,
        ",",
        _coord_of(x0, "variable_position_x0"),
        ") ).r; } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def __init__(self, width: IntLike, height: IntLike, channel_mode: ChannelModeEnum, begin: int = _T_UNKNOWN, end: int = _T_UNKNOWN, name: str = None):
    """Create an allocation for a ``width`` x ``height`` texture.

    The base allocation's size is ``width * height`` multiplied by the
    number of elements per pixel of ``channel_mode``; its offset is ``-1``.

    Args:
        width: texture width.
        height: texture height.
        channel_mode: channel mode of the texture.
        begin: forwarded to the base class.
        end: forwarded to the base class.
        name: forwarded to the base class.
    """
    element_count = width * height * ChannelMode.elements_per_pixel(channel_mode)
    super(WebGLAllocation, self).__init__(
        size=element_count,
        offset=-1,
        begin=begin,
        end=end,
        name=name,
    )

    self.width, self.height, self.channel_mode = width, height, channel_mode
def elementwise_add(op: Tanh) -> List[Kernel]:
    """Generate the template-based kernel for a ``Tanh`` operator.

    Shapes and strides come from ``optimize_loop_structure``; the R or RGBA
    template is chosen from the output's channel mode.

    NOTE(review): the function is named ``elementwise_add`` although it is
    annotated for ``Tanh`` - looks like a leftover name; confirm with callers.

    Args:
        op: operator with input ``"x0"`` and output ``"y"``.

    Returns:
        A one-element list containing the generated kernel.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    shapes, strides = optimize_loop_structure([src, dst], dst)

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "X0": src,

        "s_y": texture_stride(dst),
        "d_Y": shapes[dst],
        "s_Y": strides[dst],

        "d_x0": texture_shape(src),
        "s_x0": texture_stride(src),
        "d_X0": shapes[src],
        "s_X0": strides[src],
    }
    uniforms.register(bindings)

    if ChannelMode.get(dst) == ChannelModeEnum.R:
        shader = template_R
    else:
        shader = template_RGBA
    shader = names.inject(uniforms.inject(shader))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, dst)]
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """Attach channel-mode attributes to nodes that do not have one yet.

    Every node without a ``ChannelMode`` attribute (except the two conversion
    operators) gets ``ChannelMode`` R. Operators additionally get
    ``SupportedChannelMode`` R, and also RGBA when their class is listed in
    ``_rgba_support_operators`` and all their inputs and outputs share one
    order and one shape.

    Args:
        graph: graph whose nodes are annotated in place.

    Returns:
        ``(graph, flag_changed)``; ``flag_changed`` is True when at least one
        node was annotated.
    """
    flag_changed = False

    for node in traverse.listup_nodes(graph):
        if node.has_attribute(ChannelMode) or isinstance(node, (ConvertRtoRGBA, ConvertRGBAtoR)):
            continue

        flag_changed = True
        node.attributes.add(ChannelMode(node, ChannelModeEnum.R))

        if not isinstance(node, Operator):
            continue

        node.attributes.add(SupportedChannelMode(node, ChannelModeEnum.R))

        if node.__class__ not in _rgba_support_operators:
            continue

        variables = [*node.inputs.values(), *node.outputs.values()]

        # RGBA is only advertised when every variable has one common layout.
        if all(v.order == variables[0].order for v in variables) \
                and all(v.shape == variables[0].shape for v in variables):
            node.attributes.add(SupportedChannelMode(node, ChannelModeEnum.RGBA))

    return graph, flag_changed
def split_axis(op: SplitAxis) -> List[Kernel]:
    """Generate one template-based kernel per output of ``SplitAxis``.

    Output ``y{i}`` gets an offset that is ``([0] + op.sections)[i]`` on the
    split axis and 0 elsewhere. The input's shape and stride are listed in
    the output's axis order, and every vector goes through ``_pad_to_4d``.

    Args:
        op: operator with input ``"x"`` and outputs ``"y0"``, ``"y1"``, ...;
            all are asserted to share axes and to be in R channel mode.

    Returns:
        The generated kernels, one per output, in output-index order.
    """
    x = op.inputs["x"]
    section_starts = [0] + op.sections
    axis = op.axis

    kernels = []
    for index in range(len(op.outputs)):
        y = op.outputs[f"y{index}"]

        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y) == ChannelModeEnum.R

        names = KernelNameInjector(op)
        uniforms = UniformInjector()

        # Only the split axis carries a non-zero offset.
        offset = [section_starts[index] if a == axis else 0 for a in y.order.axes]

        uniforms.register({
            "sampler_x": x,

            "texture_stride_y": texture_stride(y),
            "variable_shape_y": _pad_to_4d(y.shape),
            "variable_stride_y": _pad_to_4d(y.stride),

            "texture_shape_x": texture_shape(x),
            "texture_stride_x": texture_stride(x),
            "variable_shape_x": _pad_to_4d([x.shape_dict[a] for a in y.order.axes]),
            "variable_stride_x": _pad_to_4d([x.stride_dict[a] for a in y.order.axes]),

            "offset": _pad_to_4d(offset, 0)
        })

        shader = names.inject(uniforms.inject(template))

        kernels.append(Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, y))

    return kernels
def col2im(op: Col2Im) -> List[Kernel]:
    """Generate the template-based kernel for a ``Col2Im`` operator.

    Args:
        op: operator with input ``"col"`` and output ``"im"`` (both asserted
            to be NHWC and in R channel mode) and attributes ``SH``, ``SW``,
            ``PH``, ``PW``.

    Returns:
        A one-element list containing the generated kernel.
    """
    col = op.inputs["col"]
    im = op.outputs["im"]

    assert col.order == OrderNHWC
    assert im.order == OrderNHWC
    assert ChannelMode.get(col) == ChannelModeEnum.R
    assert ChannelMode.get(im) == ChannelModeEnum.R

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "col": col,

        "s_im": texture_stride(im),
        "d_Im": im.shape,
        "s_Im": im.stride,

        "d_col": texture_shape(col),
        "s_col": texture_stride(col),
        "d_Col": col.shape,
        "s_Col": col.stride,

        "H2": col.shape_dict[Axis.H],
        "W2": col.shape_dict[Axis.W],
        "C1": im.shape_dict[Axis.C],
        "SH": op.SH,
        "SW": op.SW,
        "PH": op.PH,
        "PW": op.PW,
    }
    uniforms.register(bindings)

    # The template itself is generated from the operator.
    shader = generate_template(op)
    shader = names.inject(uniforms.inject(shader))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, im)]
def tensordot(op: Tensordot) -> List[Kernel]:
    """Generate the kernel for a ``Tensordot`` operator.

    The operation is expressed as an ``M x K`` by ``K x N`` product, where
    ``K`` is the product of ``A``'s trailing reduced dimensions, ``M`` is
    ``A.size // K`` and ``N`` is ``B.size // K``.

    Args:
        op: operator with inputs ``"A"``/``"B"`` (asserted to share a channel
            mode), output ``"C"`` (asserted R) and ``axes`` giving the
            reduced axes of ``A`` and ``B``.

    Returns:
        A one-element list containing the generated kernel.
    """
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes = op.axes

    assert ChannelMode.get(A) == ChannelMode.get(B)
    assert ChannelMode.get(C) == ChannelModeEnum.R

    reduced_a = len(axes[0])
    reduced_b = len(axes[1])

    # Reduced axes must be located on inside of input variables.
    assert A.order.axes[-reduced_a:] == axes[0]
    assert B.order.axes[-reduced_b:] == axes[1]

    # output variable's axes order must be as [*a_remained_axes, *b_remained_axes]
    assert C.order.axes[:A.ndim - reduced_a] == A.order.axes[:-reduced_a]
    assert C.order.axes[-(B.ndim - reduced_b):] == B.order.axes[:-reduced_b]
    assert C.ndim == A.ndim - reduced_a + B.ndim - reduced_b

    K = mul(A.shape[-reduced_a:])
    M = A.size // K
    N = B.size // K

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "A": A,
        "B": B,

        "s_c": texture_stride(C),
        "d_C": [M, N],
        "s_C": [N, 1],

        "d_a": texture_shape(A),
        "d_b": texture_shape(B),

        "K": K
    }
    uniforms.register(bindings)

    shader = generate_template(mode=ChannelMode.get(A), reduction_size=K)
    shader = names.inject(uniforms.inject(shader))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, C)]
def max_pooling_2d(op: MaxPooling2D) -> List[Kernel]:
    """Generate the kernel for a ``MaxPooling2D`` operator.

    For every output position the shader loops over the pooling window,
    skips out-of-range positions, and keeps the maximum ``r`` component.

    NOTE(review): the shader seeds the running maximum with ``-1e5``, so a
    window whose values are all below that would yield ``-1e5`` - confirm
    this bound is acceptable for the expected inputs.

    Args:
        op: operator with input ``"x"`` and output ``"y"`` (both asserted to
            have NHWC axes and R channel mode) and attributes ``KH``, ``KW``,
            ``SH``, ``SW``, ``PH``, ``PW``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    assert x.order.check_same_axes(OrderNHWC)
    assert y.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.R

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(y), y.order, OrderNHWC),
        (
            "; int n = variable_position_y.x;"
            " int h2 = variable_position_y.y;"
            " int w2 = variable_position_y.z;"
            " int c = variable_position_y.w;"
            " float v = -1e5;"
            f" for (int kh = 0; kh < {op.KH}; kh++) {{"
            f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
            f" if (h1 < 0 || h1 >= {x.shape_dict[Axis.H]}) continue;"
            f" for (int kw = 0; kw < {op.KW}; kw++) {{"
            f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
            f" if (w1 < 0 || w1 >= {x.shape_dict[Axis.W]}) continue;"
            " v = max("
        ),
        texel_fetch(x, change_order("vec4(n, h1, w1, c)", OrderNHWC, x.order)),
        ".r, v); } } gl_FragColor.r = v; } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def im2col(op: Im2Col) -> List[Kernel]:
    """Generate the template-based kernel for an ``Im2Col`` operator.

    The R or RGBA template is chosen from the output's channel mode.

    Args:
        op: operator with input ``"im"`` (asserted NHWC and R) and output
            ``"col"`` (asserted NHWC), plus kernel, dilation, stride and
            padding attributes ``KH``..``PW``.

    Returns:
        A one-element list containing the generated kernel.
    """
    im = op.inputs["im"]
    col = op.outputs["col"]

    assert im.order == OrderNHWC
    assert col.order == OrderNHWC
    assert ChannelMode.get(im) == ChannelModeEnum.R

    names = KernelNameInjector(op)
    uniforms = UniformInjector()

    bindings = {
        "im": im,

        "s_col": texture_stride(col),
        "d_Col": col.shape,
        "s_Col": col.stride,

        "d_im": texture_shape(im),
        "s_im": texture_stride(im),
        "d_Im": im.shape,
        "s_Im": im.stride,

        "C1": im.shape_dict[Axis.C],
        "H1": im.shape_dict[Axis.H],
        "W1": im.shape_dict[Axis.W],
        "KH": op.KH,
        "KW": op.KW,
        "DH": op.DH,
        "DW": op.DW,
        "SH": op.SH,
        "SW": op.SW,
        "PH": op.PH,
        "PW": op.PW,
    }
    uniforms.register(bindings)

    if ChannelMode.get(col) == ChannelModeEnum.R:
        shader = template_R
    else:
        shader = template_RGBA
    shader = names.inject(uniforms.inject(shader))

    return [Kernel(shader, names.name, uniforms.samplers, uniforms.uniforms, col)]
def space2depth(op: Space2Depth) -> List[Kernel]:
    """Generate the kernel for a ``Space2Depth`` operator.

    The shader reads the output position by each axis's index in the
    output's order, derives the source ``(n, h1, w1, c1)`` from the block
    size ``r`` and the input channel count, writes those into a position
    laid out in the input's order and copies that texel's ``r`` component.

    Args:
        op: operator with input ``"x"``, output ``"y"`` (both asserted to have
            NHWC axes and R channel mode) and parameter ``'r'``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    block = op.parameters['r']
    in_channels = x.shape_dict[Axis.C]

    assert x.order.check_same_axes(OrderNHWC)
    assert y.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.R

    # Index of each axis within the output / input position vectors.
    y_index = y.order.axes_dict
    x_index = x.order.axes_dict

    code = KernelCode([
        " void main() { ivec4 variable_position_y = ",
        get_output_position(y),
        (
            f"; int n = variable_position_y[{y_index[Axis.N]}];"
            f" int h2 = variable_position_y[{y_index[Axis.H]}];"
            f" int w2 = variable_position_y[{y_index[Axis.W]}];"
            f" int c2 = variable_position_y[{y_index[Axis.C]}];"
            f" int c1 = mod(c2, {in_channels});"
            f" int h1 = h2 * {block} + c2 / {in_channels} / {block};"
            f" int w1 = w2 * {block} + mod(c2 / {in_channels}, {block});"
            " ivec4 variable_position_x;"
            f" variable_position_x[{x_index[Axis.N]}] = n;"
            f" variable_position_x[{x_index[Axis.H]}] = h1;"
            f" variable_position_x[{x_index[Axis.W]}] = w1;"
            f" variable_position_x[{x_index[Axis.C]}] = c1;"
            " gl_FragColor.r = "
        ),
        texel_fetch(x, "variable_position_x"),
        ".r; } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def __init__(self, base: Variable):
    """Attach a texture shape to ``base``.

    The number of pixels is ``base.size`` divided by the elements per pixel
    of the variable's channel mode, rounded up so that a size which is not
    a multiple of the elements per pixel still gets enough pixels
    (previously it was rounded down, e.g. 3 elements at 4 per pixel gave a
    width of 0). Pixels fill a single row up to
    ``config.WEBGL_MAX_TEXTURE_SIZE``; beyond that the width is capped and
    the height grows.

    Args:
        base: variable the attribute is attached to.

    Raises:
        ValueError: if ``base`` already has a ``TextureShape`` attribute.
    """
    if base.has_attribute(TextureShape):
        raise ValueError(f"\'TextureShape\' attribute has been already registered to {base}.")

    MAX_TEXTURE_SIZE = config.WEBGL_MAX_TEXTURE_SIZE
    super(TextureShape, self).__init__(base)

    # Ceiling division: a partially filled last pixel still needs a pixel.
    elements_per_pixel = ChannelMode.elements_per_pixel(base)
    spacial_size = (base.size + elements_per_pixel - 1) // elements_per_pixel

    if spacial_size > MAX_TEXTURE_SIZE:
        self.width = MAX_TEXTURE_SIZE  # type: int
        self.height = (spacial_size + MAX_TEXTURE_SIZE - 1) // MAX_TEXTURE_SIZE  # type: int
    else:
        self.width = spacial_size  # type: int
        self.height = 1  # type: int
def _replace_output(op: Operator, var_name: str, target: ChannelModeEnum):
    """Make ``op`` emit ``target`` channel mode, converting back afterwards.

    ::

        before)  -{op}- v
        after)   -{op}- v' -{conversion}- v

    Args:
        op: operator whose output is rewired.
        var_name: key of the output in ``op.outputs``.
        target: channel mode the operator's direct output should have.

    Returns:
        False when the output already has ``target`` mode (nothing is
        changed), True otherwise.
    """
    v = op.outputs[var_name]

    if ChannelMode.get(v) == target:
        return False

    # v' takes v's place as the operator's output, in the target mode.
    v_new = Variable(v.shape, v.order)
    ChannelMode.set(v_new, target)
    op.replace_output(v, v_new)

    # Convert v' back to the other mode and hook the result up as v.
    convert_back = convert_rgba_to_r if target == ChannelModeEnum.RGBA else convert_r_to_rgba
    convert_back(v_new).change_order(v.order).replace(v)

    return True
def convert_r_to_rgba(op: ConvertRtoRGBA) -> List[Kernel]:
    """Generate the ``KernelCode``-based kernel converting R to RGBA.

    The shader performs four texture reads from the input, one for each
    third position component 0..3, and packs the four ``r`` values into
    ``gl_FragColor``.

    Args:
        op: operator with input ``"x0"`` (asserted R) and output ``"y"``
            (asserted RGBA); both must have the same order.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.RGBA
    assert x.order == y.order

    shape_x = texture_shape(x)
    stride_x = texture_stride(x)
    shape_y = texture_shape(y)
    stride_y = texture_stride(y)

    def _source_coord(channel):
        # Input coordinate for the given channel of the current fragment.
        return convert_coord(f"ivec3(gl_FragCoord.y, gl_FragCoord.x, {channel})",
                             shape_y, stride_y, shape_x, stride_x)

    code = KernelCode([
        " void main() { float y0 = texture2D(",
        x, ", ", _source_coord(0),
        ".yx).r; float y1 = texture2D(",
        x, ", ", _source_coord(1),
        ".yx).r; float y2 = texture2D(",
        x, ", ", _source_coord(2),
        ".yx).r; float y3 = texture2D(",
        x, ", ", _source_coord(3),
        ".yx).r; gl_FragColor = vec4(y0, y1, y2, y3); } ",
    ], name=op.__class__.__name__)

    shader = code.generate()
    return [Kernel(shader, code.name, code.samplers, code.uniforms, y)]
def texture_shape(v: Variable):
    """Return the two-entry texture shape used for ``v``.

    The texture length is ``v.size`` in R mode and ``v.size`` divided by 4,
    rounded up, in RGBA mode. The first entry is that length capped at 2048;
    the second is the length divided by 2048, rounded up.

    Args:
        v: variable to compute the texture shape for.

    Returns:
        A list of two entries as described above.

    Raises:
        NotImplementedError: if the channel mode is neither R nor RGBA.
    """
    mode = ChannelMode.get_mode(v)

    if mode == ChannelModeEnum.R:
        length = v.size
    elif mode == ChannelModeEnum.RGBA:
        # Four elements share one texel; round up.
        length = (v.size + 4 - 1) // 4
    else:
        raise NotImplementedError(f"Unknown channel mode: {mode}")

    if length < 2048:
        first = length
    else:
        first = 2048
    second = (length + 2048 - 1) // 2048

    return [first, second]
def _replace_output(op: Operator, var_name: str, target: ChannelModeEnum):
    """Make ``op`` emit ``target`` channel mode, converting back afterwards.

    ::

        before)  -{op}- v
        after)   -{op}- v' -{conversion}- v

    Args:
        op: operator whose output is rewired.
        var_name: key of the output in ``op.outputs``.
        target: channel mode the operator's direct output should have.

    Returns:
        False when the output already has ``target`` mode (nothing is
        changed), True otherwise.
    """
    v = op.outputs[var_name]

    if ChannelMode.get(v) == target:
        return False

    # v' takes v's place as the operator's output, in the target mode.
    v_new = Variable(v.shape, v.order)
    ChannelMode.set(v_new, target)
    op.replace_output(v, v_new)

    # Convert v' back to the other mode and hook the result up as v.
    conversion_cls = ConvertRGBAtoR if target == ChannelModeEnum.RGBA else ConvertRtoRGBA
    conversion_cls(None)(v_new)[0].replace(v)

    return True
def texture_stride(v: Variable):
    """Return the stride of each texture dimension of ``v``.

    The first stride is 1 in R mode and 4 in RGBA mode; each following
    stride is the previous one multiplied by the previous entry of
    ``texture_shape(v)``.

    Args:
        v: variable to compute the texture strides for.

    Returns:
        A list with one stride per entry of ``texture_shape(v)``.

    Raises:
        NotImplementedError: if the channel mode is neither R nor RGBA.
    """
    channel_mode = ChannelMode.get(v)

    if channel_mode == ChannelModeEnum.R:
        running = 1
    elif channel_mode == ChannelModeEnum.RGBA:
        running = 4
    else:
        raise NotImplementedError(f"Unknown channel mode: {channel_mode}")

    strides = []
    for extent in texture_shape(v):
        # Record the stride of this dimension, then advance past it.
        strides.append(running)
        running *= extent

    return strides