def convert_r_to_rgba(op: ConvertRtoRGBA) -> List[Kernel]:
    """Build the kernel for a ``ConvertRtoRGBA`` operator.

    Reads input ``x0`` (asserted to be in ``ChannelModeEnum.R``) and output
    ``y`` (asserted to be in ``ChannelModeEnum.RGBA``), derives shape and
    stride lists for both variables laid out along ``y``'s simplified axis
    order, pads them to 4 entries and injects them as uniforms into the
    module-level ``template`` source.

    Args:
        op: operator providing ``inputs["x0"]`` and ``outputs["y"]``.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        NotImplementedError: if ``y`` still has more than 4 axes after
            ``simplify_orders``.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.RGBA

    orders, shape_dicts = simplify_orders([x, y])

    # Per-variable shape / stride lists in each variable's own simplified order.
    shapes = {}
    strides = {}
    stride_dicts = {}
    for v in (x, y):
        order = orders[v]
        dims = [shape_dicts[v][a] for a in order.axes]
        shapes[v] = dims
        strides[v] = [mul(dims[order.axes_dict[a] + 1:]) for a in order.axes]
        stride_dicts[v] = AxisKeyDict(order.axes, strides[v])

    # Re-express x along y's axis order; axes x does not have get extent 1
    # and stride 1.
    x_axes = orders[x].axes
    x_shape = []
    x_stride = []
    for a in orders[y].axes:
        if a in x_axes:
            x_shape.append(shape_dicts[x][a])
            x_stride.append(stride_dicts[x][a])
        else:
            x_shape.append(1)
            x_stride.append(1)
    shapes[x] = x_shape
    strides[x] = x_stride

    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    # Pad both variables' lists with 1s up to 4 entries.
    for v in (x, y):
        missing = 4 - len(shapes[v])
        strides[v].extend([1] * missing)
        shapes[v].extend([1] * missing)

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()
    uniforms = {
        "sampler_x": x,
        "texture_stride_y": texture_stride(y),
        "variable_shape_y": shapes[y],
        "variable_stride_y": strides[y],
        "texture_shape_x": texture_shape(x),
        "texture_stride_x": texture_stride(x),
        "variable_shape_x": shapes[x],
        "variable_stride_x": strides[x],
    }
    uniform_injector.register(uniforms)

    source = name_injector.inject(uniform_injector.inject(template))

    return [
        Kernel(
            source,
            name_injector.name,
            uniform_injector.samplers,
            uniform_injector.uniforms,
            y,
        )
    ]
def split_axis(op: SplitAxis) -> List[Kernel]:
    """Generate one kernel per output of a ``SplitAxis`` operator.

    The kernel for output ``y{i}`` converts the fragment coordinate into a
    logical position of ``y{i}``, re-orders it into ``x``'s axis order,
    shifts it by ``sections[i]`` along ``op.axis`` (``sections`` being
    ``[0] + op.sections``) and samples ``x`` there.

    Args:
        op: operator providing ``inputs["x"]``, outputs ``y0``, ``y1``, ...,
            ``sections`` and ``axis``.

    Returns:
        The generated kernels, in output order.
    """
    x = op.inputs["x"]
    ys = [op.outputs[f"y{i}"] for i in range(len(op.outputs))]
    sections = [0] + op.sections
    axis = op.axis

    kernels = []
    for i, y in enumerate(ys):
        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y) == ChannelModeEnum.R

        if x.ndim <= 4:
            # Use the variables' own layouts as they are.
            orders = {y: y.order, x: x.order}
            shapes = {y: y.shape, x: x.shape}
            strides = {y: y.stride, x: x.stride}
        else:
            # Simplify the orders, keeping the split axis intact.
            orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])
            shapes = {}
            strides = {}
            for v, order in orders.items():
                dims = [shape_dicts[v][a] for a in order.axes]
                shapes[v] = dims
                strides[v] = [mul(dims[k + 1:]) for k in range(order.ndim)]

        position_type = Type.Ivec.get_name(shapes[x])
        position_in_x = change_order(
            convert_position(
                "gl_FragCoord.yx",
                texture_shape(y)[:2],
                texture_stride(y)[:2],
                shapes[y],
                strides[y],
            ),
            orders[y],
            orders[x],
        )
        coord_in_x = convert_coord(
            "variable_position_x",
            shapes[x],
            strides[x],
            texture_shape(x)[:2][::-1],
            texture_stride(x)[:2][::-1],
        )

        code = KernelCode(
            [
                f""" void main() {{ """,
                position_type,
                f""" variable_position_x = """,
                position_in_x,
                f"""; variable_position_x[{orders[x].axes_dict[axis]}] += {sections[i]}; gl_FragColor.r = texture2D(""",
                x,
                ",",
                coord_in_x,
                f""").r; }} """,
            ],
            name=op.__class__.__name__,
        )

        source = code.generate()
        kernels.append(Kernel(source, code.name, code.samplers, code.uniforms, y))

    return kernels
def concat(op: Concat) -> List[Kernel]:
    """Generate the kernel concatenating exactly two inputs along ``op.axis``.

    For each fragment the logical position in ``y`` is re-ordered into the
    axis orders of ``x0`` and ``x1``. The ``x1`` position is shifted back by
    ``x0``'s extent along the axis, and the generated shader samples ``x1``
    when the ``x0`` position is at or beyond ``x0``'s extent on that axis,
    ``x0`` otherwise.

    Args:
        op: operator providing ``inputs["x0"]``, ``inputs["x1"]``,
            ``outputs["y"]`` and ``axis``.

    Returns:
        A single-element list holding the generated kernel.
    """
    assert len(op.inputs) == 2
    x0 = op.inputs["x0"]
    x1 = op.inputs["x1"]
    y = op.outputs["y"]
    axis = op.axis

    assert x0.order.check_same_axes(y.order)
    assert x1.order.check_same_axes(y.order)
    assert ChannelMode.get(x0) == ChannelMode.get(x1) == ChannelMode.get(y)

    if x0.ndim > 4 or x1.ndim > 4:
        # Simplify the orders, keeping the concatenation axis intact.
        orders, shape_dicts = simplify_orders([x0, x1, y], keep_axes=[axis])
        shapes = {}
        strides = {}
        for v, order in orders.items():
            dims = [shape_dicts[v][a] for a in order.axes]
            shapes[v] = dims
            strides[v] = [mul(dims[k + 1:]) for k in range(order.ndim)]
    else:
        orders = {y: y.order, x0: x0.order, x1: x1.order}
        shape_dicts = {y: y.shape_dict, x0: x0.shape_dict, x1: x1.shape_dict}
        shapes = {y: y.shape, x0: x0.shape, x1: x1.shape}
        strides = {y: y.stride, x0: x0.stride, x1: x1.stride}

    # Pieces are built in the same order in which they appear in the shader.
    x0_position_type = Type.Ivec.get_name(shapes[x0])
    x0_position = change_order(
        convert_position(
            "gl_FragCoord.yx",
            texture_shape(y)[:2],
            texture_stride(y)[:2],
            shapes[y],
            strides[y],
        ),
        orders[y],
        orders[x0],
    )
    x1_position_type = Type.Ivec.get_name(shapes[x1])
    x1_position = change_order(
        convert_position(
            "gl_FragCoord.yx",
            texture_shape(y)[:2],
            texture_stride(y)[:2],
            shapes[y],
            strides[y],
        ),
        orders[y],
        orders[x1],
    )
    x1_coord = convert_coord(
        "variable_position_x1",
        shapes[x1],
        strides[x1],
        texture_shape(x1)[:2][::-1],
        texture_stride(x1)[:2][::-1],
    )
    x0_coord = convert_coord(
        "variable_position_x0",
        shapes[x0],
        strides[x0],
        texture_shape(x0)[:2][::-1],
        texture_stride(x0)[:2][::-1],
    )

    code = KernelCode(
        [
            f""" void main() {{ """,
            x0_position_type,
            f""" variable_position_x0 = """,
            x0_position,
            f"""; """,
            x1_position_type,
            f""" variable_position_x1 = """,
            x1_position,
            f"""; variable_position_x1[{orders[x1].axes_dict[axis]}] -= {x0.shape_dict[axis]}; gl_FragColor.r = ( (variable_position_x0[{orders[x0].axes_dict[axis]}] >= {shape_dicts[x0][axis]}) ? texture2D(""",
            x1,
            ",",
            x1_coord,
            f""") : texture2D(""",
            x0,
            ",",
            x0_coord,
            f""") ).r; }} """,
        ],
        name=op.__class__.__name__,
    )

    source = code.generate()
    return [Kernel(source, code.name, code.samplers, code.uniforms, y)]
def reduce_kernel(op: Reduce):
    """Generate the kernel for a ``Reduce`` operator.

    Simplifies the orders of ``x`` and ``y`` while keeping ``op.axis``,
    builds "virtual" 4-entry shape/stride lists for both variables and hands
    them to ``_generate_template`` together with the size of the reduced
    axis.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and ``axis``.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        NotImplementedError: if ``y`` still has more than 4 axes after
            ``simplify_orders``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    axis = op.axis

    orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])

    # The lists below are padded to 4 entries, so more axes cannot be handled.
    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    shapes = {v: [shape_dicts[v][a] for a in orders[v].axes] for v in [x, y]}
    strides = {
        v: [mul(shapes[v][orders[v].axes_dict[a] + 1:]) for a in orders[v].axes]
        for v in [x, y]
    }
    stride_dicts = {v: AxisKeyDict(orders[v].axes, strides[v]) for v in [x, y]}

    # Change x's shapes and strides order to same as y's order
    x_virtual_shape = [
        shape_dicts[x][a] if a in orders[x].axes else 1 for a in orders[y].axes
    ]
    x_virtual_stride = [
        stride_dicts[x][a] if a in orders[x].axes else 1 for a in orders[y].axes
    ]

    # Pad to 3 entries with dummy axes of extent 1 / stride 1. The extent was
    # previously filled with the reduced axis' *stride*, which is not a
    # dimension size and disagrees with the padding used for y below.
    while len(x_virtual_shape) < 3:
        x_virtual_stride.append(1)
        x_virtual_shape.append(1)

    # The reduced axis is appended as the trailing entry.
    # NOTE(review): presumably the generated shader iterates over this last
    # component -- confirm against _generate_template.
    x_virtual_shape.append(shape_dicts[x][axis])
    x_virtual_stride.append(stride_dicts[x][axis])

    # Pad y to 4 entries with dummy axes of extent 1 / stride 1.
    y_virtual_shape = shapes[y]
    y_virtual_stride = strides[y]
    while len(y_virtual_shape) < 4:
        y_virtual_stride.append(1)
        y_virtual_shape.append(1)

    code = _generate_template(
        op,
        reduction_size=shape_dicts[x][axis],
        shapes={y: y_virtual_shape, x: x_virtual_shape},
        strides={y: y_virtual_stride, x: x_virtual_stride},
    )
    source = code.generate()

    return [Kernel(source, code.name, code.samplers, code.uniforms, y)]
def concat(op: Concat) -> List[Kernel]:
    """Generate the kernels concatenating several inputs through a workspace.

    Inputs are ``x0``, ``x1``, ... plus one ``workspace`` variable. For every
    input two kernels are emitted:

    1. ``Concat_copy_to_y`` writes ``y``: fragments whose position along
       ``op.axis`` falls inside this input's section are read from the
       input, all others are read from ``workspace``.
    2. ``Concat_escape_to_ws`` copies ``y`` into ``workspace``.

    Args:
        op: operator providing the inputs described above, ``outputs["y"]``
            and ``axis``.

    Returns:
        The generated kernels, two per input, in input order.
    """
    xs = [op.inputs[f"x{i}"] for i in range(len(op.inputs) - 1)]
    workspace = op.inputs["workspace"]
    y = op.outputs["y"]
    axis = op.axis
    kernels = []

    def reciprocals(extents):
        # Reciprocal of each extent, computed in double precision.
        # noinspection PyUnresolvedReferences
        return [float(np.double(1.0) / np.double(v)) for v in extents]

    inv_texture_shape_y = reciprocals(texture_shape(y)[:2][::-1])
    inv_texture_shape_workspace = reciprocals(texture_shape(workspace)[:2][::-1])

    # sections[i] is the offset along the axis at which xs[i] starts in y.
    sections = [0]
    offset = 0
    for x in xs:
        offset = offset + x.shape_dict[axis]
        sections.append(offset)

    for i, x in enumerate(xs):
        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y)

        if x.ndim > 4:
            # Simplify the orders, keeping the concatenation axis intact.
            orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])
            shapes = {}
            strides = {}
            for v, order in orders.items():
                dims = [shape_dicts[v][a] for a in order.axes]
                shapes[v] = dims
                strides[v] = [mul(dims[k + 1:]) for k in range(order.ndim)]
        else:
            orders = {y: y.order, x: x.order}
            shape_dicts = {y: y.shape_dict, x: x.shape_dict}
            shapes = {y: y.shape, x: x.shape}
            strides = {y: y.stride, x: x.stride}

        # copy xs[i] or workspace's value into y
        position_type = Type.Ivec.get_name(shapes[x])
        position_in_x = change_order(
            convert_position(
                "gl_FragCoord.yx",
                texture_shape(y)[:2],
                texture_stride(y)[:2],
                shapes[y],
                strides[y],
            ),
            orders[y],
            orders[x],
        )
        coord_in_x = convert_coord(
            "variable_position_x",
            shapes[x],
            strides[x],
            texture_shape(x)[:2][::-1],
            texture_stride(x)[:2][::-1],
        )
        code1 = KernelCode(
            [
                f""" void main() {{ """,
                position_type,
                f""" variable_position_x = """,
                position_in_x,
                f"""; variable_position_x[{orders[x].axes_dict[axis]}] -= {sections[i]}; gl_FragColor.r = ( variable_position_x[{orders[x].axes_dict[axis]}] < 0 || variable_position_x[{orders[x].axes_dict[axis]}] >= {shape_dicts[x][axis]} ) ? \ntexture2D(""",
                workspace,
                """, gl_FragCoord.xy * """,
                inv_texture_shape_workspace,
                """).r : texture2D(""",
                x,
                ",",
                coord_in_x,
                f""").r; }} """,
            ],
            name="Concat_copy_to_y",
        )

        # copy y's value into workspace
        code2 = KernelCode(
            [
                """ void main() { gl_FragColor = texture2D(""",
                y,
                """, gl_FragCoord.xy * """,
                inv_texture_shape_y,
                """); } """,
            ],
            name="Concat_escape_to_ws",
        )

        source1 = code1.generate()
        source2 = code2.generate()

        kernels.append(Kernel(source1, code1.name, code1.samplers, code1.uniforms, y))
        kernels.append(
            Kernel(source2, code2.name, code2.samplers, code2.uniforms, workspace)
        )

    return kernels
def reduce_kernel(op: Reduce):
    """Generate the kernel for a ``Reduce`` operator.

    Simplifies the orders of ``x`` and ``y`` while keeping ``op.axis``,
    builds "virtual" 4-entry shape/stride lists for both variables, registers
    them (plus the operator-specific parameters found in
    ``_registered_items``) as uniforms and injects everything into the source
    produced by ``_generate_template_convert_position``.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and ``axis``.

    Returns:
        A single-element list holding the generated kernel.

    Raises:
        NotImplementedError: if ``y`` still has more than 4 axes after
            ``simplify_orders``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    axis = op.axis

    orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])

    # The lists below are padded to 4 entries, so more axes cannot be handled.
    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    shapes = {v: [shape_dicts[v][a] for a in orders[v].axes] for v in [x, y]}
    strides = {
        v: [mul(shapes[v][orders[v].axes_dict[a] + 1:]) for a in orders[v].axes]
        for v in [x, y]
    }
    stride_dicts = {v: AxisKeyDict(orders[v].axes, strides[v]) for v in [x, y]}

    # Change x's shapes and strides order to same as y's order
    x_virtual_shape = [
        shape_dicts[x][a] if a in orders[x].axes else 1 for a in orders[y].axes
    ]
    x_virtual_stride = [
        stride_dicts[x][a] if a in orders[x].axes else 1 for a in orders[y].axes
    ]

    # Pad to 3 entries with dummy axes of extent 1 / stride 1. The extent was
    # previously filled with the reduced axis' *stride*, which is not a
    # dimension size and disagrees with the padding used for y below.
    while len(x_virtual_shape) < 3:
        x_virtual_stride.append(1)
        x_virtual_shape.append(1)

    # The reduced axis is appended as the trailing entry.
    # NOTE(review): presumably the generated shader iterates over this last
    # component -- confirm against _generate_template_convert_position.
    x_virtual_shape.append(shape_dicts[x][axis])
    x_virtual_stride.append(stride_dicts[x][axis])

    # Pad y to 4 entries with dummy axes of extent 1 / stride 1.
    y_virtual_shape = shapes[y]
    y_virtual_stride = strides[y]
    while len(y_virtual_shape) < 4:
        y_virtual_stride.append(1)
        y_virtual_shape.append(1)

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()
    uniform_injector.register({
        "texture_stride_y": texture_stride(y),
        "variable_shape_y": y_virtual_shape,
        "variable_stride_y": y_virtual_stride,
        "sampler_x": x,
        "texture_shape_x": texture_shape(x),
        "texture_stride_x": texture_stride(x),
        "variable_shape_x": x_virtual_shape,
        "variable_stride_x": x_virtual_stride,
    })

    # Operator-specific uniforms: each registered parameter is a function of
    # the operator. (The loop variable no longer shadows builtin `callable`.)
    for name, parameter_factory in _registered_items[op.__class__].parameters.items():
        uniform_injector.register({name: parameter_factory(op)})

    # Computing logical position is required.
    source = _generate_template_convert_position(
        op, reduction_size=shape_dicts[x][axis])
    source = uniform_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel(
        source,
        name_injector.name,
        uniform_injector.samplers,
        uniform_injector.uniforms,
        y,
    )

    return [kernel]
def _optimize_loop_structure(variables: List[Variable], key_variable: Variable, keep_axes: List[Axis] = None):
    """
    Optimize loop structure to iterate each element in variables

    Args:
        variables: variables whose orders are simplified together.
        key_variable: variable whose (re-ordered) simplified axes define the
            layout of every returned list. Must be one of ``variables``.
        keep_axes: forwarded unchanged to ``simplify_orders``.

    Returns:
        (tuple): two elements are returned

        - First one is shape dictionary of all variables.
        - Second one is stride dictionary of all variables.

        Every list is laid out along the key variable's axes and padded with
        1s to 4 entries; an axis a variable does not have gets extent 1 and
        stride 1.

    Raises:
        NotImplementedError: if the key variable still has more than 4 axes
            after simplification.
    """
    orders, shape_dicts = simplify_orders(
        variables, keep_axes=keep_axes
    )  # type: Dict[Variable, Order], Dict[Variable, AxisKeyDict[List[int]]]

    # Shapes / strides in each variable's own simplified order.
    shapes = {
        v: [shape_dicts[v][a] for a in orders[v].axes]
        for v in variables
    }
    strides = {
        v: [mul(shapes[v][orders[v].axes_dict[a] + 1:]) for a in orders[v].axes]
        for v in variables
    }
    stride_dicts = {
        v: AxisKeyDict(orders[v].axes, strides[v])
        for v in variables
    }

    # Re-ordering shapes and strides along to key variable's order:
    # start from the key variable's axes, then append axes that only other
    # variables have, visiting variables from fewest to most dimensions.
    axes = list(orders[key_variable].axes)
    for v in sorted(variables, key=lambda v: orders[v].ndim):
        axes += [axis for axis in orders[v].axes if axis not in axes]

    orders = {
        v: Order([a for a in axes if a in orders[v].axes])
        for v in variables
    }
    key_order = orders[key_variable]

    shapes = {
        v: [shape_dicts[v][a] if a in orders[v].axes else 1 for a in key_order.axes]
        for v in variables
    }
    strides = {
        v: [stride_dicts[v][a] if a in orders[v].axes else 1 for a in key_order.axes]
        for v in variables
    }

    # Padding shapes and strides to 4D
    if key_order.ndim > 4:
        # Report the variable whose order was actually checked. The message
        # used to interpolate `v`, i.e. whatever variable the sorting loop
        # above happened to visit last.
        raise NotImplementedError(f"Too large number of dimension: {key_variable}")

    for v in variables:
        shape = shapes[v]
        stride = strides[v]
        while len(shape) < 4:
            stride.append(1)
            shape.append(1)

    return shapes, strides
def reshape(op: Reshape) -> List[Kernel]:
    """Generate the kernel for a ``Reshape`` operator.

    The position of each output fragment is mapped back to the input through
    the chain

        y -{change_order}-> dummy_y -{convert_position}-> dummy_x -{change_order}-> x

    where ``dummy_y`` / ``dummy_x`` are copies of ``y`` / ``x`` re-ordered to
    ``out_order`` / ``in_order``.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and the
            parameters ``in_order`` and ``out_order``.

    Returns:
        A single-element list holding the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    in_order = op.parameters["in_order"]
    out_order = op.parameters["out_order"]

    def simplified_layout(v, target_order):
        # Returns (dummy, orders, shapes, strides) for `v` and its re-ordered
        # copy `dummy`.
        dummy = Variable(v.shape, v.order).change_order(target_order)
        orders, shape_dicts = simplify_orders([v, dummy])

        if orders[v] == orders[dummy]:
            # Both simplified orders agree: use one shared 4-entry layout
            # obtained from factorize(v.size) instead.
            shared_order = Order([None] * 4)
            shared_shape = factorize(v.size)
            shared_stride = [mul(shared_shape[k + 1:]) for k in range(4)]
            dummy = Variable(v.shape, v.order)
            return (
                dummy,
                {v: shared_order, dummy: shared_order},
                {v: shared_shape, dummy: shared_shape},
                {v: shared_stride, dummy: shared_stride},
            )

        shapes = {}
        strides = {}
        for u in [v, dummy]:
            shapes[u] = [shape_dicts[u][a] for a in orders[u].axes]
        for u in [v, dummy]:
            strides[u] = [mul(shapes[u][k + 1:]) for k in range(orders[u].ndim)]
        return dummy, orders, shapes, strides

    dummy_y, orders_y_dy, shapes_y_dy, strides_y_dy = simplified_layout(y, out_order)
    dummy_x, orders_x_dx, shapes_x_dx, strides_x_dx = simplified_layout(x, in_order)

    # FIXME: optimize
    # y -{change_order}-> dummy_y -{convert_position}-> dummy_x -{change_order}-> x
    position_y = convert_position(
        "gl_FragCoord.yx",
        texture_shape(y)[:2],
        texture_stride(y)[:2],
        shapes_y_dy[y],
        strides_y_dy[y],
    )
    position_dummy_y = change_order(position_y, orders_y_dy[y], orders_y_dy[dummy_y])
    position_dummy_x = convert_position(
        position_dummy_y,
        shapes_y_dy[dummy_y],
        strides_y_dy[dummy_y],
        shapes_x_dx[dummy_x],
        strides_x_dx[dummy_x],
    )
    position_x = change_order(position_dummy_x, orders_x_dx[dummy_x], orders_x_dx[x])
    coord_x = convert_coord(
        position_x,
        shapes_x_dx[x],
        strides_x_dx[x],
        texture_shape(x)[:2][::-1],
        texture_stride(x)[:2][::-1],
    )

    code = KernelCode(
        [
            f""" void main() {{ gl_FragColor.r = texture2D(""",
            x,
            """,""",
            coord_x,
            f""").r; }} """,
        ],
        name=op.__class__.__name__,
    )

    source = code.generate()
    return [Kernel(source, code.name, code.samplers, code.uniforms, y)]