def scalar_affine(op: ScalarAffine,
                  constants_layout: MemoryLayout,
                  variables_layout: MemoryLayout,
                  metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for a ``ScalarAffine`` operator.

    Args:
        op: Operator providing the ``x`` input, the ``y`` output, ``scale`` and ``bias``.
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of ``x`` and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If the shapes of ``x`` and ``y`` differ.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.shape == dst.variable.shape

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    meta_values = {
        "affine_transform_X_offset": src.offset,
        "affine_transform_Y_offset": dst.offset,
        "affine_transform_N": dst.variable.size,
        "affine_transform_scale": op.scale,
        "affine_transform_bias": op.bias
    }
    metabuffer_injector.register(meta_values)

    # The function name is derived from the injected source, then substituted into it.
    injected = metabuffer_injector.inject(template)
    func_name = util.add_canonical_suffix("scalar_affine", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def axiswise_scale_same_order(op: AxiswiseScale, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for ``AxiswiseScale`` using ``template_same_order``.

    Args:
        op: Operator providing inputs ``x`` and ``s``, output ``y`` and the target ``axis``.
        memory_layout: Layout used to look up the allocations of ``x``, ``s`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = memory_layout[op.inputs["x"]]
    s = memory_layout[op.inputs["s"]]
    y = memory_layout[op.outputs["y"]]

    # Split the shape of x around the target axis.
    # ``mul`` is defined elsewhere; presumably the product of the given extents - confirm.
    shape = x.variable.shape
    pivot = x.variable.order.axes_dict[op.axis]
    outer = mul(shape[:pivot])
    extent = shape[pivot]
    inner = mul(shape[pivot + 1:])

    injector = BufferInjector()
    injector.register({
        "axiswise_scale_X": x,
        "axiswise_scale_S": s,
        "axiswise_scale_Y": y,
        "axiswise_scale_D1": outer,
        "axiswise_scale_D2": extent,
        "axiswise_scale_D3": inner
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template_same_order))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def max_handler(op: Max, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Max`` operator.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``axis`` parameter.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    axis = op.parameters["axis"]

    # Strides of x, listed in the axis order of y.
    x_stride_in_y_order = [x.stride_dict[a] for a in y.order.axes]

    injector = BufferInjector()
    injector.register({
        "max_X": memory_layout[x],
        "max_Y": memory_layout[y],
        "max_y_stride": y.stride,
        "max_y_shape": y.shape,
        "max_x_stride": x_stride_in_y_order,
        "max_D": y.ndim,
        "max_N": x.shape_dict[axis],
        "max_MAX_GID": y.size,
        "max_x_target_axis_stride": x.stride_dict[axis]
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def tile(op: Tile, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Tile`` operator.

    Args:
        op: Operator providing input ``x`` and output ``y``.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    # Stride and shape of x are both listed in the axis order of y.
    y_axes = y.order.axes
    x_stride_in_y_order = [x.stride_dict[a] for a in y_axes]
    x_shape_in_y_order = [x.shape_dict[a] for a in y_axes]

    injector = BufferInjector()
    injector.register({
        "tile_x": memory_layout[x],
        "tile_y": memory_layout[y],
        "tile_y_stride": y.stride,
        "tile_x_stride": x_stride_in_y_order,
        "tile_x_shape": x_shape_in_y_order,
        "tile_D": x.ndim,
        "tile_MAX_GID": y.size,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def linear(op: Linear, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Linear`` operator.

    Args:
        op: Operator providing inputs ``x`` and ``w`` and output ``y``.
        memory_layout: Layout used to look up the allocations of ``x``, ``w`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x``/``y`` are not ``OrderNC`` or ``OrderNHWC``, if ``w``
            is not ``OrderCN`` or ``OrderHWCN``, or if ``w`` and ``x`` differ in rank.
    """
    x = op.inputs["x"]
    w = op.inputs["w"]
    y = op.outputs["y"]

    assert any(x.order == accepted for accepted in (OrderNC, OrderNHWC))
    assert any(w.order == accepted for accepted in (OrderCN, OrderHWCN))
    assert any(y.order == accepted for accepted in (OrderNC, OrderNHWC))
    assert w.ndim == x.ndim

    # M is the N-axis extent of y; N and K are the remaining sizes of y and x per N-axis entry.
    rows = y.shape_dict[Axis.N]
    cols = y.size // y.shape_dict[Axis.N]
    depth = x.size // x.shape_dict[Axis.N]

    injector = BufferInjector()
    injector.register({
        "linear_X": memory_layout[x],
        "linear_Y": memory_layout[y],
        "linear_W": memory_layout[w],
        "linear_M": rows,
        "linear_N": cols,
        "linear_K": depth,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def embedding(op: Embedding, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``Embedding`` operator.

    Args:
        op: Operator providing inputs ``x`` and ``w`` and output ``y``.
        memory_layout: Layout used to look up the allocations of ``x``, ``w`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` is not ``OrderNT``, ``w`` is not ``OrderCN`` or
            ``y`` is not ``OrderNTC``.
    """
    x = op.inputs["x"]
    w = op.inputs["w"]
    y = op.outputs["y"]

    assert x.order == OrderNT
    assert w.order == OrderCN
    assert y.order == OrderNTC

    values = {
        "embedding_X": memory_layout[x],
        "embedding_Y": memory_layout[y],
        "embedding_W": memory_layout[w],
        "embedding_vocabulary": w.shape_dict[Axis.C],
        "embedding_sequence_len": x.shape_dict[Axis.T],
        "embedding_batch_size": x.shape_dict[Axis.N],
        "embedding_dim": w.shape_dict[Axis.N]
    }
    injector = BufferInjector()
    injector.register(values)

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def softmax(op: Softmax, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Softmax`` operator.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``axis`` parameter.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` and ``y`` differ in order or shape, or if ``axis``
            is not the last axis of ``x``.
    """
    x = memory_layout[op.inputs["x"]]
    y = memory_layout[op.outputs["y"]]

    assert y.variable.order == x.variable.order
    assert y.variable.shape == x.variable.shape

    axis = op.parameters["axis"]
    last_axis = x.variable.order.axes[-1]
    assert axis == last_axis, "[Webassembly] Softmax supports only for aggregating last axis."

    # C is the extent of the aggregated axis; N is the number of elements per unit of C.
    injector = BufferInjector()
    injector.register({
        "softmax_X": x,
        "softmax_Y": y,
        "softmax_N": y.variable.size // y.variable.shape_dict[axis],
        "softmax_C": y.variable.shape_dict[axis],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def tanh(op: Tanh,
         constants_layout: MemoryLayout,
         variables_layout: MemoryLayout,
         metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for a ``Tanh`` operator.

    Args:
        op: Operator providing input ``x`` and output ``y``.
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of ``x`` and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` and ``y`` differ in order or shape.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.order == dst.variable.order
    assert src.variable.shape == dst.variable.shape

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    # NOTE(review): the keys are prefixed "relu_" although this is the tanh handler;
    # presumably the template (not visible here) uses the same names - confirm before renaming.
    meta_values = {
        "relu_X_offset": src.offset,
        "relu_Y_offset": dst.offset,
        "relu_N": dst.variable.size
    }
    metabuffer_injector.register(meta_values)

    injected = metabuffer_injector.inject(template)
    func_name = util.add_canonical_suffix("tanh", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def reshape(op: Reshape, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Reshape`` operator.

    Only reshapes that need no transposition are currently supported.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``in_order``,
            ``out_order`` and ``out_shape`` parameters.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        An empty list when ``x`` and ``y`` map to equal allocations (in-place
        operation); otherwise a one-element list containing the generated kernel.

    Raises:
        AssertionError: If the variable orders do not match the operator parameters,
            or the size of ``y`` does not match ``out_shape``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    src_alloc = memory_layout[x]
    dst_alloc = memory_layout[y]
    if src_alloc == dst_alloc:
        # In-place operation: nothing to generate.
        return []

    params = op.parameters
    assert x.order == params["in_order"]
    assert y.order == params["out_order"]
    assert y.size == mul(params["out_shape"])

    injector = BufferInjector()
    injector.register({
        "reshape_x": memory_layout[x],
        "reshape_y": memory_layout[y],
        "reshape_N": y.size,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def reinterpret_axis(op: ReinterpretAxis, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``ReinterpretAxis`` operator.

    Only cases that need no transposition are currently supported.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``in_order`` and
            ``out_order`` parameters.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If the variable orders do not match the operator parameters.
    """
    src = memory_layout[op.inputs["x"]]
    dst = memory_layout[op.outputs["y"]]

    assert src.variable.order == op.parameters["in_order"]
    assert dst.variable.order == op.parameters["out_order"]

    values = {
        "reinterpret_axis_x": src,
        "reinterpret_axis_y": dst,
        "reinterpret_axis_N": dst.variable.size,
    }
    injector = BufferInjector()
    injector.register(values)

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def space2depth(op: Space2Depth, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Space2Depth`` operator.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``r`` parameter.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    r = op.parameters['r']

    assert x.order == OrderNHWC
    assert y.order == OrderNHWC

    # Suffix 1 refers to the input extents, suffix 2 to the output extents.
    in_dims = x.shape_dict
    out_dims = y.shape_dict

    injector = BufferInjector()
    injector.register({
        "space2depth_x": memory_layout[x],
        "space2depth_y": memory_layout[y],
        'space2depth_r': r,
        "space2depth_N": in_dims[Axis.N],
        "space2depth_C1": in_dims[Axis.C],
        "space2depth_C2": out_dims[Axis.C],
        "space2depth_H1": in_dims[Axis.H],
        "space2depth_H2": out_dims[Axis.H],
        "space2depth_W1": in_dims[Axis.W],
        "space2depth_W2": out_dims[Axis.W],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def axiswise_scale_general(op: AxiswiseScale, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for ``AxiswiseScale`` using ``template_general``.

    Besides the values registered by the same-order variant, this registers the
    rank and shape of ``x`` and, for every axis of ``x``, the stride that axis
    has in a densely packed ``y``.

    Args:
        op: Operator providing inputs ``x`` and ``s``, output ``y`` and the target ``axis``.
        memory_layout: Layout used to look up the allocations of ``x``, ``s`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = memory_layout[op.inputs["x"]]
    s = memory_layout[op.inputs["s"]]
    y = memory_layout[op.outputs["y"]]

    x_shape = x.variable.shape
    pivot = x.variable.order.axes_dict[op.axis]
    # ``mul`` is defined elsewhere; presumably the product of the given extents - confirm.
    outer = mul(x_shape[:pivot])
    extent = x_shape[pivot]
    inner = mul(x_shape[pivot + 1:])

    # Strides of y computed from its shape, innermost axis first, then flipped.
    y_strides = []
    running = 1
    for dim in reversed(y.variable.shape):
        y_strides.append(running)
        running *= dim
    y_strides.reverse()

    y_axis_index = y.variable.order.axes_dict
    x_stride_in_y = [y_strides[y_axis_index[axis]] for axis in x.variable.order.axes]

    injector = BufferInjector()
    injector.register({
        "axiswise_scale_X": x,
        "axiswise_scale_S": s,
        "axiswise_scale_Y": y,
        "axiswise_scale_D1": outer,
        "axiswise_scale_D2": extent,
        "axiswise_scale_D3": inner,
        "axiswise_scale_D": x.variable.ndim,
        "axiswise_scale_d_target": x.variable.order.axes_dict[op.axis],
        "axiswise_scale_x_shape": x_shape,
        "axiswise_scale_x_stride_in_y": x_stride_in_y,
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template_general))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Tensordot`` operator as a matrix product.

    The reduced axes are collapsed into ``K``; the remaining elements of ``A``
    and ``B`` form ``M`` and ``N`` respectively. These locally computed sizes
    are used for both the eigen and the non-eigen template.

    Args:
        op: Operator providing inputs ``A`` and ``B``, output ``C`` and ``axes``
            (``axes[0]`` are the reduced axes of ``A``, ``axes[1]`` those of ``B``).
        memory_layout: Layout used to look up the allocations of ``A``, ``B`` and ``C``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If the reduced axes are not the trailing axes of the
            inputs, or the axes of ``C`` are not ``[*a_remained_axes, *b_remained_axes]``.
    """
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes = op.axes

    # Reduced axes must be the trailing axes of both input variables.
    assert A.order.axes[-len(axes[0]):] == axes[0]
    assert B.order.axes[-len(axes[1]):] == axes[1]

    # Output variable's axes order must be [*a_remained_axes, *b_remained_axes].
    a_remained = A.ndim - len(axes[0])
    assert C.order.axes[:a_remained] == A.order.axes[:-len(axes[0])]
    # Slice from an explicit start index: a negative start computed as
    # ``-(B.ndim - len(axes[1]))`` becomes ``-0`` when B has no remaining axes
    # and would then select the whole tuple instead of an empty one.
    assert C.order.axes[a_remained:] == B.order.axes[:-len(axes[1])]
    assert C.ndim == A.ndim - len(axes[0]) + B.ndim - len(axes[1])

    K = mul(A.shape_dict[a] for a in axes[0])
    M = A.size // K
    N = B.size // K

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "sgemm_A": memory_layout[A],
        "sgemm_B": memory_layout[B],
        "sgemm_C": memory_layout[C],
        "sgemm_M": M,
        "sgemm_N": N,
        "sgemm_K": K
    })

    # NOTE(review): both branches repeat values already registered above; the calls
    # are kept because the semantics of ``register`` are not visible here - confirm
    # whether they can be dropped.
    if op.has_attribute(UseEigenAttribute):
        source = generate_template_eigen(True, False)

        buffer_injector.register({
            "sgemm_A": memory_layout[A],
            "sgemm_B": memory_layout[B],
            "sgemm_C": memory_layout[C]
        })

    else:
        source = generate_template(True, False)

        # Use the sizes computed above rather than ``op.M``/``op.N``/``op.K``, so that
        # this branch agrees with the eigen branch and with the local computation.
        buffer_injector.register({
            "sgemm_A": memory_layout[A],
            "sgemm_B": memory_layout[B],
            "sgemm_C": memory_layout[C],
            "sgemm_M": M,
            "sgemm_N": N,
            "sgemm_K": K
        })

    name_injector = KernelNameInjector(op)
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel(
        {name_injector.name: source},
        name_injector.name,
        buffer_injector.buffer,
        buffer_injector.unresolved_value_list
    )

    return [kernel]
def average_pooling_2d(op: AveragePooling2D, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``AveragePooling2D`` operator.

    Before buffer injection, the template is specialised by replacing each key
    of ``statement_divide_without_padding[op.parameters["divide_without_padding"]]``
    with its statement.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``ksize``,
            ``stride``, ``padding`` and ``divide_without_padding`` parameters.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    assert x.order == OrderNHWC
    assert y.order == OrderNHWC

    params = op.parameters
    injector = BufferInjector()
    injector.register({
        "average_pooling_2d_X": memory_layout[x],
        "average_pooling_2d_Y": memory_layout[y],
        "average_pooling_2d_N": x.shape_dict[Axis.N],
        "average_pooling_2d_H1": x.shape_dict[Axis.H],
        "average_pooling_2d_W1": x.shape_dict[Axis.W],
        "average_pooling_2d_C": x.shape_dict[Axis.C],
        "average_pooling_2d_H2": y.shape_dict[Axis.H],
        "average_pooling_2d_W2": y.shape_dict[Axis.W],
        "average_pooling_2d_KH": params["ksize"][0],
        "average_pooling_2d_KW": params["ksize"][1],
        "average_pooling_2d_SH": params["stride"][0],
        "average_pooling_2d_SW": params["stride"][1],
        "average_pooling_2d_PH": params["padding"][0],
        "average_pooling_2d_PW": params["padding"][1],
    })

    namer = KernelNameInjector(op)

    code = template
    substitutions = statement_divide_without_padding[op.parameters["divide_without_padding"]]
    for placeholder, statement in substitutions.items():
        code = code.replace(placeholder, statement)

    code = injector.inject(code)
    code = namer.inject(code)

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def concat(op: Concat,
           constants_layout: MemoryLayout,
           variables_layout: MemoryLayout,
           metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for a ``Concat`` operator (meta-buffer variant).

    Args:
        op: Operator providing inputs ``x0``, ``x1``, ..., output ``y`` and the
            concatenation ``axis``.
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of the inputs and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.
    """
    xs = [variables_layout[op.inputs[f"x{i}"]] for i in range(len(op.inputs))]
    y = variables_layout[op.outputs["y"]]
    target_axis = op.axis

    x_offsets = [x.offset for x in xs]
    x_shapes = [x.variable.shape for x in xs]

    # Strides of y computed from its shape, innermost axis first, then flipped.
    y_strides = []
    running = 1
    for dim in reversed(y.variable.shape):
        y_strides.append(running)
        running *= dim
    y_strides.reverse()

    # x_strides_in_y[i][j] is the stride in y of the axis xs[i].order.axes[j].
    y_axis_index = y.variable.order.axes_dict
    x_strides_in_y = [
        [y_strides[y_axis_index[axis]] for axis in x.variable.order.axes]
        for x in xs
    ]

    # y_offsets[i] is the offset within y at which the data of xs[i] begins.
    y_offsets = []
    consumed = 0
    for x in xs:
        y_offsets.append(consumed * y_strides[y.variable.order.axes_dict[target_axis]])
        consumed += x.variable.shape_dict[target_axis]

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    def as_int32_bytes(values):
        # Serialise as the raw bytes of an int32 numpy array.
        return np.array(values, dtype=np.int32).tobytes()

    metabuffer_injector.register({
        "concat_y_offset": y.offset,
        "concat_D": len(y.variable.shape),
        "concat_N": len(xs),
        "concat_x_offsets": as_int32_bytes(x_offsets),
        "concat_x_strides_in_y": as_int32_bytes(x_strides_in_y),
        "concat_x_shapes": as_int32_bytes(x_shapes),
        "concat_y_offsets": as_int32_bytes(y_offsets),
    })

    injected = metabuffer_injector.inject(template)
    func_name = util.add_canonical_suffix("concat", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def concat(op: Concat, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Concat`` operator.

    Args:
        op: Operator providing inputs ``x0``, ``x1``, ..., output ``y`` and the
            concatenation ``axis``.
        memory_layout: Layout used to look up the allocations of the inputs and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    xs = [memory_layout[op.inputs[f"x{i}"]] for i in range(len(op.inputs))]
    y = memory_layout[op.outputs["y"]]
    target_axis = op.axis

    x_shapes = [x.variable.shape for x in xs]

    # Strides of y computed from its shape, innermost axis first, then flipped.
    y_strides = []
    stride = 1
    for s in reversed(y.variable.shape):
        y_strides.append(stride)
        stride *= s
    y_strides.reverse()

    y_axis_index = y.variable.order.axes_dict

    # x_strides_in_y[i][j] is the stride in y of the axis xs[i].order.axes[j].
    x_strides_in_y = [
        [y_strides[y_axis_index[axis]] for axis in x.variable.order.axes]
        for x in xs
    ]

    # y_offsets[i] is the offset within y at which the data of xs[i] begins.
    target_axis_stride = y_strides[y_axis_index[target_axis]]
    y_offsets = []
    target_axis_offset = 0
    for x in xs:
        y_offsets.append(target_axis_offset * target_axis_stride)
        target_axis_offset += x.variable.shape_dict[target_axis]

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "concat_y": y,
        "concat_D": len(y.variable.shape),
        "concat_N": len(xs),
        "concat_xs": xs,
        "concat_x_strides_in_y": x_strides_in_y,
        "concat_x_shapes": x_shapes,
        "concat_y_offsets": y_offsets
    })

    name_injector = KernelNameInjector(op)

    source = template
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel(
        {name_injector.name: source},
        name_injector.name,
        buffer_injector.buffer,
        buffer_injector.unresolved_value_list
    )

    return [kernel]
def elementwise_kernel_base(op: Elementwise, command_buffer: CommandBuffer, buffer_injector: BufferInjector):
    """Build the kernel list for an elementwise operator from a command buffer.

    Args:
        op: Operator used to derive the kernel name.
        command_buffer: Commands passed to ``encode_command`` to obtain the source.
        buffer_injector: Injector whose values are injected into the source and
            whose buffer is attached to the kernel.

    Returns:
        A one-element list containing the generated kernel.
    """
    namer = KernelNameInjector(op)

    encoded = encode_command(command_buffer)
    code = namer.inject(buffer_injector.inject(encoded))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            buffer_injector.buffer,
            buffer_injector.unresolved_value_list
        )
    ]
def average_pooling_2d(op: AveragePooling2D, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``AveragePooling2D`` operator.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``ksize``,
            ``stride`` and ``padding`` parameters.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    x = memory_layout[op.inputs["x"]]
    y = memory_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    # Suffix 1 refers to the input extents, suffix 2 to the output extents.
    in_dims = x.variable.shape_dict
    out_dims = y.variable.shape_dict
    params = op.parameters

    injector = BufferInjector()
    injector.register({
        "average_pooling_2d_X": x,
        "average_pooling_2d_Y": y,
        "average_pooling_2d_N": in_dims[Axis.N],
        "average_pooling_2d_H1": in_dims[Axis.H],
        "average_pooling_2d_W1": in_dims[Axis.W],
        "average_pooling_2d_C": in_dims[Axis.C],
        "average_pooling_2d_H2": out_dims[Axis.H],
        "average_pooling_2d_W2": out_dims[Axis.W],
        "average_pooling_2d_KH": params["ksize"][0],
        "average_pooling_2d_KW": params["ksize"][1],
        "average_pooling_2d_SH": params["stride"][0],
        "average_pooling_2d_SW": params["stride"][1],
        "average_pooling_2d_PH": params["padding"][0],
        "average_pooling_2d_PW": params["padding"][1],
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def sgemm(op: Sgemm,
          constants_layout: MemoryLayout,
          variables_layout: MemoryLayout,
          metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for an ``Sgemm`` operator (meta-buffer variant).

    Args:
        op: Operator providing inputs ``A`` and ``B``, output ``C``, the sizes
            ``M``/``N``/``K``, the transpose flags and the ``eigen`` parameter.
        constants_layout: Fallback layout for ``A``/``B`` when they are not in
            ``variables_layout``.
        variables_layout: Layout searched first for ``A``/``B``; always used for ``C``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.
    """
    def locate(variable):
        # Inputs may live in either layout; the variables layout takes precedence.
        if variable in variables_layout:
            return variables_layout[variable]
        return constants_layout[variable]

    A = locate(op.inputs["A"])
    B = locate(op.inputs["B"])
    C = variables_layout[op.outputs["C"]]

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    offsets = {
        "sgemm_A_offset": A.offset,
        "sgemm_B_offset": B.offset,
        "sgemm_C_offset": C.offset,
    }
    sizes = {
        "sgemm_M": op.M,
        "sgemm_N": op.N,
        "sgemm_K": op.K
    }
    metabuffer_injector.register({**offsets, **sizes})

    # NOTE(review): each branch registers again the same values as above; presumably
    # redundant, but kept because ``register`` semantics are not visible here.
    if op.parameters["eigen"]:
        source = generate_template_eigen(op.transpose_A, op.transpose_B, op.M, op.N, op.K)
        metabuffer_injector.register({**offsets})
    else:
        source = generate_template(op.transpose_A, op.transpose_B)
        metabuffer_injector.register({**offsets, **sizes})

    injected = metabuffer_injector.inject(source)
    func_name = util.add_canonical_suffix("sgemm", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def elementwise_kernel(op: Elementwise, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a registered elementwise operator.

    The entry registered for ``op.__class__`` in ``_registered_items`` supplies
    the code fragment and the per-operator parameter extractors.

    Args:
        op: Operator providing inputs ``x0``, ``x1``, ... and output ``y``.
        memory_layout: Layout used to look up the allocations of the inputs and ``y``.

    Returns:
        A one-element list containing the generated kernel.
    """
    xs = [memory_layout[op.inputs[f"x{i}"]] for i in range(len(op.inputs))]
    y = memory_layout[op.outputs["y"]]

    item = _registered_items[op.__class__]
    parameters = {}
    for key, extractor in item.parameters.items():
        parameters[key] = extractor(op)

    x_shapes = [x.variable.shape for x in xs]

    # Strides of y computed from its shape, innermost axis first, then flipped.
    y_strides = []
    running = 1
    for dim in reversed(y.variable.shape):
        y_strides.append(running)
        running *= dim
    y_strides.reverse()

    # x_strides_in_y[i][j] is the stride in y of the axis xs[i].order.axes[j].
    y_axis_index = y.variable.order.axes_dict
    x_strides_in_y = [
        [y_strides[y_axis_index[axis]] for axis in x.variable.order.axes]
        for x in xs
    ]

    injector = BufferInjector()
    # NOTE(review): N is taken from the first input, not from y - confirm this is intended.
    injector.register({
        "elementwise_Y": y,
        "elementwise_D": len(y.variable.shape),
        "elementwise_N": xs[0].variable.size,
        "elementwise_Xs": xs,
        "elementwise_X_strides_in_Y": x_strides_in_y,
        "elementwise_X_shapes": x_shapes
    })
    injector.register({f"elementwise_parameters_{key}": val for key, val in parameters.items()})

    namer = KernelNameInjector(op)

    code = _generate_source(xs, y, item.code, parameters)
    code = injector.inject(code)
    code = namer.inject(code)

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def slice_handler(op: Slice, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``Slice`` operator.

    Axes of ``x`` that survive in ``y`` must be indexed by a ``slice``; axes of
    ``x`` missing from ``y`` must be indexed by an ``int``. Both contribute to a
    single start offset into ``x``.

    Args:
        op: Operator providing input ``x``, output ``y`` and per-axis ``indices``.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If an index has the wrong type for its axis.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]

    kept_axes = [a for a in y.order.axes if a in x.order.axes]
    dropped_axes = [a for a in x.order.axes if a not in y.order.axes]

    start_offset = 0
    strides_in_y_order = []

    for axis in kept_axes:
        assert isinstance(op.indices[axis], slice)

        normalized = normalize_slice(op.indices[axis], x.shape_dict[axis])
        start_offset += x.stride_dict[axis] * normalized.start
        strides_in_y_order.append(x.stride_dict[axis] * normalized.step)

    for axis in dropped_axes:
        assert isinstance(op.indices[axis], int)

        start_offset += x.stride_dict[axis] * op.indices[axis]

    injector = BufferInjector()
    injector.register({
        "slice_ndim": len(kept_axes),
        "slice_X": memory_layout[x],
        "slice_x_stride_in_y_order": strides_in_y_order,
        "slice_x_index_offset": start_offset,
        "slice_Y": memory_layout[y],
        "slice_y_size": y.size,
        "slice_y_shape": [y.shape_dict[a] for a in kept_axes],
        "slice_y_stride": [y.stride_dict[a] for a in kept_axes]
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def sgemm(op: Sgemm, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``Sgemm`` operator.

    The template is chosen by ``op.parameters["eigen"]``.

    Args:
        op: Operator providing inputs ``A`` and ``B``, output ``C``, the sizes
            ``M``/``N``/``K``, the transpose flags and the ``eigen`` parameter.
        memory_layout: Layout used to look up the allocations of ``A``, ``B`` and ``C``.

    Returns:
        A one-element list containing the generated kernel.
    """
    A = memory_layout[op.inputs["A"]]
    B = memory_layout[op.inputs["B"]]
    C = memory_layout[op.outputs["C"]]

    operands = {"sgemm_A": A, "sgemm_B": B, "sgemm_C": C}
    sizes = {"sgemm_M": op.M, "sgemm_N": op.N, "sgemm_K": op.K}

    injector = BufferInjector()
    injector.register({**operands, **sizes})

    # NOTE(review): each branch registers again the same values as above; presumably
    # redundant, but kept because ``register`` semantics are not visible here.
    if op.parameters["eigen"]:
        source = generate_template_eigen(op.transpose_A, op.transpose_B, op.M, op.N, op.K)
        injector.register({**operands})
    else:
        source = generate_template(op.transpose_A, op.transpose_B)
        injector.register({**operands, **sizes})

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(source))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def average_pooling_2d(op: AveragePooling2D,
                       constants_layout: MemoryLayout,
                       variables_layout: MemoryLayout,
                       metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for an ``AveragePooling2D`` operator (meta-buffer variant).

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``ksize``,
            ``stride`` and ``padding`` parameters.
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of ``x`` and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.order == OrderNHWC
    assert dst.variable.order == OrderNHWC

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    in_dims = src.variable.shape_dict
    out_dims = dst.variable.shape_dict

    # Only element [0] of ksize/stride/padding is registered here.
    # NOTE(review): presumably assumes equal values for both spatial axes - confirm.
    metabuffer_injector.register({
        "average_pooling_2d_X_offset": src.offset,
        "average_pooling_2d_Y_offset": dst.offset,
        "average_pooling_2d_N": in_dims[Axis.N],
        "average_pooling_2d_H1": in_dims[Axis.H],
        "average_pooling_2d_W1": in_dims[Axis.W],
        "average_pooling_2d_C": in_dims[Axis.C],
        "average_pooling_2d_H2": out_dims[Axis.H],
        "average_pooling_2d_W2": out_dims[Axis.W],
        "average_pooling_2d_K": op.parameters["ksize"][0],
        "average_pooling_2d_S": op.parameters["stride"][0],
        "average_pooling_2d_P": op.parameters["padding"][0],
    })

    injected = metabuffer_injector.inject(template)
    func_name = util.add_canonical_suffix("average_pooling_2d", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def split_axis(op: SplitAxis, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``SplitAxis`` operator.

    Args:
        op: Operator providing input ``x``, outputs ``y0``, ``y1``, ... and the
            ``axis`` parameter.
        memory_layout: Layout used to look up the allocations of ``x`` and the outputs.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = memory_layout[op.inputs["x"]]
    ys = [memory_layout[op.outputs[f"y{i}"]] for i in range(len(op.outputs))]
    target_axis = op.parameters["axis"]

    y_shapes = [y.variable.shape for y in ys]

    # y_strides_in_x[i][j] is the stride in x of the axis ys[i].order.axes[j].
    y_strides_in_x = [
        [x.variable.stride[x.variable.order.axes_dict[axis]] for axis in y.variable.order.axes]
        for y in ys
    ]

    # x_offsets[i] is the offset within x at which the data of ys[i] begins.
    x_offsets = []
    consumed = 0
    for y in ys:
        x_offsets.append(consumed * x.variable.stride[x.variable.order.axes_dict[target_axis]])
        consumed += y.variable.shape_dict[target_axis]

    injector = BufferInjector()
    injector.register({
        "split_axis_x": x,
        "split_axis_D": len(x.variable.shape),
        "split_axis_N": len(ys),
        "split_axis_ys": ys,
        "split_axis_y_strides_in_x": y_strides_in_x,
        "split_axis_y_shapes": y_shapes,
        "split_axis_x_offsets": x_offsets
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def local_response_normalization(op: LocalResponseNormalization,
                                 constants_layout: MemoryLayout,
                                 variables_layout: MemoryLayout,
                                 metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for ``LocalResponseNormalization`` (meta-buffer variant).

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``n``, ``k``,
            ``alpha`` and ``beta`` parameters.
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of ``x`` and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    assert src.variable.order == OrderNHWC
    assert dst.variable.order == OrderNHWC

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    dims = src.variable.shape_dict

    # The template receives half of n (integer division) and the negated beta.
    metabuffer_injector.register({
        "local_response_normalization_X_offset": src.offset,
        "local_response_normalization_Y_offset": dst.offset,
        "local_response_normalization_N": dims[Axis.N],
        "local_response_normalization_H": dims[Axis.H],
        "local_response_normalization_W": dims[Axis.W],
        "local_response_normalization_C": dims[Axis.C],
        "local_response_normalization_param_half_n": int(op.parameters["n"] // 2),
        "local_response_normalization_param_k": float(op.parameters["k"]),
        "local_response_normalization_param_alpha": float(op.parameters["alpha"]),
        "local_response_normalization_param_minus_beta": float(-op.parameters["beta"])
    })

    injected = metabuffer_injector.inject(template)
    func_name = util.add_canonical_suffix("local_response_normalization", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def axiswise_bias(op: AxiswiseBias,
                  constants_layout: MemoryLayout,
                  variables_layout: MemoryLayout,
                  metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for an ``AxiswiseBias`` operator.

    Args:
        op: Operator providing inputs ``x`` and ``b``, output ``y``, the ``axis``
            parameter and optionally an ``inline_elementwise`` parameter.
        constants_layout: Layout used to look up the allocation of ``b``.
        variables_layout: Layout used to look up the allocations of ``x`` and ``y``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` is not ``OrderNC``, ``OrderNHWC`` or ``OrderHWNC``,
            if ``x`` and ``y`` differ in shape, or if ``axis`` is not ``Axis.C``.
    """
    src = variables_layout[op.inputs["x"]]
    bias = constants_layout[op.inputs["b"]]
    dst = variables_layout[op.outputs["y"]]

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    assert (src.variable.order == OrderNC
            or src.variable.order == OrderNHWC
            or src.variable.order == OrderHWNC)
    assert dst.variable.shape == src.variable.shape
    assert op.parameters["axis"] == Axis.C, "[Webassembly] AxiswiseBias supports only channelwise bias."

    # C is the channel extent; N is the number of elements per channel.
    metabuffer_injector.register({
        "axiswise_bias_X_offset": src.offset,
        "axiswise_bias_Y_offset": dst.offset,
        "axiswise_bias_B_offset": bias.offset,
        "axiswise_bias_N": dst.variable.size // dst.variable.shape_dict[Axis.C],
        "axiswise_bias_C": dst.variable.shape_dict[Axis.C],
    })

    # An optional elementwise delegate is injected inline after the meta values.
    inliner = InlineInjector()
    if "inline_elementwise" in op.parameters:
        inliner.delegate = op.parameters["inline_elementwise"]

    injected = metabuffer_injector.inject(template)
    injected = inliner.inject(injected)
    func_name = util.add_canonical_suffix("axiswise_bias", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]
def sgemm(op: Sgemm, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``Sgemm`` operator.

    The template is chosen by whether ``op`` carries the ``SgemmWithEigen`` attribute.

    Args:
        op: Operator providing inputs ``A`` and ``B``, output ``C``, the sizes
            ``M``/``N``/``K`` and the transpose flags.
        memory_layout: Layout used to look up the allocations of ``A``, ``B`` and ``C``.

    Returns:
        A one-element list containing the generated kernel.
    """
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]

    def operands():
        # Fresh lookup on every call, as each registration performs its own lookups.
        return {
            "sgemm_A": memory_layout[A],
            "sgemm_B": memory_layout[B],
            "sgemm_C": memory_layout[C],
        }

    def sizes():
        return {"sgemm_M": op.M, "sgemm_N": op.N, "sgemm_K": op.K}

    injector = BufferInjector()
    injector.register({**operands(), **sizes()})

    # NOTE(review): each branch registers again the same values as above; presumably
    # redundant, but kept because ``register`` semantics are not visible here.
    if op.has_attribute(SgemmWithEigen):
        source = generate_template_eigen(op.transpose_A, op.transpose_B)
        injector.register(operands())
    else:
        source = generate_template(op.transpose_A, op.transpose_B)
        injector.register({**operands(), **sizes()})

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(source))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def im2col(op: Im2Col, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for an ``Im2Col`` operator.

    ``template_KKCNHW`` is used when ``col`` is ordered (KH, KW, C, N, H, W);
    otherwise ``template_NHWKKC`` is used.

    Args:
        op: Operator providing input ``im``, output ``col`` and the kernel,
            dilation, stride and padding attributes (``KH``/``KW``, ``DH``/``DW``,
            ``SH``/``SW``, ``PH``/``PW``).
        memory_layout: Layout used to look up the allocations of ``im`` and ``col``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``im`` is not ``OrderNHWC`` or ``col`` has neither of
            the two accepted orders.
    """
    im = op.inputs["im"]
    col = op.outputs["col"]

    assert im.order == OrderNHWC

    accepted_col_orders = [
        Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]),
        Order([Axis.KH, Axis.KW, Axis.C, Axis.N, Axis.H, Axis.W])
    ]
    assert col.order in accepted_col_orders

    injector = BufferInjector()
    injector.register({
        "im2col_im": memory_layout[im],
        "im2col_col": memory_layout[col],
        "im2col_N": col.shape_dict[Axis.N],
        "im2col_C1": im.shape_dict[Axis.C],
        "im2col_H1": im.shape_dict[Axis.H],
        "im2col_W1": im.shape_dict[Axis.W],
        "im2col_H2": col.shape_dict[Axis.H],
        "im2col_W2": col.shape_dict[Axis.W],
        "im2col_KH": op.KH,
        "im2col_KW": op.KW,
        "im2col_DH": op.DH,
        "im2col_DW": op.DW,
        "im2col_SH": op.SH,
        "im2col_SW": op.SW,
        "im2col_PH": op.PH,
        "im2col_PW": op.PW,
    })

    namer = KernelNameInjector(op)

    if col.order == Order([Axis.KH, Axis.KW, Axis.C, Axis.N, Axis.H, Axis.W]):
        chosen_template = template_KKCNHW
    else:
        chosen_template = template_NHWKKC

    code = namer.inject(injector.inject(chosen_template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def local_response_normalization(op: LocalResponseNormalization, memory_layout: MemoryLayout) -> List[Kernel]:
    """Build the kernel list for a ``LocalResponseNormalization`` operator.

    Args:
        op: Operator providing input ``x``, output ``y`` and the ``n``, ``k``,
            ``alpha`` and ``beta`` parameters.
        memory_layout: Layout used to look up the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not ``OrderNHWC``.
    """
    x = memory_layout[op.inputs["x"]]
    y = memory_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    dims = x.variable.shape_dict

    # The template receives half of n (integer division) and the negated beta.
    injector = BufferInjector()
    injector.register({
        "local_response_normalization_X": x,
        "local_response_normalization_Y": y,
        "local_response_normalization_N": dims[Axis.N],
        "local_response_normalization_H": dims[Axis.H],
        "local_response_normalization_W": dims[Axis.W],
        "local_response_normalization_C": dims[Axis.C],
        "local_response_normalization_param_half_n": int(op.parameters["n"] // 2),
        "local_response_normalization_param_k": float(op.parameters["k"]),
        "local_response_normalization_param_alpha": float(op.parameters["alpha"]),
        "local_response_normalization_param_minus_beta": float(-op.parameters["beta"])
    })

    namer = KernelNameInjector(op)
    code = namer.inject(injector.inject(template))

    return [Kernel({namer.name: code}, namer.name, injector.buffer, injector.unresolved_value_list)]
def im2col(op: Im2Col,
           constants_layout: MemoryLayout,
           variables_layout: MemoryLayout,
           metabuffer_injector: MetaBufferInjector = None) -> List[Kernel]:
    """Build the kernel list for an ``Im2Col`` operator (meta-buffer variant).

    ``template_CNHW`` is used when ``col`` is ``OrderCNHW``; otherwise
    ``template_NHWC`` is used.

    Args:
        op: Operator providing input ``im``, output ``col`` and the kernel,
            stride and padding attributes (``KH``/``KW``, ``SH``/``SW``, ``PH``/``PW``).
        constants_layout: Not used by this handler.
        variables_layout: Layout used to look up the allocations of ``im`` and ``col``.
        metabuffer_injector: Injector receiving the meta values; a fresh
            ``MetaBufferInjector`` is created when ``None`` is passed.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``im`` is not ``OrderNHWC`` or ``col`` is neither
            ``OrderNHWC`` nor ``OrderCNHW``.
    """
    im = variables_layout[op.inputs["im"]]
    col = variables_layout[op.outputs["col"]]

    assert im.variable.order == OrderNHWC
    assert col.variable.order == OrderNHWC or col.variable.order == OrderCNHW

    if metabuffer_injector is None:
        metabuffer_injector = MetaBufferInjector()

    # Suffix 1 refers to the image extents, suffix 2 to the column extents.
    im_dims = im.variable.shape_dict
    col_dims = col.variable.shape_dict

    metabuffer_injector.register({
        "im2col_im_offset": im.offset,
        "im2col_col_offset": col.offset,
        "im2col_N": col_dims[Axis.N],
        "im2col_C1": im_dims[Axis.C],
        "im2col_H1": im_dims[Axis.H],
        "im2col_W1": im_dims[Axis.W],
        "im2col_H2": col_dims[Axis.H],
        "im2col_W2": col_dims[Axis.W],
        "im2col_KH": op.KH,
        "im2col_KW": op.KW,
        "im2col_SH": op.SH,
        "im2col_SW": op.SW,
        "im2col_PH": op.PH,
        "im2col_PW": op.PW,
    })

    if col.variable.order == OrderCNHW:
        chosen_template = template_CNHW
    else:
        chosen_template = template_NHWC

    injected = metabuffer_injector.inject(chosen_template)
    func_name = util.add_canonical_suffix("im2col", injected)
    code = injected.replace("%%FUNC_NAME%%", func_name)

    return [Kernel({func_name: code}, func_name, metabuffer_injector.generate_buffer())]