Esempio n. 1
0
def tanh(op: Tanh, constants_layout: MemoryLayout,
         variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a ``Tanh`` operator.

    Args:
        op: Operator whose ``"x"`` input and ``"y"`` output are resolved
            through ``variables_layout``.
        constants_layout: Not used by this function.
        variables_layout: Layout that yields the allocations of ``x`` and ``y``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` and ``y`` differ in shape or in order.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]

    # The template receives a single element count (N) for both buffers, so
    # input and output must agree in shape and order.
    assert src.variable.shape == dst.variable.shape
    assert src.variable.order == dst.variable.order

    meta = MetaInjector()
    meta.register({
        "tanh_X_offset": src.offset,
        "tanh_Y_offset": dst.offset,
        "tanh_N": dst.variable.size
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    # NOTE(review): both GPUSize values are fixed constants; presumably grid
    # and threadgroup dimensions -- confirm against Kernel's signature.
    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 2
0
def axiswise_bias_same_order(op: AxiswiseBias, constants_layout: MemoryLayout,
                             variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for ``AxiswiseBias`` using the D1/D2/D3 split.

    The shape of ``x`` is split around ``op.axis`` into three extents: the
    product of the dimensions before the axis (D1), the axis length itself
    (D2) and the product of the dimensions after it (D3).

    Args:
        op: Operator providing inputs ``"x"`` and ``"b"``, output ``"y"`` and
            the ``axis`` attribute.
        constants_layout: Layout from which the bias ``b`` is resolved.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    b = constants_layout[op.inputs["b"]]
    y = variables_layout[op.outputs["y"]]

    axis_pos = x.variable.order.axes_dict[op.axis]
    dims = x.variable.shape
    outer = int(np.prod(dims[:axis_pos]))
    axis_len = dims[axis_pos]
    inner = int(np.prod(dims[axis_pos + 1:]))

    meta = MetaInjector()
    meta.register({
        "axiswise_bias_X_offset": x.offset,
        "axiswise_bias_B_offset": b.offset,
        "axiswise_bias_Y_offset": y.offset,
        "axiswise_bias_D1": outer,
        "axiswise_bias_D2": axis_len,
        "axiswise_bias_D3": inner
    })

    namer = KernelNameInjector(op)

    # The template generator is specialised on the outer and inner extents.
    code = generate_template_same_order(outer, inner)
    code = namer.inject(meta.inject(code))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 3
0
def scalar_affine(op: ScalarAffine, constants_layout: MemoryLayout,
                  variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a ``ScalarAffine`` operator.

    Args:
        op: Operator providing input ``"x"``, output ``"y"`` and the ``scale``
            and ``bias`` attributes (both converted with ``float``).
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` and ``y`` have different shapes.
    """
    src = variables_layout[op.inputs["x"]]
    dst = variables_layout[op.outputs["y"]]
    assert src.variable.shape == dst.variable.shape

    meta = MetaInjector()
    meta.register({
        "affine_transform_X_offset": src.offset,
        "affine_transform_Y_offset": dst.offset,
        "affine_transform_N": dst.variable.size,
        "affine_transform_scale": float(op.scale),
        "affine_transform_bias": float(op.bias)
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 4
0
def max_pooling_2d(op: MaxPooling2D, constants_layout: MemoryLayout,
                   variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a ``MaxPooling2D`` operator.

    Args:
        op: Operator providing input ``"x"``, output ``"y"`` and the
            ``"ksize"``, ``"stride"`` and ``"padding"`` parameters.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not in ``OrderNHWC``.
    """
    x = variables_layout[op.inputs["x"]]
    y = variables_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    in_dims = x.variable.shape_dict
    out_dims = y.variable.shape_dict
    params = op.parameters

    # NOTE(review): only element [0] of ksize/stride/padding is forwarded, so
    # presumably square windows are assumed -- confirm with the callers.
    meta = MetaInjector()
    meta.register({
        "max_pooling_2d_X_offset": x.offset,
        "max_pooling_2d_Y_offset": y.offset,
        "max_pooling_2d_N": in_dims[Axis.N],
        "max_pooling_2d_H1": in_dims[Axis.H],
        "max_pooling_2d_W1": in_dims[Axis.W],
        "max_pooling_2d_C": in_dims[Axis.C],
        "max_pooling_2d_H2": out_dims[Axis.H],
        "max_pooling_2d_W2": out_dims[Axis.W],
        "max_pooling_2d_K": params["ksize"][0],
        "max_pooling_2d_S": params["stride"][0],
        "max_pooling_2d_P": params["padding"][0],
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 5
0
def axiswise_scale(op: AxiswiseScale, constants_layout: MemoryLayout,
                   variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a channel-wise ``AxiswiseScale`` operator.

    Args:
        op: Operator providing inputs ``"x"`` and ``"s"``, output ``"y"`` and
            the ``"axis"`` parameter.
        constants_layout: Layout from which the scale ``s`` is resolved.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not in one of ``OrderNC``,
            ``OrderNHWC`` or ``OrderHWNC``, or if the ``"axis"`` parameter is
            not ``Axis.C``.
    """
    x = variables_layout[op.inputs["x"]]
    s = constants_layout[op.inputs["s"]]
    y = variables_layout[op.outputs["y"]]

    supported_orders = (OrderNC, OrderNHWC, OrderHWNC)
    assert x.variable.order in supported_orders
    assert y.variable.order in supported_orders
    # The message previously said "channelwise bias" (copied from the bias
    # operator); this operator scales.
    assert op.parameters[
        "axis"] == Axis.C, "[WebGPU] AxiswiseScale supports only channelwise scale."

    meta_injector = MetaInjector()
    meta_injector.register({
        "axiswise_scale_X_offset": x.offset,
        "axiswise_scale_Y_offset": y.offset,
        "axiswise_scale_S_offset": s.offset,
        "axiswise_scale_N": y.variable.size,
        "axiswise_scale_C": y.variable.shape_dict[Axis.C],
    })

    name_injector = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    source = template
    source = meta_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1), GPUSize(1024, 1, 1),
                    meta_injector.buffer)

    return [kernel]
Esempio n. 6
0
def elementwise_sum(op: AxiswiseScale,
                    constants_layout: MemoryLayout,
                    variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a two-input element-wise sum.

    Args:
        op: Operator providing inputs ``"x0"`` and ``"x1"`` and output ``"y"``.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x0``, ``x1`` and ``y`` are
            resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``op`` does not have exactly two inputs, or if the
            shapes of ``x0``, ``x1`` and ``y`` are not all equal.
    """
    # NOTE(review): the ``op`` annotation says AxiswiseScale; presumably it
    # should be the ElementwiseSum operator type -- confirm and fix.

    # Check the input count before indexing ``op.inputs`` so that an operator
    # with fewer than two inputs fails with this message and not a KeyError.
    assert len(op.inputs) == 2, "[WebGPU] ElementwiseSum operator currently supported only 2 inputs."

    x0 = variables_layout[op.inputs["x0"]]
    x1 = variables_layout[op.inputs["x1"]]
    y = variables_layout[op.outputs["y"]]

    assert x0.variable.shape == x1.variable.shape == y.variable.shape

    meta_injector = MetaInjector()
    meta_injector.register({
        "elementwise_sum_X0_offset": x0.offset,
        "elementwise_sum_X1_offset": x1.offset,
        "elementwise_sum_Y_offset": y.offset,
        "elementwise_sum_N": y.variable.size
    })

    inline_injector = InlineInjector(op)
    name_injector = KernelNameInjector(op)

    # The template depends on the element count and on whether an inline
    # function is attached to the operator.
    source = generate_template(y.variable.size, inline_injector.has_inline)
    source = meta_injector.inject(source)
    source = inline_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel(
        {name_injector.name: source},
        name_injector.name,
        GPUSize(8, 1, 1),
        GPUSize(1024, 1, 1),
        meta_injector.buffer
    )

    return [kernel]
def local_response_normalization_general(
        op: LocalResponseNormalization, constants_layout: MemoryLayout,
        variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the LRN kernel list using per-axis strides of ``x`` in ``y``.

    For each axis of ``x`` (in ``x``'s own axis order) the stride of that axis
    inside ``y`` is computed and passed to the template, together with the
    shape of ``x`` and the operator parameters ``n``, ``k``, ``alpha`` and
    ``beta``.

    Args:
        op: Operator providing input ``"x"``, output ``"y"`` and the
            parameters listed above.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    y = variables_layout[op.outputs["y"]]

    target_axis = Axis.C

    # Strides of y: the last dimension has stride 1 and every earlier one is
    # the product of all later extents.
    y_strides = []
    running = 1
    for extent in reversed(y.variable.shape):
        y_strides.append(running)
        running *= extent
    y_strides.reverse()

    # Stride in y of every axis of x, listed in x's axis order.
    y_axis_index = y.variable.order.axes_dict
    x_stride_in_y = [
        y_strides[y_axis_index[axis]] for axis in x.variable.order.axes
    ]

    params = op.parameters

    meta = MetaInjector()
    meta.register({
        "local_response_normalization_X_offset": x.offset,
        "local_response_normalization_Y_offset": y.offset,
        "local_response_normalization_D": x.variable.ndim,
        "local_response_normalization_d_target":
            x.variable.order.axes_dict[target_axis],
        "local_response_normalization_x_shape":
            np.array(x.variable.shape, dtype=np.int32).tobytes(),
        "local_response_normalization_x_stride_in_y":
            np.array(x_stride_in_y, dtype=np.int32).tobytes(),
        "local_response_normalization_param_half_n": int(params["n"] // 2),
        "local_response_normalization_param_k": float(params["k"]),
        "local_response_normalization_param_alpha": float(params["alpha"]),
        # beta is passed negated.
        "local_response_normalization_param_minus_beta":
            float(-params["beta"])
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template_general))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 8
0
def axiswise_bias_general(op: AxiswiseBias, constants_layout: MemoryLayout,
                          variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the ``AxiswiseBias`` kernel list using strides of ``x`` in ``y``.

    For each axis of ``x`` (in ``x``'s own axis order) the stride of that axis
    inside ``y`` is computed and passed to the template together with the
    shape of ``x`` and the index of ``op.axis`` within ``x``'s order.

    Args:
        op: Operator providing inputs ``"x"`` and ``"b"``, output ``"y"`` and
            the ``axis`` attribute.
        constants_layout: Layout from which the bias ``b`` is resolved.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    b = constants_layout[op.inputs["b"]]
    y = variables_layout[op.outputs["y"]]

    # Strides of y: the last dimension has stride 1 and every earlier one is
    # the product of all later extents.
    y_strides = []
    running = 1
    for extent in reversed(y.variable.shape):
        y_strides.append(running)
        running *= extent
    y_strides.reverse()

    # Stride in y of every axis of x, listed in x's axis order.
    y_axis_index = y.variable.order.axes_dict
    x_stride_in_y = [
        y_strides[y_axis_index[axis]] for axis in x.variable.order.axes
    ]

    meta = MetaInjector()
    meta.register({
        "axiswise_bias_X_offset": x.offset,
        "axiswise_bias_B_offset": b.offset,
        "axiswise_bias_Y_offset": y.offset,
        "axiswise_bias_D": x.variable.ndim,
        "axiswise_bias_d_target": x.variable.order.axes_dict[op.axis],
        "axiswise_bias_x_shape":
            np.array(x.variable.shape, dtype=np.int32).tobytes(),
        "axiswise_bias_x_stride_in_y":
            np.array(x_stride_in_y, dtype=np.int32).tobytes(),
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template_general))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 9
0
def im2col(op: Im2Col, constants_layout: MemoryLayout,
           variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for an ``Im2Col`` operator.

    Args:
        op: Operator providing input ``"im"``, output ``"col"`` and the
            attributes ``KH``, ``KW``, ``SH``, ``SW``, ``PH`` and ``PW``.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``im`` and ``col`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``im`` is not in ``OrderNHWC``, or ``col`` is in
            neither ``OrderNHWC`` nor ``OrderCNHW``.
    """
    im = variables_layout[op.inputs["im"]]
    col = variables_layout[op.outputs["col"]]

    assert im.variable.order == OrderNHWC
    assert col.variable.order == OrderNHWC or col.variable.order == OrderCNHW

    im_dims = im.variable.shape_dict
    col_dims = col.variable.shape_dict

    N = im_dims[Axis.N]
    C1 = im_dims[Axis.C]
    H1 = im_dims[Axis.H]
    W1 = im_dims[Axis.W]

    # Input height and width enlarged by the padding on both sides.
    H1P = H1 + 2 * op.PH
    W1P = W1 + 2 * op.PW

    meta = MetaInjector()
    meta.register({
        "im2col_im_offset": im.offset,
        "im2col_col_offset": col.offset,
        "im2col_N": col_dims[Axis.N],
        "im2col_C1": C1,
        "im2col_H1": H1,
        "im2col_W1": W1,
        "im2col_H2": col_dims[Axis.H],
        "im2col_W2": col_dims[Axis.W],
        "im2col_KH": op.KH,
        "im2col_KW": op.KW,
        "im2col_SH": op.SH,
        "im2col_SW": op.SW,
        "im2col_PH": op.PH,
        "im2col_PW": op.PW,
    })

    namer = KernelNameInjector(op)

    # The CNHW output uses a fixed template; the NHWC output uses a template
    # generated from the strides and the input channel count.
    if col.variable.order == OrderCNHW:
        code = template_CNHW
    else:
        code = generate_template_NHWC(op.SH, op.SW, C1)
    code = namer.inject(meta.inject(code))

    # Unlike the fixed-size generators, the first GPUSize here scales with
    # N * H1P * W1P.
    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(N * H1P * W1P, 1, 1),
            GPUSize(64, 1, 1),
            meta.buffer,
        )
    ]
def local_response_normalization_same_order(
        op: LocalResponseNormalization, constants_layout: MemoryLayout,
        variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the LRN kernel list using the D1/D2/D3 split around ``Axis.C``.

    The shape of ``x`` is split around the channel axis into the product of
    the dimensions before it (D1), the channel length (D2) and the product of
    the dimensions after it (D3).

    Args:
        op: Operator providing input ``"x"``, output ``"y"`` and the
            parameters ``n``, ``k``, ``alpha`` and ``beta``.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.
    """
    x = variables_layout[op.inputs["x"]]
    y = variables_layout[op.outputs["y"]]

    target_axis = Axis.C  # FIXME
    axis_pos = x.variable.order.axes_dict[target_axis]
    dims = x.variable.shape
    outer = int(np.prod(dims[:axis_pos]))
    axis_len = dims[axis_pos]
    inner = int(np.prod(dims[axis_pos + 1:]))

    params = op.parameters

    meta = MetaInjector()
    meta.register({
        "local_response_normalization_X_offset": x.offset,
        "local_response_normalization_Y_offset": y.offset,
        "local_response_normalization_D1": outer,
        "local_response_normalization_D2": axis_len,
        "local_response_normalization_D3": inner,
        "local_response_normalization_param_half_n": int(params["n"] // 2),
        "local_response_normalization_param_k": float(params["k"]),
        "local_response_normalization_param_alpha": float(params["alpha"]),
        # beta is passed negated.
        "local_response_normalization_param_minus_beta":
            float(-params["beta"])
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template_same_order))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 11
0
def sgemm(op: Sgemm, constants_layout: MemoryLayout,
          variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for an ``Sgemm`` operator.

    Each of ``A`` and ``B`` is taken from ``variables_layout`` when present
    there and from ``constants_layout`` otherwise. An optional ``"b"`` input
    enables the bias variant and is always read from ``constants_layout``.

    Args:
        op: Operator providing inputs ``"A"``, ``"B"`` (optionally ``"b"``),
            output ``"C"`` and the attributes ``M``, ``N``, ``K``,
            ``transpose_A`` and ``transpose_B``.
        constants_layout: Fallback layout for ``A``/``B`` and the layout of
            the bias.
        variables_layout: Layout of ``C`` and preferred layout of ``A``/``B``.

    Returns:
        A one-element list containing the generated kernel.
    """
    a_var = op.inputs["A"]
    if a_var in variables_layout:
        A = variables_layout[a_var]
    else:
        A = constants_layout[a_var]

    b_var = op.inputs["B"]
    if b_var in variables_layout:
        B = variables_layout[b_var]
    else:
        B = constants_layout[b_var]

    C = variables_layout[op.outputs["C"]]

    with_bias = "b" in op.inputs
    # Without a bias input the offset is registered as 0.
    if with_bias:
        bias_offset = constants_layout[op.inputs["b"]].offset
    else:
        bias_offset = 0

    meta = MetaInjector()
    meta.register({
        "sgemm_A_offset": A.offset,
        "sgemm_B_offset": B.offset,
        "sgemm_C_offset": C.offset,
        "sgemm_b_offset": bias_offset,
        "sgemm_M": op.M,
        "sgemm_N": op.N,
        "sgemm_K": op.K
    })

    inliner = InlineInjector(op)
    namer = KernelNameInjector(op)

    # transpose_X assumes fortran-order data. True means X is C-order, False means Fortran-order.
    # In default convolution, transpose_A == transpose_B == True.
    # The order of output matrix C is C-order.
    code = generate_template_64(op.transpose_A, op.transpose_B, op.M, op.N,
                                op.K, inliner.has_inline, with_bias)
    code = meta.inject(code)
    code = inliner.inject(code)
    code = namer.inject(code)

    # First GPUSize: ceil(M / 64) by ceil(N / 64).
    tile = 64
    grid = GPUSize((op.M + tile - 1) // tile, (op.N + tile - 1) // tile, 1)

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            grid,
            GPUSize(64, 1, 1),
            meta.buffer,
        )
    ]
def local_response_normalization(
        op: LocalResponseNormalization, constants_layout: MemoryLayout,
        variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the LRN kernel list for NHWC input and output.

    Args:
        op: Operator providing input ``"x"``, output ``"y"`` and the
            parameters ``n``, ``k``, ``alpha`` and ``beta``.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``x`` and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``x`` or ``y`` is not in ``OrderNHWC``.
    """
    x = variables_layout[op.inputs["x"]]
    y = variables_layout[op.outputs["y"]]

    assert x.variable.order == OrderNHWC
    assert y.variable.order == OrderNHWC

    dims = x.variable.shape_dict
    params = op.parameters

    meta = MetaInjector()
    meta.register({
        "local_response_normalization_X_offset": x.offset,
        "local_response_normalization_Y_offset": y.offset,
        "local_response_normalization_N": dims[Axis.N],
        "local_response_normalization_H": dims[Axis.H],
        "local_response_normalization_W": dims[Axis.W],
        "local_response_normalization_C": dims[Axis.C],
        "local_response_normalization_param_half_n": int(params["n"] // 2),
        "local_response_normalization_param_k": float(params["k"]),
        "local_response_normalization_param_alpha": float(params["alpha"]),
        # beta is passed negated.
        "local_response_normalization_param_minus_beta":
            float(-params["beta"])
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 13
0
def col2im(op: Col2Im, constants_layout: MemoryLayout,
           variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a ``Col2Im`` operator.

    Args:
        op: Operator providing input ``"col"``, output ``"im"`` and the
            attributes ``KH``, ``KW``, ``SH``, ``SW``, ``PH`` and ``PW``.
        constants_layout: Not used by this function.
        variables_layout: Layout from which ``col`` and ``im`` are resolved.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``col`` or ``im`` is not in ``OrderNHWC``.
    """
    col = variables_layout[op.inputs["col"]]
    im = variables_layout[op.outputs["im"]]

    assert col.variable.order == OrderNHWC
    assert im.variable.order == OrderNHWC

    col_dims = col.variable.shape_dict
    im_dims = im.variable.shape_dict

    meta = MetaInjector()
    meta.register({
        "col2im_im_offset": im.offset,
        "col2im_col_offset": col.offset,
        "col2im_N": col_dims[Axis.N],
        "col2im_H2": col_dims[Axis.H],
        "col2im_W2": col_dims[Axis.W],
        "col2im_C1": im_dims[Axis.C],
        "col2im_H1": im_dims[Axis.H],
        "col2im_W1": im_dims[Axis.W],
        "col2im_KH": op.KH,
        "col2im_KW": op.KW,
        "col2im_SH": op.SH,
        "col2im_SW": op.SW,
        "col2im_PH": op.PH,
        "col2im_PW": op.PW,
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]
Esempio n. 14
0
def concat(op: Concat, constants_layout: MemoryLayout,
           variables_layout: MemoryLayout) -> List[Kernel]:
    """Generate the kernel list for a ``Concat`` operator.

    The inputs are read as ``"x0"``, ``"x1"``, ... up to ``len(op.inputs)``.
    For every input the template receives its offset, its shape, the stride
    in ``y`` of each of its axes, and the element offset in ``y`` at which
    its data starts along ``op.axis``.

    Args:
        op: Operator providing the ``"x<i>"`` inputs, output ``"y"`` and the
            ``axis`` attribute.
        constants_layout: Not used by this function.
        variables_layout: Layout from which all inputs and ``y`` are resolved.

    Returns:
        A one-element list containing the generated kernel.
    """
    xs = [
        variables_layout[op.inputs[f"x{str(i)}"]]
        for i in range(len(op.inputs))
    ]
    y = variables_layout[op.outputs["y"]]
    target_axis = op.axis

    x_offsets = [item.offset for item in xs]
    x_shapes = [item.variable.shape for item in xs]

    # Strides of y: the last dimension has stride 1 and every earlier one is
    # the product of all later extents.
    y_strides = []
    running = 1
    for extent in reversed(y.variable.shape):
        y_strides.append(running)
        running *= extent
    y_strides.reverse()

    # x_strides_in_y[i][j] is the stride in y of xs[i].variable.order.axes[j].
    x_strides_in_y = [
        [y_strides[y.variable.order.axes_dict[axis]]
         for axis in item.variable.order.axes]
        for item in xs
    ]

    # y_offsets[i] is the offset within y at which the data of xs[i] starts.
    y_offsets = []
    consumed = 0
    for item in xs:
        y_offsets.append(consumed *
                         y_strides[y.variable.order.axes_dict[target_axis]])
        consumed += item.variable.shape_dict[target_axis]

    meta = MetaInjector()
    meta.register({
        "concat_y_offset": y.offset,
        "concat_D": len(y.variable.shape),
        "concat_N": len(xs),
        "concat_x_offsets": np.array(x_offsets, dtype=np.int32).tobytes(),
        "concat_x_strides_in_y":
            np.array(x_strides_in_y, dtype=np.int32).tobytes(),
        "concat_x_shapes": np.array(x_shapes, dtype=np.int32).tobytes(),
        "concat_y_offsets": np.array(y_offsets, dtype=np.int32).tobytes(),
    })

    namer = KernelNameInjector(op)

    # Metadata is injected first, then the kernel name.
    code = namer.inject(meta.inject(template))

    return [
        Kernel(
            {namer.name: code},
            namer.name,
            GPUSize(8, 1, 1),
            GPUSize(1024, 1, 1),
            meta.buffer,
        )
    ]