Exemple #1
0
def elementwise_add(op: Sgemm) -> List[Kernel]:
    """Build the kernel list for an ``Sgemm`` operator.

    Inputs ``A`` and ``B`` must have the same channel mode. The texture
    shapes/strides of the operands, the ``[M, N]`` output layout and the
    transpose-dependent strides of ``A`` and ``B`` are registered as
    uniforms. The source comes from ``generate_template`` called with
    ``A``'s channel mode and ``K``.

    Args:
        op: Operator providing inputs ``A``/``B``, output ``C``, the sizes
            ``M``, ``N``, ``K`` and the ``transpose_A``/``transpose_B`` flags.

    Returns:
        A one-element list containing the kernel built for ``C``.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]

    assert ChannelMode.get_mode(lhs) == ChannelMode.get_mode(rhs)

    # Logical strides of each operand depend on its transpose flag.
    if op.transpose_A:
        logical_stride_a = [op.K, 1]
    else:
        logical_stride_a = [1, op.M]

    if op.transpose_B:
        logical_stride_b = [op.N, 1]
    else:
        logical_stride_b = [1, op.K]

    namer = KernelNameInjector(op)
    injector = UniformInjector()
    injector.register({
        "A": lhs,
        "B": rhs,
        "s_c": texture_stride(out),
        "d_C": [op.M, op.N],
        "s_C": [op.N, 1],
        "d_a": texture_shape(lhs),
        "s_a": texture_stride(lhs),
        "s_A": logical_stride_a,
        "d_b": texture_shape(rhs),
        "s_b": texture_stride(rhs),
        "s_B": logical_stride_b,
        "K": op.K
    })

    code = generate_template(mode=ChannelMode.get_mode(lhs), K=op.K)
    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, out)
    ]
Exemple #2
0
def convert_rgba_to_r(op: ConvertRGBAtoR) -> List[Kernel]:
    """Build the kernel list for a ``ConvertRGBAtoR`` operator.

    Registers the input ``x0`` together with its texture shape/stride and
    the texture stride of output ``y``, then injects the uniforms and the
    kernel name into the module-level ``template``.

    Args:
        op: Operator providing input ``x0`` and output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src_var = op.inputs["x0"]
    dst_var = op.outputs["y"]

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    uniform_values = {
        "X0": src_var,

        "s_y": texture_stride(dst_var),

        "d_x0": texture_shape(src_var),
        "s_x0": texture_stride(src_var),
    }
    injector.register(uniform_values)

    # Uniforms are injected first, then the kernel name.
    code = namer.inject(injector.inject(template))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms,
               dst_var)
    ]
Exemple #3
0
def convert_rgba_to_r(op: ConvertRGBAtoR) -> List[Kernel]:
    """Build the kernel list for a ``ConvertRGBAtoR`` operator.

    The input ``x0`` must be in RGBA channel mode and the output ``y`` in R
    channel mode; both must have the same order.

    Args:
        op: Operator providing input ``x0`` and output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.

    Raises:
        NotImplementedError: If ``x0`` and ``y`` have different orders.
    """
    source_var = op.inputs["x0"]
    target_var = op.outputs["y"]

    assert ChannelMode.get(source_var) == ChannelModeEnum.RGBA
    assert ChannelMode.get(target_var) == ChannelModeEnum.R

    # Converting between different orders is not supported here.
    if source_var.order != target_var.order:
        raise NotImplementedError

    kernel_namer = KernelNameInjector(op)
    uniforms = UniformInjector()
    uniforms.register({
        "X0": source_var,
        "s_y": texture_stride(target_var),
        "d_x0": texture_shape(source_var),
        "s_x0": texture_stride(source_var),
    })

    code = uniforms.inject(template)
    code = kernel_namer.inject(code)

    result = Kernel(
        code,
        kernel_namer.name,
        uniforms.samplers,
        uniforms.uniforms,
        target_var,
    )
    return [result]
def reinterpret_axis(op: ReinterpretAxis) -> List[Kernel]:
    """Build the kernel list for a ``ReinterpretAxis`` operator.

    Registers input ``x`` with its texture shape/stride and the texture
    stride of output ``y``, then injects uniforms and kernel name into the
    module-level ``template``.

    Args:
        op: Operator providing input ``x`` and output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    injector.register({
        "X": src,
        "s_y": texture_stride(dst),
        "d_x": texture_shape(src),
        "s_x": texture_stride(src),
    })

    code = namer.inject(injector.inject(template))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, dst)
    ]
Exemple #5
0
def space2depth(op: Space2Depth) -> List[Kernel]:
    """Build the kernel list for a ``Space2Depth`` operator.

    Both the input ``x`` and the output ``y`` must be in ``OrderNHWC``.
    Their shapes and strides, the operator parameter ``r`` and the channel
    count of ``x`` are registered as uniforms for the module-level
    ``template``.

    Args:
        op: Operator providing input ``x``, output ``y`` and parameter ``r``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']

    assert src.order == OrderNHWC
    assert dst.order == OrderNHWC

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    uniform_values = {
        "X": src,
        "s_y": texture_stride(dst),
        "d_Y": dst.shape,
        "s_Y": dst.stride,
        "d_x": texture_shape(src),
        "s_x": texture_stride(src),
        "d_X": src.shape,
        "s_X": src.stride,
        "r": block,
        "C1": src.shape_dict[Axis.C],
    }
    injector.register(uniform_values)

    code = namer.inject(injector.inject(template))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, dst)
    ]
Exemple #6
0
def elementwise_add(op: ClippedRelu) -> List[Kernel]:
    """Build the kernel list for a ``ClippedRelu`` operator.

    Shapes and strides for ``x0`` and ``y`` come from
    ``optimize_loop_structure``. They are registered as uniforms together
    with the texture shapes/strides and the operator parameter ``cap``.

    Args:
        op: Operator providing input ``x0``, output ``y`` and parameter
            ``cap``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    loop_shapes, loop_strides = optimize_loop_structure([src, dst], dst)

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    uniform_values = {
        "X0": src,
        "s_y": texture_stride(dst),
        "d_Y": loop_shapes[dst],
        "s_Y": loop_strides[dst],
        "d_x0": texture_shape(src),
        "s_x0": texture_stride(src),
        "d_X0": loop_shapes[src],
        "s_X0": loop_strides[src],
        "cap": op.parameters["cap"]
    }
    injector.register(uniform_values)

    code = injector.inject(template)
    code = namer.inject(code)

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, dst)
    ]
Exemple #7
0
def average_pooling_2d(op: Unpooling2D) -> List[Kernel]:
    """Build the kernel list for an ``Unpooling2D`` operator.

    Both ``x`` and ``y`` must be in ``OrderNHWC``. The source is produced
    by ``generate_template`` from the ``ksize`` parameter; the ``stride``
    and ``padding`` parameters are passed as uniforms.

    Args:
        op: Operator providing input ``x``, output ``y`` and the parameters
            ``ksize``, ``stride`` and ``padding``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    assert src.order == OrderNHWC
    assert dst.order == OrderNHWC

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    params = op.parameters
    src_dims = src.shape_dict
    injector.register({
        "X": src,
        "s_y": texture_stride(dst),
        "d_Y": dst.shape,
        "s_Y": dst.stride,
        "d_x": texture_shape(src),
        "s_x": texture_stride(src),
        "s_X": src.stride,
        "C1": src_dims[Axis.C],
        "H1": src_dims[Axis.H],
        "W1": src_dims[Axis.W],
        "SH": params["stride"][0],
        "SW": params["stride"][1],
        "PH": params["padding"][0],
        "PW": params["padding"][1],
    })

    code = generate_template(ksize=op.parameters["ksize"])
    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, dst)
    ]
Exemple #8
0
def elementwise_add(op: Tanh) -> List[Kernel]:
    """Build the kernel list for a ``Tanh`` operator.

    Shapes and strides for ``x0`` and ``y`` come from
    ``optimize_loop_structure``. ``template_R`` is used when ``y`` is in R
    channel mode, otherwise ``template_RGBA``.

    Args:
        op: Operator providing input ``x0`` and output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    loop_shapes, loop_strides = optimize_loop_structure([src, dst], dst)

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    injector.register({
        "X0": src,
        "s_y": texture_stride(dst),
        "d_Y": loop_shapes[dst],
        "s_Y": loop_strides[dst],
        "d_x0": texture_shape(src),
        "s_x0": texture_stride(src),
        "d_X0": loop_shapes[src],
        "s_X0": loop_strides[src],
    })

    # The template depends on the channel mode of the output.
    if ChannelMode.get(dst) == ChannelModeEnum.R:
        code = template_R
    else:
        code = template_RGBA

    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, dst)
    ]
Exemple #9
0
def convert_r_to_rgba(op: ConvertRtoRGBA) -> List[Kernel]:
    """Build the kernel list for a ``ConvertRtoRGBA`` operator.

    The input ``x0`` must be in R channel mode and the output ``y`` in RGBA
    channel mode. Orders are simplified with ``simplify_orders``. The
    input's shape and stride are re-expressed in the output's axis order,
    with 1 for axes the input lacks, and everything is padded with 1s to
    four entries.

    Args:
        op: Operator providing input ``x0`` and output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.

    Raises:
        NotImplementedError: If the simplified order of ``y`` has more than
            four dimensions.
    """
    x = op.inputs["x0"]
    y = op.outputs["y"]

    assert ChannelMode.get(x) == ChannelModeEnum.R
    assert ChannelMode.get(y) == ChannelModeEnum.RGBA

    orders, shape_dicts = simplify_orders([x, y])

    shapes = {}
    strides = {}
    stride_dicts = {}
    for var in (x, y):
        var_axes = orders[var].axes
        shapes[var] = [shape_dicts[var][a] for a in var_axes]
        # Stride of an axis is the product of all sizes that follow it.
        strides[var] = [
            mul(shapes[var][orders[var].axes_dict[a] + 1:]) for a in var_axes
        ]
        stride_dicts[var] = AxisKeyDict(var_axes, strides[var])

    # Re-express x's shape and stride in y's axis order.
    x_shape_in_y_order = []
    x_stride_in_y_order = []
    for a in orders[y].axes:
        x_shape_in_y_order.append(
            shape_dicts[x][a] if a in orders[x].axes else 1)
    for a in orders[y].axes:
        x_stride_in_y_order.append(
            stride_dicts[x][a] if a in orders[x].axes else 1)
    shapes[x] = x_shape_in_y_order
    strides[x] = x_stride_in_y_order

    # Only up to four dimensions can be padded to 4D.
    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    for var in (x, y):
        n_missing = 4 - len(shapes[var])
        if n_missing > 0:
            strides[var].extend([1] * n_missing)
            shapes[var].extend([1] * n_missing)

    namer = KernelNameInjector(op)
    injector = UniformInjector()
    injector.register({
        "sampler_x": x,
        "texture_stride_y": texture_stride(y),
        "variable_shape_y": shapes[y],
        "variable_stride_y": strides[y],
        "texture_shape_x": texture_shape(x),
        "texture_stride_x": texture_stride(x),
        "variable_shape_x": shapes[x],
        "variable_stride_x": strides[x],
    })

    code = namer.inject(injector.inject(template))

    return [Kernel(code, namer.name, injector.samplers, injector.uniforms, y)]
Exemple #10
0
def partial_im2col(op: PartialIm2Col) -> List[Kernel]:
    """Build one kernel per output of a ``PartialIm2Col`` operator.

    Outputs are read as ``col0``, ``col1``, ... Each gets an offset vector
    that is ``sections[i]`` on ``op.axis`` and 0 elsewhere, where
    ``sections`` is ``op.sections`` with a leading 0. The input and every
    output must be in ``OrderNHWC`` and the input in R channel mode.

    Args:
        op: Operator providing input ``im``, outputs ``col{i}``,
            ``sections``, ``axis`` and the ``KH``/``KW``/``DH``/``DW``/
            ``SH``/``SW``/``PH``/``PW`` attributes.

    Returns:
        The kernels, in output index order.
    """
    image = op.inputs["im"]
    n_outputs = len(op.outputs)
    columns = [op.outputs[f"col{idx}"] for idx in range(n_outputs)]
    starts = [0] + op.sections
    split_axis_key = op.axis

    result = []

    for idx, column in enumerate(columns):
        assert image.order == column.order == OrderNHWC
        assert ChannelMode.get(image) == ChannelModeEnum.R

        namer = KernelNameInjector(op)
        injector = UniformInjector()

        # Only the split axis carries a non-zero offset.
        col_offset = [
            starts[idx] if a == split_axis_key else 0
            for a in column.order.axes
        ]
        injector.register({
            "sampler_im": image,

            "texture_stride_col": texture_stride(column),
            "variable_shape_col": column.shape,
            "variable_stride_col": column.stride,
            "offset_col": col_offset,

            "texture_shape_im": texture_shape(image),
            "texture_stride_im": texture_stride(image),
            "variable_shape_im": image.shape,
            "variable_stride_im": image.stride,

            "C1": image.shape_dict[Axis.C],
            "H1": image.shape_dict[Axis.H],
            "W1": image.shape_dict[Axis.W],
            "KH": op.KH,
            "KW": op.KW,
            "DH": op.DH,
            "DW": op.DW,
            "SH": op.SH,
            "SW": op.SW,
            "PH": op.PH,
            "PW": op.PW,
        })

        # The template depends on the channel mode of this output.
        if ChannelMode.get(column) == ChannelModeEnum.R:
            code = template_R
        else:
            code = template_RGBA
        code = namer.inject(injector.inject(code))

        result.append(
            Kernel(code, namer.name, injector.samplers, injector.uniforms,
                   column))

    return result
Exemple #11
0
def col2im(op: Col2Im) -> List[Kernel]:
    """Build the kernel list for a ``Col2Im`` operator.

    Both ``col`` and ``im`` must be in ``OrderNHWC`` and in R channel mode.
    The source is produced by ``generate_template(op)``.

    Args:
        op: Operator providing input ``col``, output ``im`` and the
            ``SH``/``SW``/``PH``/``PW`` attributes.

    Returns:
        A one-element list containing the kernel built for ``im``.
    """
    column = op.inputs["col"]
    image = op.outputs["im"]

    for var in (column, image):
        assert var.order == OrderNHWC
    for var in (column, image):
        assert ChannelMode.get(var) == ChannelModeEnum.R

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    uniform_values = {
        "col": column,
        "s_im": texture_stride(image),
        "d_Im": image.shape,
        "s_Im": image.stride,
        "d_col": texture_shape(column),
        "s_col": texture_stride(column),
        "d_Col": column.shape,
        "s_Col": column.stride,
        "H2": column.shape_dict[Axis.H],
        "W2": column.shape_dict[Axis.W],
        "C1": image.shape_dict[Axis.C],
        "SH": op.SH,
        "SW": op.SW,
        "PH": op.PH,
        "PW": op.PW,
    }
    injector.register(uniform_values)

    code = generate_template(op)
    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, image)
    ]
Exemple #12
0
def split_axis(op: SplitAxis) -> List[Kernel]:
    """Build one kernel per output of a ``SplitAxis`` operator.

    Outputs are read as ``y0``, ``y1``, ... Each gets an offset vector that
    is ``sections[i]`` on ``op.axis`` and 0 elsewhere, where ``sections``
    is ``op.sections`` with a leading 0. The input's shape and stride are
    expressed in the output's axis order and all vectors go through
    ``_pad_to_4d``.

    Args:
        op: Operator providing input ``x``, outputs ``y{i}``, ``sections``
            and ``axis``.

    Returns:
        The kernels, in output index order.
    """
    src = op.inputs["x"]
    n_outputs = len(op.outputs)
    targets = [op.outputs[f"y{idx}"] for idx in range(n_outputs)]
    starts = [0] + op.sections
    split_on = op.axis

    result = []

    for idx, dst in enumerate(targets):
        assert src.order.check_same_axes(dst.order)
        assert ChannelMode.get(src) == ChannelMode.get(dst) == ChannelModeEnum.R

        namer = KernelNameInjector(op)
        injector = UniformInjector()

        # Only the split axis carries a non-zero offset.
        start_offset = [
            starts[idx] if a == split_on else 0 for a in dst.order.axes
        ]
        injector.register({
            "sampler_x": src,
            "texture_stride_y": texture_stride(dst),
            "variable_shape_y": _pad_to_4d(dst.shape),
            "variable_stride_y": _pad_to_4d(dst.stride),
            "texture_shape_x": texture_shape(src),
            "texture_stride_x": texture_stride(src),
            "variable_shape_x": _pad_to_4d(
                [src.shape_dict[a] for a in dst.order.axes]),
            "variable_stride_x": _pad_to_4d(
                [src.stride_dict[a] for a in dst.order.axes]),
            "offset": _pad_to_4d(start_offset, 0)
        })

        code = namer.inject(injector.inject(template))
        result.append(
            Kernel(code, namer.name, injector.samplers, injector.uniforms,
                   dst))

    return result
Exemple #13
0
def tensordot(op: Tensordot) -> List[Kernel]:
    """Build the kernel list for a ``Tensordot`` operator.

    ``A`` and ``B`` must share a channel mode and ``C`` must be in R
    channel mode. The reduced axes must be the trailing axes of each input,
    and ``C``'s axes must be the remaining axes of ``A`` followed by the
    remaining axes of ``B``. ``K`` is the product of ``A``'s trailing
    reduced sizes; ``M`` and ``N`` are ``A.size // K`` and ``B.size // K``.

    Args:
        op: Operator providing inputs ``A``/``B``, output ``C`` and
            ``axes`` (reduced axes of ``A`` and of ``B``).

    Returns:
        A one-element list containing the kernel built for ``C``.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]
    reduced = op.axes

    assert ChannelMode.get(lhs) == ChannelMode.get(rhs)
    assert ChannelMode.get(out) == ChannelModeEnum.R

    n_reduced_lhs = len(reduced[0])
    n_reduced_rhs = len(reduced[1])

    # Reduced axes must be the innermost axes of both inputs.
    assert lhs.order.axes[-n_reduced_lhs:] == reduced[0]
    assert rhs.order.axes[-n_reduced_rhs:] == reduced[1]

    # Output axes must be [*remaining axes of A, *remaining axes of B].
    assert (out.order.axes[:lhs.ndim - n_reduced_lhs]
            == lhs.order.axes[:-n_reduced_lhs])
    assert (out.order.axes[-(rhs.ndim - n_reduced_rhs):]
            == rhs.order.axes[:-n_reduced_rhs])
    assert out.ndim == lhs.ndim - n_reduced_lhs + rhs.ndim - n_reduced_rhs

    reduce_len = mul(lhs.shape[-n_reduced_lhs:])
    rows = lhs.size // reduce_len
    cols = rhs.size // reduce_len

    namer = KernelNameInjector(op)
    injector = UniformInjector()
    injector.register({
        "A": lhs,
        "B": rhs,
        "s_c": texture_stride(out),
        "d_C": [rows, cols],
        "s_C": [cols, 1],
        "d_a": texture_shape(lhs),
        "d_b": texture_shape(rhs),
        "K": reduce_len
    })

    code = generate_template(mode=ChannelMode.get(lhs),
                             reduction_size=reduce_len)
    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms, out)
    ]
Exemple #14
0
def im2col(op: Im2Col) -> List[Kernel]:
    """Build the kernel list for an ``Im2Col`` operator.

    Both ``im`` and ``col`` must be in ``OrderNHWC`` and ``im`` in R
    channel mode. ``template_R`` is used when ``col`` is in R channel mode,
    otherwise ``template_RGBA``.

    Args:
        op: Operator providing input ``im``, output ``col`` and the
            ``KH``/``KW``/``DH``/``DW``/``SH``/``SW``/``PH``/``PW``
            attributes.

    Returns:
        A one-element list containing the kernel built for ``col``.
    """
    image = op.inputs["im"]
    column = op.outputs["col"]

    assert image.order == OrderNHWC
    assert column.order == OrderNHWC
    assert ChannelMode.get(image) == ChannelModeEnum.R

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    image_dims = image.shape_dict
    injector.register({
        "im": image,
        "s_col": texture_stride(column),
        "d_Col": column.shape,
        "s_Col": column.stride,
        "d_im": texture_shape(image),
        "s_im": texture_stride(image),
        "d_Im": image.shape,
        "s_Im": image.stride,
        "C1": image_dims[Axis.C],
        "H1": image_dims[Axis.H],
        "W1": image_dims[Axis.W],
        "KH": op.KH,
        "KW": op.KW,
        "DH": op.DH,
        "DW": op.DW,
        "SH": op.SH,
        "SW": op.SW,
        "PH": op.PH,
        "PW": op.PW,
    })

    # The template depends on the channel mode of the output.
    if ChannelMode.get(column) == ChannelModeEnum.R:
        code = template_R
    else:
        code = template_RGBA
    code = namer.inject(injector.inject(code))

    return [
        Kernel(code, namer.name, injector.samplers, injector.uniforms,
               column)
    ]
Exemple #15
0
def elementwise_kernel(op: Elementwise):
    """Build the kernel list for a registered ``Elementwise`` operator.

    Uniforms are registered for the output ``y``, for every input (keyed by
    its input name) and for every parameter registered for the operator's
    class in ``_registered_items``. If every input has the same shape,
    order and texture shape as ``y``, the template without position
    conversion is used; otherwise the converting template is used.

    Args:
        op: Operator providing the inputs and the output ``y``.

    Returns:
        A one-element list containing the kernel built for ``y``.
    """
    inputs = list(op.inputs.values())
    y = op.outputs["y"]

    loop_shapes, loop_strides = _optimize_loop_structure(inputs + [y], y)

    namer = KernelNameInjector(op)
    injector = UniformInjector()

    injector.register({
        "texture_stride_y": texture_stride(y),
        "variable_shape_y": loop_shapes[y],
        "variable_stride_y": loop_strides[y]
    })

    for key, var in op.inputs.items():
        injector.register({
            f"sampler_{key}": var,
            f"texture_shape_{key}": texture_shape(var),
            f"texture_stride_{key}": texture_stride(var),
            f"variable_shape_{key}": loop_shapes[var],
            f"variable_stride_{key}": loop_strides[var],
        })

    # Operator-specific parameters are computed from the operator itself.
    registered_params = _registered_items[op.__class__].parameters
    for param_name, getter in registered_params.items():
        injector.register({param_name: getter(op)})

    same_layout = all(
        var.shape == y.shape and var.order == y.order
        and texture_shape(var) == texture_shape(y) for var in inputs)

    if same_layout:
        # Logical and pixel positions coincide for all variables, so the
        # logical position does not have to be computed.
        code = _generate_template_no_convert_position(op)
    else:
        # The logical position has to be computed.
        code = _generate_template_convert_position(op)

    code = namer.inject(injector.inject(code))

    return [Kernel(code, namer.name, injector.samplers, injector.uniforms, y)]
Exemple #16
0
def concat(op: Concat) -> List[Kernel]:
    """Build the kernels for a ``Concat`` operator.

    Inputs are read as ``x0``, ``x1``, ... plus a ``workspace`` input. For
    every ``x{i}`` two kernels are emitted, in this order:

    1. a kernel built from ``template`` targeting ``y``, with ``x{i}`` and
       ``workspace`` as samplers and the offset of ``x{i}`` along
       ``op.axis``;
    2. a kernel built from ``template2`` targeting ``workspace``, with
       ``y`` as sampler.

    The offset of ``x{i}`` along the concatenation axis is the sum of the
    sizes of ``x0 .. x{i-1}`` along that axis.

    Args:
        op: Operator providing inputs ``x{i}`` and ``workspace``, output
            ``y`` and ``axis``.

    Returns:
        The kernels, two per input, in input index order.
    """
    xs = [op.inputs[f"x{i}"] for i in range(len(op.inputs) - 1)]
    workspace = op.inputs["workspace"]
    y = op.outputs["y"]
    axis = op.axis

    kernels = []

    # sections[i] is the start of xs[i] in y along `axis`, i.e. the
    # accumulated size of all inputs *before* it. The last input therefore
    # never contributes to an offset.
    sections = [0]
    for x in xs[:-1]:
        sections.append(sections[-1] + x.shape_dict[axis])

    for i, x in enumerate(xs):
        assert x.order.check_same_axes(y.order)
        assert ChannelMode.get(x) == ChannelMode.get(y)

        # Only the concatenation axis carries a non-zero offset.
        offset = [sections[i] if a == axis else 0 for a in y.order.axes]

        name_injector = KernelNameInjector(op)
        uniform_injector = UniformInjector()
        uniform_injector.register({
            "sampler_x": x,
            "sampler_workspace": workspace,

            "texture_shape_workspace": texture_shape(workspace),

            "texture_stride_y": texture_stride(y),
            "variable_shape_y": _pad_to_4d(y.shape),
            "variable_stride_y": _pad_to_4d(y.stride),

            "texture_shape_x": texture_shape(x),
            "texture_stride_x": texture_stride(x),
            "variable_shape_x": _pad_to_4d([x.shape_dict[a] for a in y.order.axes]),
            "variable_stride_x": _pad_to_4d([x.stride_dict[a] for a in y.order.axes]),

            "offset": _pad_to_4d(offset, 0)
        })
        source = template
        source = uniform_injector.inject(source)
        source = name_injector.inject(source)
        kernel = Kernel(
            source,
            name_injector.name,
            uniform_injector.samplers,
            uniform_injector.uniforms,
            y
        )
        kernels.append(kernel)

        # Second kernel of the pair: reads y and targets the workspace.
        name_injector2 = KernelNameInjector(op)
        uniform_injector2 = UniformInjector()
        uniform_injector2.register({
            "sampler_y": y,
            "texture_shape_y": texture_shape(y),
        })
        source2 = template2
        source2 = uniform_injector2.inject(source2)
        source2 = name_injector2.inject(source2)
        kernel2 = Kernel(
            source2,
            name_injector2.name,
            uniform_injector2.samplers,
            uniform_injector2.uniforms,
            workspace
        )
        kernels.append(kernel2)

    return kernels
Exemple #17
0
def reduce_kernel(op: Reduce):
    """Build the kernel list for a registered ``Reduce`` operator.

    Orders are simplified with ``simplify_orders`` while keeping
    ``op.axis``. The input's shape and stride are expressed in the output's
    axis order (1 for axes the input lacks), padded with 1s to three
    entries, and then extended by the size and stride of the reduced axis
    as the fourth entry. The output's shape and stride are padded with 1s
    to four entries.

    Args:
        op: Operator providing input ``x``, output ``y`` and ``axis``.

    Returns:
        A one-element list containing the kernel built for ``y``.

    Raises:
        NotImplementedError: If the simplified order of ``y`` has more than
            four dimensions.
    """
    x = op.inputs["x"]
    y = op.outputs["y"]
    axis = op.axis

    orders, shape_dicts = simplify_orders([x, y], keep_axes=[axis])

    # Padding shapes and strides to 4D
    if orders[y].ndim > 4:
        raise NotImplementedError(f"Too large number of dimension: {y}")

    shapes = {v: [shape_dicts[v][a] for a in orders[v].axes] for v in [x, y]}
    # Stride of an axis is the product of all sizes that follow it.
    strides = {
        v:
        [mul(shapes[v][orders[v].axes_dict[a] + 1:]) for a in orders[v].axes]
        for v in [x, y]
    }
    stride_dicts = {v: AxisKeyDict(orders[v].axes, strides[v]) for v in [x, y]}

    # Change x's shapes and strides order to same as y's order
    x_virtual_shape = [
        shape_dicts[x][a] if a in orders[x].axes else 1 for a in orders[y].axes
    ]
    x_virtual_stride = [
        stride_dicts[x][a] if a in orders[x].axes else 1
        for a in orders[y].axes
    ]
    # Pad the non-reduced part to three entries with size 1 / stride 1,
    # matching how y is padded below, so the reduced axis always ends up
    # in the fourth slot.
    while len(x_virtual_shape) < 3:
        x_virtual_stride.append(1)
        x_virtual_shape.append(1)
    x_virtual_shape.append(shape_dicts[x][axis])
    x_virtual_stride.append(stride_dicts[x][axis])

    y_virtual_shape = shapes[y]
    y_virtual_stride = strides[y]
    while len(y_virtual_shape) < 4:
        y_virtual_stride.append(1)
        y_virtual_shape.append(1)

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()

    uniform_injector.register({
        "texture_stride_y": texture_stride(y),
        "variable_shape_y": y_virtual_shape,
        "variable_stride_y": y_virtual_stride,
        "sampler_x": x,
        "texture_shape_x": texture_shape(x),
        "texture_stride_x": texture_stride(x),
        "variable_shape_x": x_virtual_shape,
        "variable_stride_x": x_virtual_stride,
    })

    # Operator-specific parameters are computed from the operator itself.
    registered_params = _registered_items[op.__class__].parameters
    for name, getter in registered_params.items():
        uniform_injector.register({name: getter(op)})

    # Computing logical position is required.
    source = _generate_template_convert_position(
        op, reduction_size=shape_dicts[x][axis])

    source = uniform_injector.inject(source)
    source = name_injector.inject(source)
    kernel = Kernel(source, name_injector.name, uniform_injector.samplers,
                    uniform_injector.uniforms, y)

    return [kernel]