Example 1
def axiswise_bias_same_order(op: AxiswiseBias,
                             memory_layout: MemoryLayout) -> List[Kernel]:
    x = memory_layout[op.inputs["x"]]
    b = memory_layout[op.inputs["b"]]
    y = memory_layout[op.outputs["y"]]

    target_axis_index = x.variable.order.axes_dict[op.axis]
    D1 = mul(x.variable.shape[:target_axis_index])
    D2 = x.variable.shape[target_axis_index]
    D3 = mul(x.variable.shape[target_axis_index + 1:])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "axiswise_bias_X": x,
        "axiswise_bias_B": b,
        "axiswise_bias_Y": y,
        "axiswise_bias_D1": D1,
        "axiswise_bias_D2": D2,
        "axiswise_bias_D3": D3
    })

    name_injector = KernelNameInjector(op)

    source = generate_template_same_order(D1, D3)
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1),
                    GPUSize(MAX_THREADS_PER_THREADGROUP, 1, 1),
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
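The (D1, D2, D3) decomposition above is the recurring pattern in these kernels. Below is a minimal standalone sketch (not WebDNN code) of what it computes, assuming `mul` is simply the product of a sequence; `decompose` is a hypothetical helper name.

import numpy as np

def decompose(shape, target_axis_index):
    """Flatten a shape into (outer, target, inner) sizes around one axis."""
    D1 = int(np.prod(shape[:target_axis_index], dtype=np.int64))       # product of leading axes
    D2 = shape[target_axis_index]                                      # size of the target axis
    D3 = int(np.prod(shape[target_axis_index + 1:], dtype=np.int64))   # product of trailing axes
    # The kernel can then address element (i1, i2, i3) of the (D1, D2, D3) view as
    # x[i1 * D2 * D3 + i2 * D3 + i3] and add b[i2] to it.
    return D1, D2, D3

# An NCHW tensor of shape (2, 3, 4, 5) with the bias applied along C (index 1)
# decomposes into D1=2, D2=3, D3=20.
assert decompose((2, 3, 4, 5), 1) == (2, 3, 20)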
Example 2
def softmax_same_order(op: Softmax,
                       memory_layout: MemoryLayout) -> List[Kernel]:
    x = op.inputs["x"]
    y = op.outputs["y"]

    target_axis = op.parameters["axis"]
    target_axis_index = x.order.axes_dict[target_axis]
    D1 = mul(x.shape[:target_axis_index])
    D2 = x.shape[target_axis_index]
    D3 = mul(x.shape[target_axis_index + 1:])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "softmax_X": memory_layout[x],
        "softmax_Y": memory_layout[y],
        "softmax_D1": D1,
        "softmax_D2": D2,
        "softmax_D3": D3
    })

    name_injector = KernelNameInjector(op)

    source = template_same_order
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1),
                    GPUSize(MAX_THREADS_PER_THREADGROUP, 1, 1),
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 3
def template(x_shape, axis, description: str = ""):
    np_axis = 1 if axis is None else axis
    vx = np.random.rand(*x_shape)
    new_shape = [mul(vx.shape[:np_axis]), mul(vx.shape[np_axis:])]
    max_i = np.argmax(vx.reshape(new_shape), axis=1)
    vy = np.zeros(new_shape)
    vy[np.arange(vy.shape[0]), max_i] = 1

    x = make_tensor_value_info("x", vx.shape)
    y = make_tensor_value_info("y", vy.shape)

    kwargs = {}
    if axis is not None:
        kwargs["axis"] = axis
    operator = make_node("Hardmax", ["x"], ["y"], **kwargs)

    model = make_model([operator], [x], [y])

    graph = ONNXConverter().convert(model)

    assert tuple(vy.shape) == tuple(
        graph.outputs[0].shape
    ), f"vy: {vy.shape}, graph.outputs[0]: {graph.outputs[0].shape}"
    generate_kernel_test_case(
        description=f"[ONNX] Hardmax {description}",
        graph=graph,
        backend=["webgpu", "webgl", "webassembly"],
        inputs={graph.inputs[0]: vx},
        expected={graph.outputs[0]: vy},
    )
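For reference, the expected output computed in this test flattens the input to 2-D around `axis` and one-hot encodes each row's argmax; a self-contained numpy sketch of that reference (`hardmax_reference` is a hypothetical name):

import numpy as np

def hardmax_reference(vx, axis=1):
    new_shape = (int(np.prod(vx.shape[:axis])), int(np.prod(vx.shape[axis:])))
    flat = vx.reshape(new_shape)
    vy = np.zeros(new_shape)
    vy[np.arange(new_shape[0]), np.argmax(flat, axis=1)] = 1  # one-hot of each row's maximum
    return vy

vx = np.array([[1.0, 3.0, 2.0],
               [0.5, 0.1, 0.9]])
print(hardmax_reference(vx))  # [[0. 1. 0.], [0. 0. 1.]]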
Example 4
def axiswise_scale_same_order(op: AxiswiseScale,
                              memory_layout: MemoryLayout) -> List[Kernel]:
    x = memory_layout[op.inputs["x"]]
    s = memory_layout[op.inputs["s"]]
    y = memory_layout[op.outputs["y"]]

    target_axis_index = x.variable.order.axes_dict[op.axis]
    D1 = mul(x.variable.shape[:target_axis_index])
    D2 = x.variable.shape[target_axis_index]
    D3 = mul(x.variable.shape[target_axis_index + 1:])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "axiswise_scale_X": x,
        "axiswise_scale_S": s,
        "axiswise_scale_Y": y,
        "axiswise_scale_D1": D1,
        "axiswise_scale_D2": D2,
        "axiswise_scale_D3": D3
    })

    name_injector = KernelNameInjector(op)

    source = template_same_order
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 5
def axiswise_scale_general(op: AxiswiseScale,
                           memory_layout: MemoryLayout) -> List[Kernel]:
    x = memory_layout[op.inputs["x"]]
    s = memory_layout[op.inputs["s"]]
    y = memory_layout[op.outputs["y"]]

    x_shape = x.variable.shape

    target_axis_index = x.variable.order.axes_dict[op.axis]
    D1 = mul(x_shape[:target_axis_index])
    D2 = x_shape[target_axis_index]
    D3 = mul(x_shape[target_axis_index + 1:])

    y_strides = []
    stride = 1
    for sh in reversed(y.variable.shape):
        y_strides.insert(0, stride)
        stride *= sh

    x_stride_in_y = [
        y_strides[y.variable.order.axes_dict[axis]]
        for axis in x.variable.order.axes
    ]

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "axiswise_scale_X": x,
        "axiswise_scale_S": s,
        "axiswise_scale_Y": y,
        "axiswise_scale_D1": D1,
        "axiswise_scale_D2": D2,
        "axiswise_scale_D3": D3,
        "axiswise_scale_D": x.variable.ndim,
        "axiswise_scale_d_target": x.variable.order.axes_dict[op.axis],
        "axiswise_scale_x_shape": x_shape,
        "axiswise_scale_x_stride_in_y": x_stride_in_y,
    })

    name_injector = KernelNameInjector(op)

    source = template_general
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
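The stride computation above follows the usual row-major rule; a small plain-Python sketch of that rule and of what `x_stride_in_y` then provides (`row_major_strides` is a hypothetical helper):

def row_major_strides(shape):
    """stride[i] is the product of all dimensions after i; the innermost stride is 1."""
    strides, stride = [], 1
    for size in reversed(shape):
        strides.insert(0, stride)
        stride *= size
    return strides

assert row_major_strides([2, 3, 4]) == [12, 4, 1]
# x_stride_in_y picks, for each axis of x, the stride that axis has in y's layout,
# so the kernel can walk x in x's axis order while writing into y's memory order.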
Example 6
def col2im(op: Col2Im) -> List[Kernel]:
    col = op.inputs["col"]
    im = op.outputs["im"]

    assert col.order.check_same_axes(
        Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]))
    assert col.order.axes_dict[Axis.KH] + 2 == col.order.axes_dict[
        Axis.KW] + 1 == col.order.axes_dict[Axis.C] == 5
    assert im.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(col) == ChannelModeEnum.R
    assert ChannelMode.get(im) == ChannelModeEnum.R

    col_shape = col.shape[0:3] + (mul(col.shape[3:6]), )
    col_stride = [mul(col_shape[i + 1:]) for i in range(len(col_shape))]
    col_order = Order(col.order.axes[0:3] + (Axis.C, ))

    code = KernelCode([
        """
void main() {
    ivec4 variable_position_im = """,
        change_order(get_output_position(im), im.order, OrderNHWC), f""";

    int n = variable_position_im.x;
    int h1 = variable_position_im.y;
    int w1 = variable_position_im.z;
    int c1 = variable_position_im.w;

    float sum = 0.0;

    for (int kh = 0; kh < {op.KH}; kh++) {{
        int h2 = (h1 + {op.PH} - kh) / {op.SH};
        if (mod(h1 + {op.PH} - kh, {op.SH}) != 0 || h2 < 0 || h2 >= {col.shape_dict[Axis.H]}) continue;

        for (int kw = 0; kw < {op.KW}; kw++) {{
            int w2 = (w1 + {op.PW} - kw) / {op.SW};
            if (mod(w1 + {op.PW} - kw, {op.SW}) != 0 || w2 < 0 || w2 >= {col.shape_dict[Axis.W]}) continue;

            int khkwc1 = (kh * {op.KW} + kw) * {im.shape_dict[Axis.C]} + c1;

            sum += texture2D(""", col, ",",
        convert_coord(
            change_order("vec4(n, h2, w2, khkwc1)", OrderNHWC, col_order),
            col_shape, col_stride,
            texture_shape(col)[:2][::-1],
            texture_stride(col)[:2][::-1]), """).r;
        }
    }

    gl_FragColor.r = sum;
}
"""
    ],
                      name=op.__class__.__name__)
    source = code.generate()
    return [Kernel(source, code.name, code.samplers, code.uniforms, im)]
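The shader's index arithmetic inverts im2col: output pixel `h1` receives a contribution from column row `h2` and kernel offset `kh` exactly when `h1 = h2 * SH - PH + kh`. A quick standalone check of that relation, with SH and PH standing in for `op.SH` and `op.PH`:

SH, PH = 2, 1  # example stride and padding
for h2 in range(3):
    for kh in range(3):
        h1 = h2 * SH - PH + kh
        if h1 < 0:
            continue
        # This is the divisibility + quotient test performed in the GLSL loop above.
        assert (h1 + PH - kh) % SH == 0 and (h1 + PH - kh) // SH == h2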
Example 7
def _convert_flatten(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i if "axis" in attrs else 1

    new_shape = [mul(x.shape[:axis]), mul(x.shape[axis:])]
    new_order = Order([None, None])

    y = x.reshape(shape=new_shape, order=new_order)

    converter.set_variable(onnx_op.output[0], y)
Example 8
def _convert_softmax(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])

    attrs = attribute_dict(onnx_op)
    axis = attrs["axis"].i if "axis" in attrs else 1
    new_shape = [mul(x.shape[:axis]), mul(x.shape[axis:])]
    new_order = Order([None, None])

    x = x.reshape(shape=new_shape, order=new_order)

    max_x, = Max(None, axis=x.order.axes[1])(x)
    y = x >= max_x

    converter.set_variable(onnx_op.output[0], y)
Example 9
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]

    shape_A_reduced_axes = [A.shape_dict[a] for a in op.axes[0]]
    shape_B_reduced_axes = [B.shape_dict[a] for a in op.axes[1]]
    kernel = Kernel({"tensordot": source},
                    "tensordot",
                    inputs=[memory_layout[A], memory_layout[B]],
                    outputs=[memory_layout[C]],
                    call_option={
                        "reduction_size":
                        mul(A.shape_dict[a] for a in op.axes[0]),
                        "stride_A":
                        A.stride,
                        "stride_B":
                        B.stride,
                        "stride_C":
                        C.stride,
                        "shape_C":
                        C.shape,
                        "stride_A_for_C_axes": [
                            0 if a not in A.order.axes or a in op.axes[0] else
                            A.stride_dict[a] for a in C.order.axes
                        ],
                        "stride_B_for_C_axes": [
                            0 if a not in B.order.axes or a in op.axes[1] else
                            B.stride_dict[a] for a in C.order.axes
                        ],
                        "shape_A_reduced_axes":
                        shape_A_reduced_axes,
                        "stride_A_reduced_axes": [
                            mul(shape_A_reduced_axes[i + 1:])
                            for i in range(len(shape_A_reduced_axes))
                        ],
                        "stride_A_reduced_axes_for_whole":
                        [A.stride_dict[a] for a in op.axes[0]],
                        "shape_B_reduced_axes":
                        shape_B_reduced_axes,
                        "stride_B_reduced_axes": [
                            mul(shape_B_reduced_axes[i + 1:])
                            for i in range(len(shape_B_reduced_axes))
                        ],
                        "stride_B_reduced_axes_for_whole":
                        [B.stride_dict[a] for a in op.axes[1]]
                    })

    return [kernel]
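Conceptually, the call options above give the kernel everything needed to compute C = tensordot(A, B) over `op.axes`; in plain numpy terms (with assumed example shapes) the operation is:

import numpy as np

A = np.random.rand(2, 3, 4)   # e.g. axes (N, H, C), reduced over the last axis
B = np.random.rand(4, 5)      # e.g. axes (C, M),    reduced over the first axis
C = np.tensordot(A, B, axes=([2], [0]))
assert C.shape == (2, 3, 5)   # remaining axes of A followed by remaining axes of B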
Example 10
    def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
        flag_changed = False
        for op in traverse.filter_nodes(
                traverse.listup_operators(graph),
                Deconvolution2D):  # type: Deconvolution2D
            x = op.inputs["x"]
            w = op.inputs["w"]
            y = op.outputs["y"]
            flag_changed = True
            op.remove_all()

            a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
            w, = ReinterpretAxis(None,
                                 in_order=OrderNHWC,
                                 out_order=Order(
                                     [Axis.C, a_kh, a_kw, a_filter]))(w)
            x, = ReinterpretAxis(None,
                                 in_order=OrderNHWC,
                                 out_order=Order(
                                     [Axis.N, Axis.H, Axis.W, a_filter]))(x)

            col, = Tensordot(None, axes=a_filter)(x, w)
            col = col.transpose(
                Order([Axis.N, Axis.H, Axis.W, a_kh, a_kw, Axis.C]))
            col = col.reshape(shape=[*col.shape[0:3],
                                     mul(col.shape[3:6])],
                              order=OrderNHWC)

            new_y, = Col2Im(None,
                            ksize=op.ksize,
                            stride=op.stride,
                            padding=op.padding)(col)
            OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

        return graph, flag_changed
Example 11
def axiswise_bias(op: AxiswiseBias,
                  memory_layout: MemoryLayout) -> List[Kernel]:
    # Provide the size and stride of the target axis
    x = op.inputs["x"]
    b = op.inputs["b"]
    y = op.outputs["y"]

    assert b.ndim == 1
    axis_pos = x.order.axes_dict[op.parameters["axis"]]  # e.g., 1 for axis=C with NCHW order
    axis_size = x.shape[axis_pos]
    assert axis_size == b.size

    axis_stride = mul(x.shape[axis_pos + 1:])

    kernel = Kernel({"axiswise_bias": source},
                    "axiswise_bias",
                    inputs=[x, b],
                    outputs=[y],
                    call_option={
                        "n": x.size,
                        "axis_stride": axis_stride,
                        "axis_size": axis_size
                    })

    return [kernel]
Example 12
def _listup_splittable_axis(v: Variable, op: Operator) -> List[Axis]:
    if isinstance(op, (Concat, SplitAxis)):
        return list(v.order.axes)

    elif isinstance(op, Reshape):
        """
        For more detail about this condition check, see the documentation comment of `_split_reshape`.
        """
        splittable_axes = []  # type: List[Axis]
        v1 = v
        v2 = op.outputs["y"] if v == op.inputs["x"] else op.inputs["x"]

        for a1 in v1.order.axes:
            d1 = mul(v1.shape[v1.order.axes_dict[a1]:])
            d2 = 1
            for a2 in reversed(v2.order.axes):
                d2 *= v2.shape_dict[a2]

                if d2 == d1:
                    splittable_axes.append(a1)
                    continue

                elif d2 > d1:
                    continue

        return splittable_axes

    elif isinstance(op, Im2Col):
        op = op  # type: Im2Col
        if v in op.outputs.values():
            if v.shape_dict[Axis.C] % (op.ksize[0] * op.ksize[1]) == 0:
                return [Axis.N, Axis.H, Axis.W, Axis.C]
            else:
                return [Axis.N, Axis.H, Axis.W]

        else:
            return []

    elif isinstance(op, PartialIm2Col):
        op = op  # type: PartialIm2Col
        if v in op.outputs.values():
            return []

        else:
            return [op.axis]

    elif isinstance(op, Sgemm):
        if v == op.outputs["C"]:
            return []
        else:
            return list(v.order.axes)

    elif isinstance(op, Tensordot):
        if v == op.outputs["C"]:
            return []
        else:
            return list(v.order.axes)

    else:
        return list(attr.axis for attr in op.get_attribute(Tensorwise))
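In the `Reshape` branch, axis `a1` is treated as splittable when the product of `v1`'s dimensions from `a1` onward matches a suffix product of `v2`'s dimensions, i.e. the reshape boundary does not cut through `a1`. A small sketch of that suffix-product test:

import numpy as np

def suffix_products(shape):
    return [int(np.prod(shape[i:])) for i in range(len(shape))]

v1_shape = (2, 3, 4)   # suffix products: 24, 12, 4
v2_shape = (6, 4)      # suffix products: 24, 4
assert suffix_products(v1_shape) == [24, 12, 4]
assert suffix_products(v2_shape) == [24, 4]
# 24 and 4 appear on both sides, so axes 0 and 2 of v1 are splittable;
# 12 has no match in v2, so axis 1 is not.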
Example 13
def template(axis=1, ndim=2, description: str = ""):
    if chainer.__version__ < "1.24" and axis != 1:
        raise SkipTest(
            f"chainer.functions.softmax supports the \"axis\" parameter since v1.24; the currently installed version is {chainer.__version__}"
        )

    shape = (np.arange(ndim, ) + 2).tolist()
    vx = chainer.Variable(
        np.arange(mul(shape)).reshape(shape).astype(np.float32))

    if chainer.__version__ < "1.24":
        vy = chainer.functions.softmax(vx)

    else:
        vy = chainer.functions.softmax(vx, axis=axis)

    graph = ChainerConverter().convert([vx], [vy])

    x = graph.inputs[0]
    y = graph.outputs[0]

    generate_kernel_test_case(
        description=f"[chainer] F.softmax {description}",
        graph=graph,
        inputs={x: vx.data},
        backend=["webgpu", "webassembly"],
        expected={y: vy.data},
    )
Example 14
def reshape(op: Reshape, memory_layout: MemoryLayout) -> List[Kernel]:
    x = op.inputs["x"]
    y = op.outputs["y"]

    if memory_layout[x] == memory_layout[y]:
        # This is an in-place operation
        return []

    assert x.order == op.parameters["in_order"]
    assert y.order == op.parameters["out_order"]
    assert y.size == mul(op.parameters["out_shape"])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "reshape_x": memory_layout[x],
        "reshape_y": memory_layout[y],
        "reshape_N": y.size,
    })

    name_injector = KernelNameInjector(op)

    source = template
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1),
                    GPUSize(MAX_THREADS_PER_THREADGROUP, 1, 1),
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 15
def reshape(op: Reshape, memory_layout: MemoryLayout) -> List[Kernel]:
    x = memory_layout[op.inputs["x"]]
    y = memory_layout[op.outputs["y"]]

    assert x.variable.order == op.parameters["in_order"]
    assert y.variable.order == op.parameters["out_order"]
    assert y.variable.size == mul(op.parameters["out_shape"])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "reshape_x": x,
        "reshape_y": y,
        "reshape_N": y.variable.size,
    })

    name_injector = KernelNameInjector(op)

    source = template
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1),
                    GPUSize(1024, 1, 1),
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 16
def reshape(op: Reshape, memory_layout: MemoryLayout) -> List[Kernel]:
    # Only reshapes that require no transposition are currently supported
    x = op.inputs["x"]
    y = op.outputs["y"]

    if memory_layout[x] == memory_layout[y]:
        # This is an in-place operation
        return []

    assert x.order == op.parameters["in_order"]
    assert y.order == op.parameters["out_order"]
    assert y.size == mul(op.parameters["out_shape"])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "reshape_x": memory_layout[x],
        "reshape_y": memory_layout[y],
        "reshape_N": y.size,
    })

    name_injector = KernelNameInjector(op)

    source = template
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 17
    def __call__(self, x: Variable):
        """
        Args:
            x (:class:`~webdnn.graph.variable.Variable`): Input

        Returns:
            tuple of :class:`~webdnn.graph.variable.Variable`: Output
        """
        out_axes = list(x.order.axes)
        for axis in self.parameters["in_axes"]:
            if axis not in out_axes:
                raise ValueError(
                    f"Axis {axis} is not contained in input variable")

            out_axes.remove(axis)

        out_shape = [x.shape_dict[axis] for axis in out_axes]

        if self.parameters["out_axis"] in out_axes:
            raise ValueError(
                f"Axis {self.parameters['out_axis']} is duplicated")

        out_axes.append(self.parameters["out_axis"])
        out_shape.append(
            mul([x.shape_dict[axis] for axis in self.parameters["in_axes"]]))

        y = Variable(out_shape, Order(out_axes))
        self.append_input("x", x)
        self.append_output("y", y)

        return y,
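The shape bookkeeping above removes the `in_axes` from the input order and appends a single output axis whose size is their product; a plain-Python sketch with made-up axis names:

in_order = ["N", "H", "W", "C"]
in_shape = {"N": 2, "H": 3, "W": 4, "C": 5}
in_axes = ["H", "W", "C"]                      # axes to be merged
out_axes = [a for a in in_order if a not in in_axes] + ["merged"]
out_shape = [in_shape[a] for a in out_axes[:-1]] + [3 * 4 * 5]
assert (out_axes, out_shape) == (["N", "merged"], [2, 60])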
Example 18
def _convert_reshape(converter: ChainerConverter,
                     c_op: "chainer.functions.Reshape"):
    assert len(c_op.inputs) == 1, \
        f"For 'Reshape' operator in chainer, expected number of inputs is 1, but actual is {len(c_op.inputs)}"

    x = converter.get_variable(c_op.inputs[0])

    out_shape = list(c_op.shape)  # c_op.shape is a tuple
    if len(out_shape) == 1:
        out_order = OrderC
    elif len(out_shape) == 2:
        out_order = OrderNC
    elif len(out_shape) == 4:
        out_order = OrderNCHW
    else:
        raise NotImplementedError(
            "Reshaping into a number of dimensions other than 1, 2, or 4 is not supported.")
    assert mul(out_shape) == x.size

    y, = Reshape(None,
                 in_order=x.order,
                 out_order=out_order,
                 out_shape=out_shape)(x)

    converter.set_variable(c_op.outputs[0](), y)
Example 19
def _convert_reshape(converter: ONNXConverter, onnx_op: INodeProto):
    x = converter.get_variable(onnx_op.input[0])
    if converter.opset_version >= 5:
        # The output shape is specified by onnx_op.input[1].
        # It has to be a ConstantVariable.
        # TODO: test for different operator set versions
        shape_var = converter.get_variable(onnx_op.input[1])
        assert isinstance(
            shape_var, ConstantVariable
        ), "Shape specifier of Reshape operator has to be constant."
        out_shape = [int(d) for d in shape_var.data]
    else:
        # Reshape-1
        attrs = attribute_dict(onnx_op)
        out_shape = [
            r if s == 0 else s for r, s in zip(x.shape, attrs["shape"].ints)
        ]

    if -1 in out_shape:
        i = out_shape.index(-1)
        out_shape.remove(-1)
        out_shape.insert(i, x.size // mul(out_shape))

    out_order = Order([None] * len(out_shape))

    y, = Reshape(None,
                 in_order=x.order,
                 out_order=out_order,
                 out_shape=out_shape)(x)
    converter.set_variable(onnx_op.output[0], y)
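The `-1` handling infers the single free dimension so that the total element count is preserved; a standalone sketch (assuming exactly one `-1` and a divisible size, with `infer_out_shape` as a hypothetical helper):

def infer_out_shape(out_shape, total_size):
    out_shape = list(out_shape)
    if -1 in out_shape:
        i = out_shape.index(-1)
        rest = 1
        for d in out_shape[:i] + out_shape[i + 1:]:
            rest *= d
        out_shape[i] = total_size // rest   # the remaining size goes to the -1 slot
    return out_shape

assert infer_out_shape([2, -1, 4], 24) == [2, 3, 4]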
Example 20
def _listup_splittable_axis(v: Variable, op: Operator) -> List[Axis]:
    if isinstance(op, (Concat, SplitAxis)):
        return list(v.order.axes)

    if isinstance(op, Reshape):
        """
        For more detail about this condition check, see the documentation comment of `_split_reshape`.
        """
        splittable_axes = []  # type: List[Axis]
        v1 = v
        v2 = op.outputs["y"] if v == op.inputs["x"] else op.inputs["x"]
        v1_order = op.in_order if v1 == op.inputs["x"] else op.out_order
        v2_order = op.in_order if v2 == op.inputs["x"] else op.out_order
        v1_shape = [v1.shape_dict[a] for a in v1_order.axes]

        for a1 in v1_order.axes:
            d1 = mul(v1_shape[v1_order.axes_dict[a1]:])
            d2 = 1
            axes = []
            for a2 in reversed(v2_order.axes):
                d2 *= v2.shape_dict[a2]
                axes.append(a2)

                if d2 == d1 and any(v2.shape_dict[a3] % 2 == 0
                                    for a3 in axes):  # TODO
                    splittable_axes.append(a1)
                    continue

                elif d2 > d1:
                    continue

        return splittable_axes

    if isinstance(op, Im2Col):
        op = op  # type: Im2Col
        if v in op.outputs.values():
            return [Axis.N, Axis.H, Axis.W, Axis.C]

        else:
            return []

    if isinstance(op, PartialIm2Col):
        op = op  # type: PartialIm2Col
        if v in op.outputs.values():
            axes = [Axis.N, Axis.C]
            if op.axis not in axes:
                axes.append(op.axis)

            return axes

        else:
            return []

    if isinstance(op, Tensordot):
        return list(v.order.axes)

    if isinstance(op, Pooling2D):
        return [Axis.H, Axis.W]

    return []
Example 21
def convert_layer_global_average_pooling2d(
        converter: KerasConverter,
        k_op: "keras.layers.GlobalAveragePooling2D"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    if k_op.data_format == "channels_first":
        assert x.order == OrderNCHW

    elif k_op.data_format == "channels_last":
        assert x.order == OrderNHWC

    else:
        raise ValueError(
            f"[KerasConverter] Unknown data format: {k_op.data_format}")

    y, = AveragePooling2D(None,
                          ksize=(x.shape_dict[Axis.H], x.shape_dict[Axis.W]),
                          stride=(1, 1),
                          padding=(0, 0))(x)

    # flatten without changing memory layout
    z, = Reshape(None,
                 in_order=y.order,
                 out_order=OrderNC,
                 out_shape=[y.shape[0], mul(y.shape[1:])])(y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], z)
Example 22
def template(axis=1, ndim=2, description: str = ""):
    shape = (np.arange(ndim, ) + 2).tolist()
    vx = chainer.Variable(
        np.arange(mul(shape)).reshape(shape).astype(np.float32))
    vy = chainer.functions.softmax(vx, axis)

    graph = ChainerConverter().convert_from_inout_vars([vx], [vy])

    x = graph.inputs[0]
    y = graph.outputs[0]

    generate_kernel_test_case(
        description=f"[chainer] F.softmax {description}",
        graph=graph,
        inputs={
            x:
            np.transpose(
                vx.data,
                [default_order[ndim].axes_dict[a] for a in x.order.axes])
        },
        expected={
            y:
            np.transpose(
                vy.data,
                [default_order[ndim].axes_dict[a] for a in y.order.axes])
        },
    )
Example 23
def _convert_linear_function(
        converter: ChainerConverter,
        c_op: "chainer.functions.connection.linear.LinearFunction"):
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])  # type: ConstantVariable

    x2, = Reshape(None,
                  in_order=x.order,
                  out_order=OrderNC,
                  out_shape=[x.shape[0], mul(x.shape[1:])])(x)
    w2, = ReinterpretAxis(None, in_order=w.order, out_order=OrderNC)(w)
    w2, = Transpose(None)(w2)
    w2.change_order(OrderCN)

    y, = Linear(None)(x2, w2)
    y, = ReinterpretAxis(None,
                         in_order=y.order,
                         out_order=Order([x.order.axes[0],
                                          w.order.axes[0]]))(y)

    if len(c_op.inputs) == 3:
        # with bias
        b = converter.get_variable(c_op.inputs[2])
        check_broadcast_constraints(y, b)
        y = y + b

    converter.set_variable(c_op.outputs[0](), y)
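In plain numpy terms, the conversion above flattens the input to (batch, features), applies the chainer weight of shape (out_features, in_features) transposed, and adds the bias; a sketch with assumed example shapes:

import numpy as np

x = np.random.rand(2, 3, 4, 5)       # e.g. an NCHW input
w = np.random.rand(7, 3 * 4 * 5)     # chainer Linear weight: (out_features, in_features)
b = np.random.rand(7)
y = x.reshape(x.shape[0], -1) @ w.T + b
assert y.shape == (2, 7)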
Example 24
    def __call__(self, A: Variable, B: Variable):
        for axis in self.axes[0]:
            assert axis in A.order.axes, f"""
[Tensordot] Input variable "A" must have axes "{axis}":
    (op) = {self}
    (op.axes[0]) = {self.axes[0]}
    (A) = {A}"""

        for axis in A.order.axes:
            if axis not in self.axes[0]:
                assert axis in self.axes[1] or axis not in B.order.axes, f"""
[Tensordot] Axes of "A" which are not reduced must not be contained in "B":
    (op) = {self}
    (A.order.axes) = {A.order.axes}
    (B.order.axes) = {B.order.axes}
    (op.axes) = {self.axes}"""

        for axis in self.axes[1]:
            assert axis in B.order.axes, f"""
[Tensordot] Input variable "B" must have axes "{axis}":
    (op) = {self}
    (op.axes[1]) = {self.axes[1]}
    (B) = {B}"""

        for axis in B.order.axes:
            if axis not in self.axes[1]:
                assert axis in self.axes[0] or axis not in A.order.axes, f"""
[Tensordot] Axes of "B" which are not reduced must not be contained in "A":
    (op) = {self}
    (A.order.axes) = {A.order.axes}
    (B.order.axes) = {B.order.axes}
    (op.axes) = {self.axes}"""

        reduction_size_a = mul(A.shape_dict[a] for a in self.axes[0])
        reduction_size_b = mul(B.shape_dict[a] for a in self.axes[1])
        assert reduction_size_a == reduction_size_b, f"""
[Tensordot] Reduction size of "A" and "B" must be same:
    (A) = {A}
    (B) = {B}
    (axes) = {self.axes}
    (reduction size of A) = {reduction_size_a}
    (reduction size of B) = {reduction_size_b}
"""

        self.append_input("A", A)
        self.append_input("B", B)
        return self.exec()
Example 25
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]
    axes = op.axes

    # Reduced axes must be the innermost axes of the input variables.
    assert A.order.axes[-len(axes[0]):] == axes[0]
    assert B.order.axes[-len(axes[1]):] == axes[1]

    # The output variable's axis order must be [*a_remained_axes, *b_remained_axes]
    assert C.order.axes[:A.ndim - len(axes[0])] == A.order.axes[:-len(axes[0])]
    assert C.order.axes[-(B.ndim -
                          len(axes[1])):] == B.order.axes[:-len(axes[1])]
    assert C.ndim == A.ndim - len(axes[0]) + B.ndim - len(axes[1])

    K = mul(A.shape_dict[a] for a in axes[0])
    M = A.size // K
    N = B.size // K

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "sgemm_A": memory_layout[A],
        "sgemm_B": memory_layout[B],
        "sgemm_C": memory_layout[C],
        "sgemm_M": M,
        "sgemm_N": N,
        "sgemm_K": K
    })

    if op.has_attribute(UseEigenAttribute):
        source = generate_template_eigen(True, False)

    else:
        source = generate_template(True, False)

    name_injector = KernelNameInjector(op)

    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
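The M/N/K values above map the tensordot onto a single GEMM: K is the product of the reduced dimensions, and M and N are whatever remains of A and B respectively. A minimal numeric sketch:

A_shape, B_shape = (2, 3, 4), (4, 5)   # reduce A's last axis against B's first axis
K = 4                                  # product of the reduced dimensions
M = (2 * 3 * 4) // K                   # = 6, rows of the flattened A
N = (4 * 5) // K                       # = 5, columns of the flattened B
assert (M, N, K) == (6, 5, 4)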
Example 26
def local_response_normalization_same_order(
        op: LocalResponseNormalization,
        memory_layout: MemoryLayout) -> List[Kernel]:
    x = op.inputs["x"]
    y = op.outputs["y"]

    target_axis = Axis.C  # FIXME
    target_axis_index = x.order.axes_dict[target_axis]
    D1 = mul(x.shape[:target_axis_index])
    D2 = x.shape[target_axis_index]
    D3 = mul(x.shape[target_axis_index + 1:])

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "local_response_normalization_X": memory_layout[x],
        "local_response_normalization_Y": memory_layout[y],
        "local_response_normalization_D1": D1,
        "local_response_normalization_D2": D2,
        "local_response_normalization_D3": D3,
        "local_response_normalization_param_half_n": int(op.parameters["n"] // 2),
        "local_response_normalization_param_k": float(op.parameters["k"]),
        "local_response_normalization_param_alpha": float(op.parameters["alpha"]),
        "local_response_normalization_param_minus_beta": float(-op.parameters["beta"])
    })

    name_injector = KernelNameInjector(op)

    source = template_same_order
    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source}, name_injector.name,
                    GPUSize(8, 1, 1),
                    GPUSize(MAX_THREADS_PER_THREADGROUP, 1, 1),
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
Example 27
def _convert_reshape(converter: ChainerConverter, c_op: "chainer.functions.Reshape"):
    x = converter.get_variable(c_op.inputs[0])
    if any(not Placeholder.check_resolved(v) for v in x.shape):
        raise NotImplementedError("[ChainerConverter] \"Reshape\" for dynamic shape variables is not supported")

    out_shape = list(c_op.shape)
    out_order = Order([None] * len(out_shape))
    if -1 in out_shape:
        i = out_shape.index(-1)
        out_shape.pop(i)
        out_shape.insert(i, x.size // mul(out_shape))

    assert mul(out_shape) == x.size, f"[ChainerConverter] Shape mismatch: mul(out_shape)={mul(out_shape)}, x.size={x.size}"

    y = x.reshape(out_shape, out_order)

    converter.set_variable(c_op.outputs[0](), y)
Example 28
def optimize_loop_structure(variables: List[Variable], key_variable: Variable):
    """
    Optimize the loop structure for iterating over each element in `variables`

    Returns:
        (tuple): two elements are returned

        - First one is shape dictionary of all variables.
        - Second one is stride dictionary of all variables.
    """
    orders, shape_dicts = _simplify_orders(
        variables
    )  # type: Dict[Variable, Order], Dict[Variable, AxisKeyDict[List[int]]]
    shapes = {
        v: [shape_dicts[v][a] for a in orders[v].axes]
        for v in variables
    }
    strides = {
        v:
        [mul(shapes[v][orders[v].axes_dict[a] + 1:]) for a in orders[v].axes]
        for v in variables
    }
    stride_dicts = {
        v: AxisKeyDict(orders[v].axes, strides[v])
        for v in variables
    }

    # re-ordering
    axes = []
    for v in sorted(variables, key=lambda v: orders[v].ndim):
        axes += [axis for axis in orders[v].axes if axis not in axes]

    orders = {
        v: Order(list(filter(lambda x: x in orders[v].axes, axes)))
        for v in variables
    }
    shapes = {
        v: [shape_dicts[v][a] for a in orders[v].axes]
        for v in variables
    }
    strides = {
        v: [stride_dicts[v][a] for a in orders[v].axes]
        for v in variables
    }

    key_order = orders[key_variable]
    if key_order.ndim > 4:
        raise NotImplementedError(
            'Currently, loop nest depth larger than 4 is not supported')

    for v in variables:
        shape = shapes[v]
        stride = strides[v]
        while len(shape) < 4:
            stride.append(1)
            shape.append(1)

    return shapes, strides
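The final loop pads every variable's shape and stride lists to a fixed nest depth of four so the generated kernel can always emit four loops; size-1 trailing axes make the extra iterations no-ops. A sketch of that padding (`pad_to_ndim` is a hypothetical helper):

def pad_to_ndim(shape, stride, ndim=4):
    shape, stride = list(shape), list(stride)
    while len(shape) < ndim:
        shape.append(1)    # a size-1 axis contributes a single iteration
        stride.append(1)
    return shape, stride

assert pad_to_ndim([6, 4], [4, 1]) == ([6, 4, 1, 1], [4, 1, 1, 1])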
Example 29
    def __init__(self, name: Optional[str], M: Union[int, Placeholder],
                 N: Union[int, Placeholder], K: Union[int, Placeholder],
                 out_shape: Sequence[Union[int, Placeholder]],
                 out_order: Order, transpose_A: bool, transpose_B: bool):
        super().__init__(name)

        assert len(out_shape) == out_order.ndim
        if Placeholder.check_resolved(
                mul(out_shape)) and Placeholder.check_resolved(M * N):
            assert mul(out_shape) == M * N

        self.parameters["M"] = M
        self.parameters["N"] = N
        self.parameters["K"] = K
        self.parameters["out_shape"] = out_shape
        self.parameters["out_order"] = out_order
        self.parameters["transpose_A"] = transpose_A
        self.parameters["transpose_B"] = transpose_B
Example 30
def _convert_flatten(converter: KerasConverter, k_op: "keras.layers.Flatten"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])

    # flatten without changing memory layout
    y, = Reshape(None,
                 in_order=x.order,
                 out_order=OrderNC,
                 out_shape=[x.shape[0], mul(x.shape[1:])])(x)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)