def _split_im2col(graph: Graph, op: Im2Col, v: Variable, v_pair: Sequence[Variable], axis: Axis):
    s1 = v_pair[0].shape_dict[axis]
    im = op.inputs["im"]
    col = op.outputs["col"]

    op.remove_all()

    if v == col:
        """
        before)

            im -{Im2Col}- col

        after)

                                +- col_0
            im -{PartialIm2Col}-+
                                +- col_1
        """
        col_0, col_1 = PartialIm2Col(None,
                                     ksize=op.ksize, stride=op.stride, padding=op.padding,
                                     dilation_rate=op.dilation_rate,
                                     axis=axis, sections=[s1])(im)

        OptimizeRule.replace_variable(graph, col_0.transpose(v_pair[0].order), v_pair[0])
        OptimizeRule.replace_variable(graph, col_1.transpose(v_pair[1].order), v_pair[1])

    elif v == im:
        raise NotImplementedError(f"Variable is too large to handle in WebGL backend: {v}")

    else:
        raise UnexpectedAndPleaseReportError
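# A minimal usage sketch of PartialIm2Col (hypothetical shapes; Variable, OrderNHWC,
# Axis and PartialIm2Col are assumed imported as in the surrounding module). Following
# the call above, `sections=[s1]` splits the im2col result into two pieces along
# `axis`, the first covering indices [0, s1).
im = Variable((1, 8, 8, 4), OrderNHWC)
col_0, col_1 = PartialIm2Col(None, ksize=3, stride=1, padding=1, dilation_rate=1,
                             axis=Axis.C, sections=[2])(im)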
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
        w, = ReinterpretAxis(None, in_order=OrderNHWC,
                             out_order=Order([Axis.C, a_kh, a_kw, a_filter]))(w)

        if op.WH == 1 and op.WW == 1 and op.stride == (1, 1) and op.padding == (0, 0):
            # Projection
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)

            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        elif op.WH == x.shape_dict[Axis.H] and op.WW == x.shape_dict[Axis.W] and op.padding == (0, 0):
            # Global convolution
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, a_kh, a_kw, a_filter]))(x)

            new_y, = Tensordot(None, [[a_kh, a_kw, a_filter], [a_kh, a_kw, a_filter]])(col, w)

        else:
            # General convolution
            col, = Im2Col(None, ksize=op.ksize, stride=op.stride, padding=op.padding,
                          dilation_rate=op.dilation_rate)(x)
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(col)

            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        new_y = new_y.transpose(y.order)
        OptimizeRule.replace_variable(graph, new_y, y)

    return graph, flag_changed
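# Sanity check for the "projection" branch above (a NumPy sketch, not WebDNN code):
# a 1x1 convolution with stride (1, 1) and no padding reduces to a tensordot over the
# channel axis, which is exactly what that branch emits.
import numpy as np
x = np.random.rand(2, 5, 5, 3).astype(np.float32)   # NHWC input
w = np.random.rand(3, 4).astype(np.float32)         # (C_in, C_out), i.e. a 1x1 kernel
y = np.tensordot(x, w, axes=([3], [0]))             # result axes: N, H, W, C_out
assert y.shape == (2, 5, 5, 4)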
def main(k, s, p, d, n, h1, w1, c1, expected_shape_dict: AxisKeyDict[int]):
    for order_x in orders4:
        op = Im2Col("im2col", ksize=k, stride=s, padding=p, dilation_rate=d)

        x = Variable((n, h1, w1, c1), OrderNHWC)
        x.change_order(order_x)

        y, = op(x)

        for axis in y.order.axes:
            assert y.shape_dict[axis] == expected_shape_dict[axis]
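# Reference formula for building expected_shape_dict (standard convolution output-size
# arithmetic; this helper is illustrative and not part of the original test):
def expected_out_size(h1: int, k: int, s: int, p: int, d: int) -> int:
    # e.g. h1=5, k=3, s=1, p=1, d=1  ->  (5 + 2 - 2 - 1) // 1 + 1 = 5
    return (h1 + 2 * p - d * (k - 1) - 1) // s + 1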
def test_NHWC():
    v_im, v_col = generate_data_311()

    im = Variable(v_im.shape, order=OrderNHWC)

    col, = Im2Col(None, ksize=3, padding=1, stride=1, dilation_rate=1)(im)
    col.change_order(OrderNHWC)

    generate_kernel_test_case(
        description=f"Im2Col output=NHWC",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([im], [col]),
        inputs={im: v_im},
        expected={col: v_col}
    )
def test_wide_stride_CNHW():
    v_im, v_col = generate_data_212()

    col_dummy = ConstantVariable(v_col, order=OrderNHWC)
    col_dummy.change_order(OrderCNHW)

    im = Variable(v_im.shape, order=OrderNHWC)

    col, = Im2Col(None, ksize=2, padding=1, stride=2, dilation_rate=1)(im)
    col.change_order(OrderCNHW)

    generate_kernel_test_case(
        description=f"Im2Col output=CNHW stride=2",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([im], [col]),
        inputs={im: v_im},
        expected={col: col_dummy.data}
    )
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]

        assert x.order == OrderNHWC
        assert y.order == OrderNHWC
        assert isinstance(w, ConstantVariable)

        flag_changed = True
        op.remove_all()
        w.change_order(OrderHWCN)

        if op.WH != 1 or op.WW != 1 or op.stride != (1, 1) or op.padding != (0, 0):
            im2col = Im2Col(None, ksize=op.ksize, stride=op.stride, padding=op.padding,
                            dilation_rate=op.dilation_rate)
            col, = im2col(x)
            col.change_order(OrderNHWC)

        else:
            col = x

        sgemm = Sgemm(None,
                      M=col.shape_dict[Axis.N] * col.shape_dict[Axis.H] * col.shape_dict[Axis.W],
                      N=w.shape_dict[Axis.N],
                      K=col.shape_dict[Axis.C],
                      out_shape=[col.shape_dict[Axis.N],
                                 col.shape_dict[Axis.H],
                                 col.shape_dict[Axis.W],
                                 w.shape_dict[Axis.N]],
                      out_order=OrderNHWC,
                      transpose_A=True if col.order == OrderNHWC else False,
                      transpose_B=True)
        new_y, = sgemm(col, w)

        new_y.replace(y)

    return graph, flag_changed
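# What the Sgemm above computes, shown as a NumPy sketch (hypothetical sizes, not
# WebDNN code): the im2col matrix is (M, K) = (N*H*W, KH*KW*C_in), the HWCN-ordered
# filter flattens to (K, N) = (KH*KW*C_in, C_out), and their product is the NHWC output.
import numpy as np
n, h, w, c_in, kh, kw, c_out = 1, 5, 5, 6, 3, 3, 4
col = np.random.rand(n * h * w, kh * kw * c_in).astype(np.float32)  # M x K
filt = np.random.rand(kh * kw * c_in, c_out).astype(np.float32)     # K x N
y = (col @ filt).reshape(n, h, w, c_out)                            # OrderNHWC output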
def _convert_im2col(converter: ChainerConverter, c_op: "chainer.functions.Im2Col"):
    x = converter.get_variable(c_op.inputs[0])
    if any(not Placeholder.check_resolved(v) for v in x.shape):
        raise NotImplementedError("[ChainerConverter] \"Im2Col\" for dynamic shape variable is not supported")

    x.order.unify(OrderNCHW)

    if c_op.cover_all:
        raise NotImplementedError("[ChainerConverter] \"Im2Col\" function with \"cover_all=True\" is not supported")

    y, = Im2Col(None,
                ksize=(c_op.kh, c_op.kw),
                stride=(c_op.sy, c_op.sx),
                padding=(c_op.ph, c_op.pw),
                dilation_rate=(c_op.dy, c_op.dx))(x)
    y = y.combine_axes([Axis.C, Axis.KH, Axis.KW], Axis.C).change_order(OrderNCHW)
    converter.set_variable(c_op.outputs[0](), y)
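# Shape context for the combine_axes call above (a NumPy sketch for illustration, not
# converter code): Chainer's F.im2col returns (N, C*KH*KW, out_H, out_W), so WebDNN's
# six-axis result is merged ([C, KH, KW] -> C) and reordered to NCHW to match.
import numpy as np
col = np.random.rand(1, 5, 5, 3, 3, 6)    # N, H, W, KH, KW, C (WebDNN layout)
col = col.transpose(0, 5, 3, 4, 1, 2)     # N, C, KH, KW, H, W
col = col.reshape(1, 6 * 3 * 3, 5, 5)     # N, C*KH*KW, H, W (Chainer layout)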
def main(im_shape=[1, 5, 5, 6], im_order=OrderNHWC, ksize=3, stride=1, padding=1, dilation_rate=1,
         expected_shape_dict: AxisKeyDict[int] = AxisKeyDict(OrderNHWKKC.axes, [1, 5, 5, 3, 3, 6])):
    op = Im2Col(None, ksize=ksize, stride=stride, padding=padding, dilation_rate=dilation_rate)

    x = Variable(im_shape, im_order)
    y, = op(x)

    for axis in y.order.axes:
        assert y.shape_dict[axis] == expected_shape_dict[axis]
def template(im_shape=[2, 3, 4, 5], im_order=OrderNCHW, col_order=col_chainer_order,
             ksize=(3, 3), padding=(1, 1), stride=(1, 1), dilation=(1, 1),
             description: str = ""):
    im = Variable(im_shape, im_order)

    op = Im2Col(None, ksize, stride, padding, dilation_rate=dilation)
    col, = op(im)
    col = col.change_order(col_order)

    vim = np.random.rand(*(im.shape_dict[a] for a in OrderNCHW.axes)).astype(np.float32)
    vcol = im2col_cpu(vim, op.KH, op.KW, op.SH, op.SW, op.PH, op.PW, dy=op.DH, dx=op.DW)
    vcol = vcol.transpose([col_chainer_order.axes_dict[a] for a in col_order.axes])
    vim = vim.transpose([OrderNCHW.axes_dict[a] for a in im_order.axes])

    generate_kernel_test_case(
        description=f"Im2Col {description}",
        backend=["webgpu", "webgl", "webassembly"],
        graph=Graph([im], [col]),
        inputs={im: vim},
        expected={col: vcol},
    )
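# Hypothetical invocations of the template above (parameter values chosen for
# illustration; they are not from the original test file):
def test_default():
    template(description="default")


def test_stride_2():
    template(ksize=(2, 2), stride=(2, 2), padding=(0, 0), description="ksize=2, stride=2")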
def _convert_separable_conv2d(converter: KerasConverter, k_op: "keras.layers.SeparableConv2D"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    check_data_format(x, k_op.data_format)

    axis_c_in = Axis.C
    axis_c_out = Axis()
    axis_depth_multiplier = Axis()

    w_depthwise = converter.convert_to_constant_variable(
        k_op.depthwise_kernel,
        Order([Axis.KH, Axis.KW, axis_c_in, axis_depth_multiplier])
    )

    w_pointwise = converter.convert_to_constant_variable(
        k_op.pointwise_kernel,
        Order([Axis.KH, Axis.KW, axis_c_in, axis_c_out])
    )
    w_pointwise = w_pointwise.reshape(
        shape=[x.shape_dict[axis_c_in], k_op.depth_multiplier, w_pointwise.shape_dict[axis_c_out]],
        order=Order([axis_c_in, axis_depth_multiplier, axis_c_out])
    )

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)
    padding = (parse_padding(k_op.padding, ksize[0], dilation_rate[0]),
               parse_padding(k_op.padding, ksize[1], dilation_rate[1]))
    if any(p[0] != p[1] for p in padding):
        raise NotImplementedError("[KerasConverter] \"Different size padding\" is not supported yet")
    padding = tuple(p[0] for p in padding)

    h, = Im2Col(None, ksize=ksize, stride=stride, padding=padding, dilation_rate=dilation_rate)(x)

    # TODO: Support depth-wise convolution natively
    # Currently, depth-wise convolution is not supported natively, and is emulated by a
    # composition of small convolution operations.
    ys = []
    for i in range(h.shape_dict[axis_c_in]):
        # 1. Depthwise convolution
        #
        # Ideal                              | Current implementation
        # -----------------------------------+----------------------------------------
        # h.axes=[N, H, W, KH, KW, C_in]     | h_sub.axes=[N, H, W, KH, KW]
        # w.axes=[KH, KW, C_in, DM]          | w_sub.axes=[KH, KW, DM]
        # g.axes=[N, H, W, C_in, DM]         | g_sub.axes=[N, H, W, DM]
        h_sub, = Slice(None, indices=AxisKeyDict(
            h.order.axes,
            [i if a == axis_c_in else slice(None) for a in h.order.axes]
        ))(h)
        w_depthwise_sub = w_depthwise[:, :, i, :]
        g_sub, = Tensordot(None, axes=((Axis.KH, Axis.KW), (Axis.KH, Axis.KW)))(h_sub, w_depthwise_sub)

        # 2. Pointwise (projection) convolution
        #
        # Ideal                              | Current implementation
        # -----------------------------------+----------------------------------------
        # g.axes=[N, H, W, C_in, DM]         | g_sub.axes=[N, H, W, DM]
        # w.axes=[DM, C_in, C_out]           | w_sub.axes=[DM, C_out]
        # y.axes=[N, H, W, C_out]            | y_sub.axes=[N, H, W, C_out]
        w_pointwise_sub = w_pointwise[i, :, :]
        y_sub, = Tensordot(None, axes=((axis_depth_multiplier,), (axis_depth_multiplier,)))(g_sub, w_pointwise_sub)

        ys.append(y_sub)

    # Sum up all sub convolution results to one
    while len(ys) > 1:
        ys.append(ys.pop(0) + ys.pop(0))

    y = ys[0]

    # Reinterpret axis "C_out" as C
    axes = list(y.order.axes)
    i = axes.index(axis_c_out)
    axes.pop(i)
    axes.insert(i, Axis.C)
    y = y.reinterpret_axes(Order(axes))

    if k_op.data_format == "channels_last":
        y = y.transpose(OrderNHWC)
    elif k_op.data_format == "channels_first":
        y = y.transpose(OrderNCHW)
    else:
        raise NotImplementedError(f"[KerasConverter] Unknown data format: {k_op.data_format}")

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
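# Why the summation loop above pops two results and appends one (an illustrative
# sketch, not converter code): it folds the list as a balanced binary tree, giving the
# graph O(log n) depth instead of the O(n) chain a left-to-right running sum would build.
ys = ["a", "b", "c", "d"]
while len(ys) > 1:
    ys.append(f"({ys.pop(0)}+{ys.pop(0)})")
print(ys[0])  # ((a+b)+(c+d))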