def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Linear):
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter = Axis()
        if x.ndim == 2:
            w, = ReinterpretAxis(None, in_order=OrderNC,
                                 out_order=Order([Axis.C, a_filter]))(w)
            new_y, = Tensordot(None, axes=[Axis.C, a_filter])(x, w)

        elif x.ndim == 4:
            w, = ReinterpretAxis(None, in_order=OrderNHWC,
                                 out_order=Order([Axis.C, Axis.H, Axis.W, a_filter]))(w)
            new_y, = Tensordot(None, axes=[[Axis.H, Axis.W, Axis.C],
                                           [Axis.H, Axis.W, a_filter]])(x, w)

        else:
            raise NotImplementedError

        OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

    return graph, flag_changed
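# A standalone numpy sketch (not part of the source) illustrating why the rule
# above is sound: a fully-connected layer y = x @ w.T, with w stored as
# (C_out, C_in) (OrderNC), is exactly a tensordot reducing x's C axis against
# w's C_in axis. The 4-dim case reduces H, W and C simultaneously, matching
# the [[H, W, C], [H, W, a_filter]] axes pair.
import numpy as np

x = np.random.rand(8, 16).astype(np.float32)         # (N, C)
w = np.random.rand(32, 16).astype(np.float32)        # (C_out, C_in)
np.testing.assert_allclose(np.tensordot(x, w, axes=(1, 1)), x @ w.T, rtol=1e-5)

x4 = np.random.rand(8, 5, 5, 16).astype(np.float32)  # (N, H, W, C)
w4 = np.random.rand(32, 5, 5, 16).astype(np.float32) # (C_out, H, W, C_in)
np.testing.assert_allclose(
    np.tensordot(x4, w4, axes=([1, 2, 3], [1, 2, 3])),
    x4.reshape(8, -1) @ w4.reshape(32, -1).T, rtol=1e-4)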
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph),
                                    Deconvolution2D):  # type: Deconvolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
        w, = ReinterpretAxis(None, in_order=OrderNHWC,
                             out_order=Order([Axis.C, a_kh, a_kw, a_filter]))(w)
        x, = ReinterpretAxis(None, in_order=OrderNHWC,
                             out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)

        col, = Tensordot(None, axes=a_filter)(x, w)
        col = col.transpose(Order([Axis.N, Axis.H, Axis.W, a_kh, a_kw, Axis.C]))
        col = col.reshape(shape=[*col.shape[0:3], mul(col.shape[3:6])],
                          order=OrderNHWC)

        new_y, = Col2Im(None, ksize=op.ksize, stride=op.stride,
                        padding=op.padding)(col)
        OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

    return graph, flag_changed
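# A minimal numpy sketch (illustrative only; 1-D, stride 1, no padding, single
# channel — all assumptions, not the source's API) of the tensordot + col2im
# decomposition above: the tensordot produces, for each input position, the
# full k-tap contribution of that input sample, and col2im scatter-adds the
# overlapping contributions into the output.
import numpy as np

def deconv1d_reference(x, w):
    # x: (W_in,), w: (K,) -> y: (W_in + K - 1,)
    y = np.zeros(len(x) + len(w) - 1, dtype=x.dtype)
    for i, xi in enumerate(x):
        y[i:i + len(w)] += xi * w
    return y

x = np.random.rand(6).astype(np.float32)
w = np.random.rand(3).astype(np.float32)

col = np.tensordot(x, w, axes=0)          # (W_in, K): per-position contributions
y = np.zeros(len(x) + len(w) - 1, dtype=np.float32)
for i in range(len(x)):                   # col2im: scatter-add each row
    y[i:i + len(w)] += col[i]

np.testing.assert_allclose(y, deconv1d_reference(x, w), rtol=1e-6)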
def _convert_gemm(converter: ONNXConverter, onnx_op: INodeProto):
    A = converter.get_variable(onnx_op.input[0])
    B = converter.get_variable(onnx_op.input[1])
    C = converter.get_variable(onnx_op.input[2])

    attrs = attribute_dict(onnx_op)
    alpha = attrs["alpha"].f
    beta = attrs["beta"].f
    # NOTE: "attrs" maps names to AttributeProto objects, which are always
    # truthy. Read the integer value explicitly; otherwise "broadcast=0" would
    # be treated as true.
    broadcast = attrs["broadcast"].i if "broadcast" in attrs else 0

    trans_A = "transA" in attrs and attrs["transA"].i
    trans_B = "transB" in attrs and attrs["transB"].i
    y, = Tensordot(None, axes=(A.order.axes[0 if trans_A else 1],
                               B.order.axes[1 if trans_B else 0]))(A, B)

    if broadcast:
        check_broadcast_constraints(y, C)
    else:
        y.order.unify(C.order)

    y = alpha * y + beta * C
    converter.set_variable(onnx_op.output[0], y)
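# A numpy restatement (not part of the source) of the ONNX Gemm semantics
# handled above: Y = alpha * A' @ B' + beta * C, where A' / B' are optionally
# transposed. With transA set, the reduced axis of A is axis 0, matching the
# index arithmetic in the converter.
import numpy as np

alpha, beta = 0.5, 2.0
A = np.random.rand(4, 3).astype(np.float32)   # transA=1: used as A.T
B = np.random.rand(4, 5).astype(np.float32)
C = np.random.rand(3, 5).astype(np.float32)

Y = alpha * np.tensordot(A, B, axes=(0, 0)) + beta * C
np.testing.assert_allclose(Y, alpha * (A.T @ B) + beta * C, rtol=1e-6)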
def matmul_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    a = converter.get_variable(tf_op.inputs[0])
    b = converter.get_variable(tf_op.inputs[1])
    transposed_a = tf_op.get_attr("transpose_a")
    transposed_b = tf_op.get_attr("transpose_b")

    if a.ndim > 2 or b.ndim > 2:
        raise NotImplementedError(
            "[TensorFlowConverter] Currently, MatMul is supported only for the 2D x 2D case.")

    # The contracted axis of each operand depends on its transpose flag.
    reduced_axes = []
    reduced_axes.append(a.order.axes[0] if transposed_a else a.order.axes[1])
    reduced_axes.append(b.order.axes[1] if transposed_b else b.order.axes[0])

    c, = Tensordot(None, axes=reduced_axes)(a, b)
    converter.set_variable(tf_op.outputs[0], c)
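# A numpy sketch (not part of the source) of the reduced-axis selection above:
# the transpose flags decide which axis of each operand is contracted. Shown
# for transpose_a=False, transpose_b=True, i.e. tf.matmul(a, b, transpose_b=True).
import numpy as np

a = np.random.rand(2, 7).astype(np.float32)
b = np.random.rand(5, 7).astype(np.float32)

# transpose_a=False -> reduce a's axis 1; transpose_b=True -> reduce b's axis 1
c = np.tensordot(a, b, axes=(1, 1))
np.testing.assert_allclose(c, a @ b.T, rtol=1e-6)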
def _convert_mat_mul_var_var(
        converter: ChainerConverter,
        c_op: "chainer.functions.math.basic_math.MatMulVarVar"):
    x1 = converter.get_variable(c_op.inputs[0])
    x2 = converter.get_variable(c_op.inputs[1])

    y, = Tensordot(None, axes=[x1.order.axes[1], x2.order.axes[0]])(x1, x2)
    converter.set_variable(c_op.outputs[0](), y)
def tensordot(op: Tensordot, memory_layout: MemoryLayout) -> List[Kernel]:
    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]

    axes = op.axes

    # Reduced axes must be located at the innermost positions of the input variables.
    assert A.order.axes[-len(axes[0]):] == axes[0]
    assert B.order.axes[-len(axes[1]):] == axes[1]

    # The output variable's axes must be ordered as [*A_remaining_axes, *B_remaining_axes].
    assert C.order.axes[:A.ndim - len(axes[0])] == A.order.axes[:-len(axes[0])]
    assert C.order.axes[-(B.ndim - len(axes[1])):] == B.order.axes[:-len(axes[1])]
    assert C.ndim == A.ndim - len(axes[0]) + B.ndim - len(axes[1])

    # Map the tensordot onto a single sgemm call: K is the product of the
    # reduced axes; M and N are the products of the remaining axes of A and B.
    K = mul(A.shape_dict[a] for a in axes[0])
    M = A.size // K
    N = B.size // K

    buffer_injector = BufferInjector()
    buffer_injector.register({
        "sgemm_A": memory_layout[A],
        "sgemm_B": memory_layout[B],
        "sgemm_C": memory_layout[C],
        "sgemm_M": M,
        "sgemm_N": N,
        "sgemm_K": K
    })

    # Only the kernel template differs between the Eigen and generic paths;
    # the buffers and the M, N, K values registered above are shared.
    if op.has_attribute(UseEigenAttribute):
        source = generate_template_eigen(True, False)
    else:
        source = generate_template(True, False)

    name_injector = KernelNameInjector(op)

    source = buffer_injector.inject(source)
    source = name_injector.inject(source)

    kernel = Kernel({name_injector.name: source},
                    name_injector.name,
                    buffer_injector.buffer,
                    buffer_injector.unresolved_value_list)

    return [kernel]
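# A numpy sketch (not part of the source) of the sgemm mapping above: when the
# reduced axes are innermost in both operands, tensordot is a single
# (M, K) x (K, N) matrix multiply on the flattened views.
import numpy as np

A = np.random.rand(2, 3, 4, 5).astype(np.float32)  # remaining (2, 3), reduced (4, 5)
B = np.random.rand(6, 4, 5).astype(np.float32)     # remaining (6,),  reduced (4, 5)

K = 4 * 5
M = A.size // K
N = B.size // K

C = A.reshape(M, K) @ B.reshape(N, K).T            # the single sgemm call
np.testing.assert_allclose(
    C.reshape(2, 3, 6),
    np.tensordot(A, B, axes=([2, 3], [1, 2])), rtol=1e-5)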
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph),
                                    Convolution2D):  # type: Convolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter, a_kh, a_kw = Axis(), Axis(), Axis()
        w, = ReinterpretAxis(None, in_order=OrderNHWC,
                             out_order=Order([Axis.C, a_kh, a_kw, a_filter]))(w)

        if op.WH == 1 and op.WW == 1 and op.stride == (1, 1) and op.padding == (0, 0):
            # Projection (1x1 convolution): no im2col is needed.
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(x)
            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        elif op.WH == x.shape_dict[Axis.H] and op.WW == x.shape_dict[Axis.W] \
                and op.padding == (0, 0):
            # Global convolution: the kernel covers the whole input plane.
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, a_kh, a_kw, a_filter]))(x)
            new_y, = Tensordot(None, [[a_kh, a_kw, a_filter],
                                      [a_kh, a_kw, a_filter]])(col, w)

        else:
            # General convolution
            col, = Im2Col(None, ksize=op.ksize, stride=op.stride,
                          padding=op.padding, dilation_rate=op.dilation_rate)(x)
            col, = ReinterpretAxis(None, in_order=OrderNHWC,
                                   out_order=Order([Axis.N, Axis.H, Axis.W, a_filter]))(col)
            new_y, = Tensordot(None, [[a_filter], [a_kh, a_kw, a_filter]])(col, w)

        new_y = new_y.transpose(y.order)
        OptimizeRule.replace_variable(graph, new_y, y)

    return graph, flag_changed
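# A minimal numpy im2col sketch (assumptions: one image, stride 1, no padding,
# HWC layout — the helper below is illustrative, not the source's Im2Col)
# showing the "general convolution" path above: im2col turns convolution into
# a tensordot over a single flattened filter axis.
import numpy as np

def im2col(x, kh, kw):
    # x: (H, W, C) -> col: (H-kh+1, W-kw+1, kh*kw*C)
    H, W, C = x.shape
    out = np.empty((H - kh + 1, W - kw + 1, kh * kw * C), dtype=x.dtype)
    for i in range(H - kh + 1):
        for j in range(W - kw + 1):
            out[i, j] = x[i:i + kh, j:j + kw].ravel()
    return out

x = np.random.rand(6, 6, 3).astype(np.float32)
w = np.random.rand(4, 3, 3, 3).astype(np.float32)    # (C_out, KH, KW, C_in)

col = im2col(x, 3, 3)                                # (4, 4, 27)
y = np.tensordot(col, w.reshape(4, -1), axes=(2, 1)) # (4, 4, C_out)

# direct convolution for reference
y_ref = np.zeros_like(y)
for i in range(4):
    for j in range(4):
        y_ref[i, j] = np.tensordot(x[i:i + 3, j:j + 3], w, axes=([0, 1, 2], [1, 2, 3]))

np.testing.assert_allclose(y, y_ref, rtol=1e-5)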
def _convert_dense(converter: KerasConverter, k_op: "keras.layers.Dense"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    w = converter.convert_to_constant_variable(k_op.kernel, Order([None, None]))

    y, = Tensordot(None, axes=[x.order.axes[-1], w.order.axes[0]])(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, Order([None]))
        b.order.axes[0].unify(w.order.axes[1])
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
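# A numpy sketch (not part of the source): Keras stores Dense kernels as
# (units_in, units_out), so the contraction above pairs x's last axis with the
# kernel's first axis, and the bias is added along the output axis.
import numpy as np

x = np.random.rand(8, 16).astype(np.float32)
kernel = np.random.rand(16, 32).astype(np.float32)   # (in, out)
bias = np.random.rand(32).astype(np.float32)

y = np.tensordot(x, kernel, axes=(x.ndim - 1, 0)) + bias
np.testing.assert_allclose(y, x @ kernel + bias, rtol=1e-5)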
def _convert_mat_mul(converter: ChainerConverter,
                     c_op: "chainer.functions.MatMul"):
    x0 = converter.get_variable(c_op.inputs[0])
    x1 = converter.get_variable(c_op.inputs[1])

    # If both operands keep the same remaining axis (e.g. a matrix multiplied
    # by itself), reinterpret x1's axes so the output order has no duplicates.
    if x0.order.axes[1 if c_op.transa else 0] == x1.order.axes[0 if c_op.transb else 1]:
        x1 = x1.reinterpret_axes(Order([None, None]))

    y, = Tensordot(None, axes=[x0.order.axes[0 if c_op.transa else 1],
                               x1.order.axes[1 if c_op.transb else 0]])(x0, x1)
    converter.set_variable(c_op.outputs[0](), y)
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph),
                                    Deconvolution2D):  # type: Deconvolution2D
        x = op.inputs["x"]
        w = op.inputs["w"]
        y = op.outputs["y"]
        flag_changed = True
        op.remove_all()

        a_filter = Axis()
        w, = ReinterpretAxis(None,
                             in_order=Order([Axis.N, Axis.KH, Axis.KW, Axis.C]),
                             out_order=Order([Axis.C, Axis.KH, Axis.KW, a_filter]))(w)

        if op.KH == 1 and op.KW == 1 and op.stride == (1, 1) and op.padding == (0, 0):
            # Projection
            w = w.transpose(Order([Axis.C, Axis.KH, Axis.KW, a_filter]))
            w = w.reshape([w.shape_dict[Axis.C], w.shape_dict[a_filter]],
                          Order([Axis.C, a_filter]))
            new_y, = Tensordot(None, [Axis.C, a_filter])(x, w)

        else:
            # General deconvolution
            w = w.transpose(Order([a_filter, Axis.KH, Axis.KW, Axis.C]))
            col, = Tensordot(None, axes=[Axis.C, a_filter])(x, w)
            new_y, = Col2Im(None, ksize=op.ksize, stride=op.stride,
                            padding=op.padding)(col)

        OptimizeRule.replace_variable(graph, new_y.transpose_like(y), y)

    return graph, flag_changed
def _convert_linear_function(
        converter: ChainerConverter,
        c_op: "chainer.functions.connection.linear.LinearFunction"):
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])  # type: ConstantVariable

    y, = Tensordot(None, axes=[x.order.axes[1:], w.order.axes[1]])(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        b = converter.get_variable(c_op.inputs[2])
        check_broadcast_constraints(y, b)
        y = y + b

    converter.set_variable(c_op.outputs[0](), y)
def template(a_shape=(2, 3, 4, 5), b_shape=(3, 4, 5, 6), axes=((1, 2, 3), (0, 1, 2)),
             backend=None, description: str = ""):
    va = np.random.rand(*a_shape).astype(np.float32)
    vb = np.random.rand(*b_shape).astype(np.float32)
    vc = np.tensordot(va, vb, axes)

    a = Variable(a_shape, Order([None] * len(a_shape)))
    b = Variable(b_shape, Order([None] * len(b_shape)))
    c, = Tensordot(None, axes=[[v.order.axes[i] for i in axis_indices]
                               for v, axis_indices in zip([a, b], axes)])(a, b)

    generate_kernel_test_case(
        description=f"Tensordot {description}",
        backend=backend,
        graph=Graph([a, b], [c]),
        inputs={a: va, b: vb},
        expected={c: vc}
    )
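# Hypothetical uses of the template above (the test names and shapes are
# illustrative, not taken from the source's test suite):
def test_tensordot_matrix_case():
    # plain 2-D matrix multiplication: reduce a's axis 1 against b's axis 0
    template(a_shape=(4, 8), b_shape=(8, 6), axes=((1,), (0,)),
             description="2D x 2D")

def test_tensordot_multi_axis():
    # contract two axes at once, as in the default arguments
    template(a_shape=(2, 3, 4), b_shape=(3, 4, 5), axes=((1, 2), (0, 1)),
             description="multi-axis")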
def _convert_separable_conv2d(converter: KerasConverter,
                              k_op: "keras.layers.SeparableConv2D"):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    check_data_format(x, k_op.data_format)

    axis_c_in = Axis.C
    axis_c_out = Axis()
    axis_depth_multiplier = Axis()

    w_depthwise = converter.convert_to_constant_variable(
        k_op.depthwise_kernel,
        Order([Axis.KH, Axis.KW, axis_c_in, axis_depth_multiplier]))

    w_pointwise = converter.convert_to_constant_variable(
        k_op.pointwise_kernel,
        Order([Axis.KH, Axis.KW, axis_c_in, axis_c_out]))
    w_pointwise = w_pointwise.reshape(
        shape=[x.shape_dict[axis_c_in], k_op.depth_multiplier,
               w_pointwise.shape_dict[axis_c_out]],
        order=Order([axis_c_in, axis_depth_multiplier, axis_c_out]))

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)
    padding = (parse_padding(k_op.padding, ksize[0], dilation_rate[0]),
               parse_padding(k_op.padding, ksize[1], dilation_rate[1]))
    if any(p[0] != p[1] for p in padding):
        raise NotImplementedError(
            "[KerasConverter] \"Different size padding\" is not supported yet")
    padding = tuple(p[0] for p in padding)

    h, = Im2Col(None, ksize=ksize, stride=stride, padding=padding,
                dilation_rate=dilation_rate)(x)

    # TODO: Support depth-wise convolution natively
    # Currently, depth-wise convolution is not supported natively, and is
    # emulated by a composition of small convolution operations.
    ys = []
    for i in range(h.shape_dict[axis_c_in]):
        # 1. Depthwise convolution
        #
        # Ideal                              | Current implementation
        # -----------------------------------+------------------------------------
        # h.axes=[N, H, W, KH, KW, C_in]     | h_sub.axes=[N, H, W, KH, KW]
        # w.axes=[KH, KW, C_in, DM]          | w_sub.axes=[KH, KW, DM]
        # g.axes=[N, H, W, C_in, DM]         | g_sub.axes=[N, H, W, DM]
        h_sub, = Slice(None, indices=AxisKeyDict(
            h.order.axes,
            [i if a == axis_c_in else slice(None) for a in h.order.axes]))(h)
        w_depthwise_sub = w_depthwise[:, :, i, :]
        g_sub, = Tensordot(None, axes=((Axis.KH, Axis.KW),
                                       (Axis.KH, Axis.KW)))(h_sub, w_depthwise_sub)

        # 2. Pointwise (projection) convolution
        #
        # Ideal                              | Current implementation
        # -----------------------------------+------------------------------------
        # g.axes=[N, H, W, C_in, DM]         | g_sub.axes=[N, H, W, DM]
        # w.axes=[DM, C_in, C_out]           | w_sub.axes=[DM, C_out]
        # y.axes=[N, H, W, C_out]            | y_sub.axes=[N, H, W, C_out]
        w_pointwise_sub = w_pointwise[i, :, :]
        y_sub, = Tensordot(None, axes=((axis_depth_multiplier,),
                                       (axis_depth_multiplier,)))(g_sub, w_pointwise_sub)
        ys.append(y_sub)

    # Sum up all sub-convolution results into one
    while len(ys) > 1:
        ys.append(ys.pop(0) + ys.pop(0))
    y = ys[0]

    # reinterpret axis "C_out" as C
    axes = list(y.order.axes)
    i = axes.index(axis_c_out)
    axes.pop(i)
    axes.insert(i, Axis.C)
    y = y.reinterpret_axes(Order(axes))

    if k_op.data_format == "channels_last":
        y = y.transpose(OrderNHWC)
    elif k_op.data_format == "channels_first":
        y = y.transpose(OrderNCHW)
    else:
        raise NotImplementedError(
            f"[KerasConverter] Unknown data format: {k_op.data_format}")

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
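# A numpy sketch (not part of the source; the depthwise stage is already
# applied, so only the pointwise projection is shown) of the per-channel
# emulation above: summing one small tensordot per input channel equals the
# full contraction over both C_in and the depth multiplier.
import numpy as np

N, C_in, DM, C_out = 2, 3, 4, 5
g = np.random.rand(N, C_in, DM).astype(np.float32)            # depthwise outputs
w_pointwise = np.random.rand(C_in, DM, C_out).astype(np.float32)

# full pointwise projection: contract C_in and DM at once
y_full = np.tensordot(g, w_pointwise, axes=([1, 2], [0, 1]))  # (N, C_out)

# emulated: one small tensordot per input channel, summed up
y_sum = sum(np.tensordot(g[:, i, :], w_pointwise[i], axes=(1, 0))
            for i in range(C_in))
np.testing.assert_allclose(y_full, y_sum, rtol=1e-5)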
def _split_tensordot(graph: Graph, op: Tensordot, v: Variable,
                     v_pair: Sequence[Variable], axis: Axis):
    s1 = v_pair[0].shape_dict[axis]
    s2 = v_pair[1].shape_dict[axis]

    A = op.inputs["A"]
    B = op.inputs["B"]
    C = op.outputs["C"]

    axes_M = tuple(filter(lambda a: a not in op.axes[0], A.order.axes))
    axes_N = tuple(filter(lambda a: a not in op.axes[1], B.order.axes))
    axes_K_A, axes_K_B = op.axes

    K = mul(A.shape_dict[a] for a in axes_K_A)
    M = A.size // K
    N = B.size // K

    shape_M = [A.shape_dict[a] for a in axes_M]
    shape_N = [B.shape_dict[a] for a in axes_N]

    op.remove_all()

    if v == A:
        A1, A2 = v_pair

        if axis in axes_K_A:
            split_axis_A = axis

            if (B.shape_dict[axes_K_B[0]] * s1) % (s1 + s2) == 0:
                split_axis_B = axes_K_B[0]

            else:
                # Factorize B's axes that constitute K into A's corresponding axes
                B = B.transpose(Order(axes_N + axes_K_B))
                B = B.reshape(order=Order((Axis(),) + axes_K_A),
                              shape=[N] + [A.shape_dict[a] for a in axes_K_A])
                split_axis_B = split_axis_A
                axes_K_B = axes_K_A

            B1, B2 = SplitAxis(None, axis=split_axis_B,
                               sections=[(B.shape_dict[split_axis_B] * s1) // (s1 + s2)])(B)

            C1, = Tensordot(None, [axes_K_A, axes_K_B])(A1, B1)
            C2, = Tensordot(None, [axes_K_A, axes_K_B])(A2, B2)
            OptimizeRule.replace_variable(
                graph,
                (C1 + C2).reshape(shape_M + shape_N,
                                  Order(axes_M + axes_N)).transpose_like(C),
                C)

        else:
            C1, = Tensordot(None, op.axes)(A1, B)
            C2, = Tensordot(None, op.axes)(A2, B)

            for a1, a2 in zip(C1.order.axes, C2.order.axes):
                if a1 == a2 == axis:
                    continue
                a1.unify(a2)

            C_new, = Concat(None, axis=axis)(C1, C2)
            OptimizeRule.replace_variable(graph, C_new, C)

    elif v == B:
        B1, B2 = v_pair

        if axis in axes_K_B:
            split_axis_B = axis

            if (A.shape_dict[axes_K_A[0]] * s1) % (s1 + s2) == 0:
                split_axis_A = axes_K_A[0]

            else:
                # Factorize A's axes that constitute K into B's corresponding axes
                A = A.transpose(Order(axes_M + axes_K_A))
                A = A.reshape(order=Order((Axis(),) + axes_K_B),
                              shape=[M] + [B.shape_dict[a] for a in axes_K_B])
                split_axis_A = split_axis_B
                axes_K_A = axes_K_B

            A1, A2 = SplitAxis(None, axis=split_axis_A,
                               sections=[(A.shape_dict[split_axis_A] * s1) // (s1 + s2)])(A)

            C1, = Tensordot(None, [axes_K_A, axes_K_B])(A1, B1)
            C2, = Tensordot(None, [axes_K_A, axes_K_B])(A2, B2)
            OptimizeRule.replace_variable(
                graph,
                (C1 + C2).reshape(shape_M + shape_N,
                                  Order(axes_M + axes_N)).transpose_like(C),
                C)

        else:
            C1, = Tensordot(None, op.axes)(A, B1)
            C2, = Tensordot(None, op.axes)(A, B2)

            for a1, a2 in zip(C1.order.axes, C2.order.axes):
                if a1 == a2 == axis:
                    continue
                a1.unify(a2)

            C_new, = Concat(None, axis=axis)(C1, C2)
            OptimizeRule.replace_variable(graph, C_new, C)

    elif v == C:
        """
        before)

            C[M, N] = A[M, K] @ B[K, N]

        after) In case `axis` is in `N`:

            C = Concat(C1, C2)
            C1[M, N1] = A[M, K] @ B1[K, N1]
            C2[M, N2] = A[M, K] @ B2[K, N2]
        """
        raise NotImplementedError(
            f"Variable is too large to handle in WebGL backend: {v}")

    else:
        raise UnexpectedAndPleaseReportError
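# A numpy sketch (not part of the source) of the two splitting identities used
# above: splitting along the reduced axis K turns one matmul into a sum of two,
# while splitting along a remaining axis (M or N) turns it into a concatenation.
import numpy as np

A = np.random.rand(4, 6).astype(np.float32)   # (M, K)
B = np.random.rand(6, 5).astype(np.float32)   # (K, N)
C = A @ B

# split along K (here s1=2, s2=4): both operands are split, results are summed
A1, A2 = A[:, :2], A[:, 2:]
B1, B2 = B[:2, :], B[2:, :]
np.testing.assert_allclose(C, A1 @ B1 + A2 @ B2, rtol=1e-5)

# split along M (a remaining axis of A): only A is split, results are concatenated
A1, A2 = A[:1, :], A[1:, :]
np.testing.assert_allclose(C, np.concatenate([A1 @ B, A2 @ B], axis=0), rtol=1e-5)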