def test_conv_bias():
    """Conv followed by AxiswiseBias must survive ConcatAffine unchanged.

    A bias cannot be folded into the convolution filter, so the optimizer
    must keep both operators and leave the constant data untouched.
    """
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        bias = AxiswiseBias(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w = ConstantVariable(arange_shaped([4, 3, 3, 5]), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        b = ConstantVariable(arange_shaped([h.shape_dict[Axis.C]]), OrderC)
        b_data = b.data.copy()

        y, = bias(h, b)

        graph, _ = ConcatAffine().optimize(Graph([x], [y]))

        # Nothing should have been folded: both constants stay as-is.
        w_data_expected = w_data
        b_data_expected = b_data

        ops = listup_operators(graph)
        assert len(ops) == 2 and isinstance(ops[0], Convolution2D) and isinstance(ops[1], AxiswiseBias)
        assert np.all(np.equal(ops[0].inputs["w"].data, w_data_expected))
        assert np.all(np.equal(ops[1].inputs["b"].data, b_data_expected))
def conv2_d_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):  # FIXME
    """Convert a TensorFlow Conv2D (NHWC data, HWCN filter) into WebDNN Convolution2D."""
    x = converter.get_variable(tf_op.inputs[0])  # NHWC
    w = converter.get_variable(tf_op.inputs[1])  # HWCN

    # Only the NHWC layout is handled by this handler.
    assert tf_op.get_attr("data_format") == b"NHWC"
    unify_order(x.order, OrderNHWC)
    unify_order(w.order, OrderHWCN)

    ksize = (w.shape_dict[Axis.H], w.shape_dict[Axis.W])

    strides = tf_op.get_attr("strides")  # type: List[int]
    # Striding over the batch or channel axis is not supported.
    assert strides[0] == 1
    assert strides[3] == 1
    stride_hw = strides[1:3]

    pad_mode = tf_op.get_attr("padding")  # type: str
    if pad_mode == b"SAME":
        padding = (padding_same(x.shape_dict[Axis.H], ksize[0], stride_hw[0]),
                   padding_same(x.shape_dict[Axis.W], ksize[1], stride_hw[1]))
    elif pad_mode == b"VALID":
        padding = (0, 0)
    else:
        raise NotImplementedError(
            f"[TensorFlowConverter] Conv2D: padding '{pad_mode}' is not supported yet.")

    y, = Convolution2D(None, ksize=ksize, stride=stride_hw, padding=padding)(x, w)
    converter.set_variable(tf_op.outputs[0], y)
def _convert_conv2d(converter: KerasConverter, k_op: "keras.layers.Conv2D"):
    """Convert keras.layers.Conv2D into WebDNN Convolution2D (+ bias add + activation)."""
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    check_data_format(x, k_op.data_format)

    w = converter.convert_to_constant_variable(k_op.kernel, Order([Axis.KH, Axis.KW, Axis.C, Axis.N]))

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)
    # Per-axis padding, taking the dilated kernel extent into account.
    padding = tuple(parse_padding(k_op.padding, k, d) for k, d in zip(ksize, dilation_rate))

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding,
                       dilation_rate=dilation_rate)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def _convert_selected_item(
        converter: ChainerConverter,
        c_op: "chainer.functions.connection.dilated_convolution_2d.DilatedConvolution2DFunction"):
    """Convert chainer DilatedConvolution2DFunction into WebDNN Convolution2D (+ bias).

    Fix: every other 2D hyperparameter here is passed height-first —
    stride=(sy, sx), padding=(ph, pw) — but the dilation rate was passed
    width-first as (dx, dy). It is now (dy, dx) so anisotropic dilation is
    converted correctly; for the common dy == dx case nothing changes.
    """
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])
    x.order.unify(OrderNCHW)
    w.order.unify(OrderNCHW)

    # When dy == dx == 1, this degenerates to an ordinary convolution.
    conv_opr = Convolution2D(None,
                             ksize=(w.shape_dict[Axis.H], w.shape_dict[Axis.W]),
                             stride=(c_op.sy, c_op.sx),
                             padding=(c_op.ph, c_op.pw),
                             dilation_rate=(c_op.dy, c_op.dx))  # height-first, like stride/padding
    y, = conv_opr(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        bias = converter.get_variable(c_op.inputs[2])
        bias.order.unify(OrderC)
        y = y + bias

    converter.set_variable(c_op.outputs[0](), y)
def conv2_d_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    """Convert a TensorFlow Conv2D operation into a WebDNN Convolution2D."""
    x = converter.get_variable(tf_op.inputs[0])
    data_format = tf_op.get_attr("data_format")
    check_data_format(x, data_format)

    w = converter.get_variable(tf_op.inputs[1])  # HWCN
    w.order.unify(Order([Axis.KH, Axis.KW, Axis.C, Axis.N]))
    ksize = (w.shape_dict[Axis.KH], w.shape_dict[Axis.KW])

    raw_stride = tuple(tf_op.get_attr("strides"))  # type: Tuple[int,...]
    # Striding over the batch or channel axis is not supported.
    assert raw_stride[x.order.axes_dict[Axis.N]] == 1
    assert raw_stride[x.order.axes_dict[Axis.C]] == 1
    stride = (raw_stride[x.order.axes_dict[Axis.H]], raw_stride[x.order.axes_dict[Axis.W]])

    # Resolve SAME/VALID padding (may rewrite x, e.g. for asymmetric padding).
    x, padding = convolution_handler_preprocess(x,
                                                ksize=ksize,
                                                padding=tf_op.get_attr("padding"),
                                                dilation_rate=(1, 1),
                                                data_format=data_format)

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding)(x, w)
    converter.set_variable(tf_op.outputs[0], y)
def _convert_conv2d(converter: KerasConverter, k_op: "keras.layers.Conv2D"):
    """Convert keras.layers.Conv2D into WebDNN Convolution2D (+ bias add + activation)."""
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    check_data_format(x, k_op.data_format)

    w = converter.convert_to_constant_variable(k_op.kernel, Order([Axis.KH, Axis.KW, Axis.C, Axis.N]))

    # Resolve padding for this layout (may rewrite x, e.g. for asymmetric padding).
    x, padding = convolution_handler_preprocess(x,
                                                ksize=k_op.kernel_size,
                                                padding=k_op.padding,
                                                dilation_rate=k_op.dilation_rate,
                                                data_format=k_op.data_format)

    y, = Convolution2D(None,
                       ksize=k_op.kernel_size,
                       stride=k_op.strides,
                       padding=padding,
                       dilation_rate=k_op.dilation_rate)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def _convert_conv2d(converter: KerasConverter, k_op: "keras.layers.Conv2D"):
    """Convert keras.layers.Conv2D into WebDNN Convolution2D (+ bias add + activation).

    Fix: "same" padding previously used ``ksize // 2``, which is wrong when
    dilation_rate > 1 (the receptive field is the *dilated* kernel size).
    Padding is now derived from the effective kernel size
    ``k + (k - 1) * (d - 1)``; for dilation_rate == 1 the result is unchanged.
    """
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])

    if k_op.data_format == "channels_first":
        assert x.order == OrderNCHW
    elif k_op.data_format == "channels_last":
        assert x.order == OrderNHWC
    else:
        raise ValueError(f"[KerasConverter] Unknown data format is detected: {k_op.data_format}")

    w = converter.convert_to_constant_variable(k_op.kernel, OrderHWCN)

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)

    if k_op.padding == "valid":
        padding = (0, 0)
    elif k_op.padding == "same":
        # Effective (dilated) kernel extent per axis: k + (k - 1) * (d - 1)
        dilated_ksize = tuple(k + (k - 1) * (d - 1) for k, d in zip(ksize, dilation_rate))
        padding = (dilated_ksize[0] // 2, dilated_ksize[1] // 2)
    else:
        raise ValueError(f"[KerasConverter] Unknown padding: {k_op.padding}")

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding,
                       dilation_rate=dilation_rate)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def test_conv_scale():
    """ConcatAffine must fold AxiswiseScale into the convolution filter.

    After the optimization only the convolution remains, and its filter data
    equals the original filter multiplied by the per-channel scale broadcast
    along the filter's output-channel (N) axis.
    """
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        scale = AxiswiseScale(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w = ConstantVariable(arange_shaped([4, 3, 3, 5]), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        s = ConstantVariable(arange_shaped([h.shape_dict[Axis.C]]), OrderC)
        s_data = s.data.copy()

        y, = scale(h, s)

        graph, _ = ConcatAffine().optimize(Graph([x], [y]))

        # Expand s so it broadcasts along the filter's N axis (wherever it is).
        # noinspection PyTypeChecker
        expander = (None,) * order_w.axes_dict[Axis.N] + (Ellipsis,) + (None,) * (3 - order_w.axes_dict[Axis.N])
        w_data_expected = w_data * s_data[expander]

        ops = listup_operators(graph)
        assert len(ops) == 1 and isinstance(ops[0], Convolution2D)
        assert conv.outputs["y"] == y
        assert np.all(np.equal(w.data, w_data_expected))
def _convert_conv(converter: ONNXConverter, onnx_op: INodeProto):
    """Convert ONNX Conv into WebDNN Convolution2D (+ bias).

    Fixes:
    - ``dilations``, ``strides`` and ``pads`` are optional attributes in the
      ONNX spec; missing ones now fall back to their defaults (1, 1 and 0)
      instead of raising KeyError.
    - The unsupported-padding message said "odd-size"; the condition actually
      rejects asymmetric (begin != end) padding.
    """
    x = converter.get_variable(onnx_op.input[0])
    x.order.unify(OrderNCHW)

    w = converter.get_variable(onnx_op.input[1])
    w.order.unify(Order([Axis.N, Axis.C, Axis.KH, Axis.KW]))

    attrs = attribute_dict(onnx_op)
    ksize = list(attrs["kernel_shape"].ints)
    # ONNX defaults: dilation 1, stride 1, zero padding (when attribute absent).
    dilations = list(attrs["dilations"].ints) if "dilations" in attrs else [1, 1]
    stride = list(attrs["strides"].ints) if "strides" in attrs else [1, 1]
    pad = list(attrs["pads"].ints) if "pads" in attrs else [0, 0, 0, 0]

    # NOTE(review): this treats pads as (begin, end) pairs per spatial axis and
    # keeps one value per axis — only symmetric padding is representable.
    if any(pad[2 * i] != pad[2 * i + 1] for i in range(len(pad) // 2)):
        raise NotImplementedError("[ONNXConverter] asymmetric padding is not supported.")
    pad = [pad[0], pad[2]]

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=pad,
                       dilation_rate=dilations)(x, w)
    y.change_order(OrderNCHW)

    if len(onnx_op.input) == 3:
        # with bias
        b = converter.get_variable(onnx_op.input[2])
        b.order.unify(OrderC)
        y = y + b

    converter.set_variable(onnx_op.output[0], y)
def __init__(self, base: Convolution2D, limit: int, removed: int = 0):
    """Attach filter-pruning bookkeeping to *base*.

    :param base: convolution operator this attribute is registered on
    :param limit: presumably the maximum number of removable filters — TODO confirm
    :param removed: count already removed (starts at 0)
    :raises ValueError: if *base* already carries a ConvFilterPruned attribute
    """
    if base.has_attribute(ConvFilterPruned):
        raise ValueError(f"'ConvFilterPruned' attribute has been already registered to {base}.")

    super(ConvFilterPruned, self).__init__(base)
    self.limit = limit  # type: int
    self.removed = removed  # type: int
def main(k, s, p, n, h1, w1, c1, c2, expected_shape_dict: AxisKeyDict[int]):
    """Assert Convolution2D output shape matches the expectation on every output axis."""
    conv = Convolution2D(None, ksize=k, stride=s, padding=p)
    x = Variable((n, h1, w1, c1), Order([Axis.N, Axis.H, Axis.W, Axis.C]))
    w = Variable((c1, conv.ksize[0], conv.ksize[1], c2), Order([Axis.C, Axis.KH, Axis.KW, Axis.N]))

    y, = conv(x, w)

    assert all(y.shape_dict[axis] == expected_shape_dict[axis] for axis in y.order.axes)
def _convert_conv2d(converter: KerasConverter, k_op: "keras.layers.Conv2D"):
    """Convert keras.layers.Conv2D into WebDNN Convolution2D (+ bias add + activation).

    Fix: the NotImplementedError message had a stray ``f`` before the braces
    (``=f{pad_extra_shape}``), so the reported value was prefixed with a
    literal "f"; also corrected "doesn't supports" to "doesn't support".
    """
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])

    if k_op.data_format == "channels_first":
        assert x.order == OrderNCHW
    elif k_op.data_format == "channels_last":
        assert x.order == OrderNHWC
    else:
        raise ValueError(
            f"[KerasConverter] Unknown data format is detected: {k_op.data_format}")

    w = converter.convert_to_constant_variable(k_op.kernel, OrderHWCN)

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)

    if k_op.padding == "valid":
        padding = (0, 0)
    elif k_op.padding == "same":
        # @see https://github.com/tensorflow/tensorflow/blob/e5cf6f0c13b6053e4c58af6a951b204fde263172/tensorflow/python/ops/nn_ops.py#L507-L519
        dilated_ksize = [k + (k - 1) * (d - 1) for k, d in zip(ksize, dilation_rate)]
        pad_extra_shape = [dk - 1 for dk in dilated_ksize]

        # Asymmetric padding (odd total) is not representable here.
        if any(p % 2 != 0 for p in pad_extra_shape):
            raise NotImplementedError(
                f"[KerasConverter] Currently WebDNN doesn't support different size padding: "
                f" (pad_extra_shape)={pad_extra_shape}")

        padding = tuple(p // 2 for p in pad_extra_shape)
    else:
        raise ValueError(f"[KerasConverter] Unknown padding: {k_op.padding}")

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding,
                       dilation_rate=dilation_rate)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y = y + b

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def main(k, s, p, n, h1, w1, c1, c2, expected_shape_dict: Dict[Axis, int]):
    """Assert Convolution2D output shapes for every combination of data orders."""
    orders = [OrderNHWC, OrderHWNC, OrderHWCN, OrderNCHW, OrderCNHW, OrderCHWN]

    for order_x, order_w in itertools.product(orders, orders):
        conv = Convolution2D(None, ksize=k, stride=s, padding=p)

        x = Variable((n, h1, w1, c1), OrderNHWC)
        x.change_order(order_x)

        w = Variable((c1, conv.ksize[0], conv.ksize[1], c2), OrderCHWN)
        w.change_order(order_w)

        y, = conv(x, w)

        assert all(y.shape_dict[axis] == expected_shape_dict[axis] for axis in y.order.axes)
def __call__(self, inputs: List[Variable]) -> Tuple[Variable]:
    """Build a Convolution2D (plus AxiswiseBias when a third input exists)."""
    x, w = inputs[0], inputs[1]
    w_shape_dict = w.shape_dict

    conv_opr = Convolution2D(generate_unique_name(self.cfunc.label),
                             ksize=(w_shape_dict[Axis.H], w_shape_dict[Axis.W]),
                             stride=(self.cfunc.sy, self.cfunc.sx),
                             padding=(self.cfunc.ph, self.cfunc.pw))
    opr_out, = conv_opr(x, w)
    opr_out.change_order(OrderNCHW)

    if len(inputs) == 3:  # with bias
        bias_opr = AxiswiseBias(generate_unique_name(self.cfunc.label), axis=Axis.C)
        # The pre-bias convolution output becomes an internal (hidden) variable.
        self.hidden_vars.append(opr_out)
        opr_out, = bias_opr(opr_out, inputs[2])

    return opr_out,
def conv2_d_handler(converter: TensorFlowConverter, tf_op: "tf.Operation"):
    """Convert a TensorFlow Conv2D operation into a WebDNN Convolution2D.

    SAME/VALID padding is resolved via ``parse_padding``; padding that would
    only feed the trailing "apron" (input pixels never covered by any output
    position because of the stride) is cancelled first, so that
    ``convert_odd_padding_to_concat`` pads as little as possible.
    """
    x = converter.get_variable(tf_op.inputs[0])
    data_format = tf_op.get_attr("data_format")
    check_data_format(x, data_format)

    w = converter.get_variable(tf_op.inputs[1])  # HWCN
    w.order.unify(Order([Axis.KH, Axis.KW, Axis.C, Axis.N]))
    ksize = (w.shape_dict[Axis.KH], w.shape_dict[Axis.KW])

    stride = tuple(tf_op.get_attr("strides"))  # type: Tuple[int,...]
    # Striding over the batch or channel axis is not supported.
    assert stride[x.order.axes_dict[Axis.N]] == 1
    assert stride[x.order.axes_dict[Axis.C]] == 1
    stride = (stride[x.order.axes_dict[Axis.H]], stride[x.order.axes_dict[Axis.W]])

    input_size = np.array([x.shape_dict[Axis.H], x.shape_dict[Axis.W]])
    # padding[i] = (begin, end) padding of spatial axis i (0 = H, 1 = W)
    padding = np.array([parse_padding(tf_op.get_attr("padding"), ksize[0], 1),
                        parse_padding(tf_op.get_attr("padding"), ksize[1], 1)])
    # Residual input extent not covered by any kernel placement.
    apron_size = (input_size + padding.sum(axis=1) - ksize) % stride

    # cancel padding by apron if possible
    for i in (0, 1):
        if padding[i, 0] > apron_size[i]:
            padding[i, 0] -= apron_size[i]
            apron_size[i] = 0
        else:
            apron_size[i] -= padding[i, 0]
            padding[i, 0] = 0

        if padding[i, 1] > apron_size[i]:
            padding[i, 1] -= apron_size[i]
            apron_size[i] = 0
        else:
            apron_size[i] -= padding[i, 1]
            padding[i, 1] = 0

    padding = padding.tolist()
    # Remaining asymmetric padding is realized by concatenating zero tensors.
    x, padding = convert_odd_padding_to_concat(x, padding=padding)
    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding)(x, w)
    converter.set_variable(tf_op.outputs[0], y)
def _convert_convolution_2d(converter: ChainerConverter,
                            c_op: "chainer.functions.connection.convolution_2d.Convolution2DFunction"):
    """Convert chainer Convolution2DFunction into WebDNN Convolution2D (+ bias)."""
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])

    y, = Convolution2D(None,
                       ksize=(w.shape_dict[Axis.H], w.shape_dict[Axis.W]),
                       stride=(c_op.sy, c_op.sx),
                       padding=(c_op.ph, c_op.pw))(x, w)

    if len(c_op.inputs) == 3:  # with bias
        bias = converter.get_variable(c_op.inputs[2])
        y = y + bias

    converter.set_variable(c_op.outputs[0](), y)
def _convert_selected_item(converter: ChainerConverter,
                           c_op: chainer.functions.connection.
                           dilated_convolution_2d.DilatedConvolution2DFunction):
    """Convert chainer DilatedConvolution2DFunction into WebDNN Convolution2D (+ AxiswiseBias).

    Fix: all other 2D hyperparameters are passed height-first — stride=(sy, sx),
    padding=(ph, pw) — but the dilation rate was passed width-first as
    (dx, dy). It is now (dy, dx); for the common dy == dx case nothing changes.
    """
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])

    # When dy == dx == 1, this is an ordinary convolution.
    conv_opr = Convolution2D(None,
                             ksize=(w.shape_dict[Axis.H], w.shape_dict[Axis.W]),
                             stride=(c_op.sy, c_op.sx),
                             padding=(c_op.ph, c_op.pw),
                             dilation_rate=(c_op.dy, c_op.dx))  # height-first, like stride/padding
    y, = conv_opr(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        bias_opr = AxiswiseBias(None, axis=Axis.C)
        bias = converter.get_variable(c_op.inputs[2])
        y, = bias_opr(y, bias)

    converter.set_variable(c_op.outputs[0](), y)
def _convert_convolution_2d(converter: ChainerConverter,
                            c_op: "chainer.functions.connection.convolution_2d.Convolution2DFunction"):
    """Convert chainer Convolution2DFunction into WebDNN Convolution2D (+ bias)."""
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])
    x.order.unify(OrderNCHW)
    w.order.unify(Order([Axis.N, Axis.C, Axis.KH, Axis.KW]))

    y, = Convolution2D(None,
                       ksize=(w.shape_dict[Axis.KH], w.shape_dict[Axis.KW]),
                       stride=(c_op.sy, c_op.sx),
                       padding=(c_op.ph, c_op.pw))(x, w)

    if len(c_op.inputs) == 3:  # with bias
        b = converter.get_variable(c_op.inputs[2])
        b.order.unify(OrderC)
        y = y + b

    converter.set_variable(c_op.outputs[0](), y)
def convert_layer_conv2d(self, layer_config: Dict[str, object], inputs: List[Variable]) -> List[Variable]:
    """Convert a Keras Conv2D layer configuration into WebDNN operators.

    Example of ``layer_config``::

        {'class_name': 'Conv2D',
         'config': {'activation': 'relu', 'data_format': 'channels_last',
                    'dilation_rate': [1, 1], 'filters': 64,
                    'kernel_size': [3, 3], 'name': 'conv2d_2',
                    'padding': 'valid', 'strides': [1, 1],
                    'use_bias': True, ...}}

    :param layer_config: layer configuration dictionary
    :param inputs: list containing exactly one input variable
    :return: list containing the single output variable
    """
    assert len(inputs) == 1
    input = inputs[0]
    name: str = layer_config["name"]

    weight_array = self.weights[f"{name}/{name}/kernel:0"].value
    assert layer_config["data_format"] == "channels_last"
    weight_var = ConstantVariable(weight_array, OrderHWCN)  # order does not depend on data_format

    ksize: Tuple[int, int] = tuple(layer_config["kernel_size"])
    stride: Tuple[int, int] = tuple(layer_config["strides"])
    padding_keras: str = layer_config["padding"]  # valid or same

    if isinstance(padding_keras, tuple):
        # preprocess_zeropadding2d already resolved the padding into numbers
        padding = padding_keras
    elif padding_keras == "valid":
        padding = (0, 0)
    elif padding_keras == "same":
        padding = (ksize[0] // 2, ksize[1] // 2)
    else:
        raise ValueError("Unknown padding")

    conv2d_opr = Convolution2D(name, ksize=ksize, stride=stride, padding=padding)
    y, = conv2d_opr(input, weight_var)

    if layer_config["use_bias"]:
        bias_array = self.weights[f"{name}/{name}/bias:0"].value
        bias_var = ConstantVariable(bias_array, OrderC)
        bias_opr = AxiswiseBias(name + "_bias", Axis.C)
        y, = bias_opr(y, bias_var)

    act_opr: Operator = None
    activation_type: str = layer_config["activation"]
    if activation_type == "relu":
        act_opr = Relu(name + "_activation")
    elif activation_type == "softmax":
        warn("omitting softmax activation")
    elif activation_type == "linear":
        pass
    else:
        raise NotImplementedError(f"Unknown activation {activation_type}")

    if act_opr is not None:
        y, = act_opr(y)

    return [y]
def get(base: Convolution2D):
    """Return the ConvFilterPruned attribute attached to *base*, or None when absent."""
    if not base.has_attribute(ConvFilterPruned):
        return None
    return base.get_attribute(ConvFilterPruned)[0]
def has(base: Convolution2D):
    """Tell whether *base* already carries a ConvFilterPruned attribute."""
    return base.has_attribute(ConvFilterPruned)
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """Compress constant convolution filters with a truncated SVD.

    Each eligible Convolution2D (constant filter, stride 1, dilation 1,
    padding matching (ksize - 1) / 2) is replaced by a 1x1 "squeeze"
    convolution followed by an original-size "expand" convolution whenever
    that pair needs fewer multiply-accumulates than the original operator.

    :param graph: computation graph to optimize (modified in place)
    :return: the graph and a flag telling whether anything changed
    """
    flag_changed = False
    for conv in traverse.filter_nodes(traverse.listup_operators(graph), Convolution2D):  # type: Convolution2D
        x = conv.inputs["x"]
        w = conv.inputs["w"]
        y = conv.outputs["y"]

        # SVD needs the concrete filter values.
        if not isinstance(w, ConstantVariable):
            continue

        C2 = w.shape_dict[Axis.N]  # output channels
        KH = w.shape_dict[Axis.H]
        KW = w.shape_dict[Axis.W]
        C1 = w.shape_dict[Axis.C]  # input channels

        # Skip convolutions this rule already produced (avoid re-compression).
        if conv.has_attribute(Convolution2DSvdCompressed):
            continue

        if KH != conv.PH * 2 + 1 or KW != conv.PW * 2 + 1 or conv.SH != 1 or conv.SW != 1 or conv.DH != 1 or conv.DW != 1:
            # TODO: Is this constraint required?
            continue

        # Work on a copy so the original constant is untouched until we commit.
        w_copy = w.copy()
        w_copy.change_order(OrderNHWC)
        d = w_copy.data.reshape((C2 * KH * KW, C1))
        # _svd presumably returns low-rank factors of d with inner dimension
        # C3 (the kept rank) at threshold 0.5 — TODO confirm its contract.
        d_expand, d_squeeze = _svd(d, 0.5)
        C3 = d_expand.shape[1]

        """
        Computation complexity:

            before) C1*C2*KH*KW
            after)  C1*C3 + C3*C2*KH*KW

            C1*C2*KH*KW > C1*C3 + C3*C2*KH*KW
            <=> (C1*C2*KH*KW) / (C1+C2*KH*KW) > C3
        """
        relative_complexity = (C1 * C3 + C3 * C2 * KH * KW) / (C1 * C2 * KH * KW)
        if relative_complexity >= 1:
            """
            In this case, decomposition makes convolution more complex
            """
            continue

        conv.remove_all()

        w_expand = ConstantVariable(d_expand.reshape([C2, KH, KW, C3]), OrderNHWC)
        w_squeeze = ConstantVariable(d_squeeze.reshape([C3, 1, 1, C1]), OrderNHWC)

        # 1x1 "squeeze" conv into C3 channels, then the original-size "expand" conv.
        conv1 = Convolution2D(None, ksize=1, stride=1, padding=0, dilation_rate=1)
        conv2 = Convolution2D(None, ksize=conv.ksize, stride=conv.stride, padding=conv.padding, dilation_rate=conv.dilation_rate)

        h, = conv1(x, w_squeeze)
        y_new, = conv2(h, w_expand)

        # Mark both so this rule will not compress them again.
        conv1.attributes.add(Convolution2DSvdCompressed(conv1))
        conv2.attributes.add(Convolution2DSvdCompressed(conv2))

        OptimizeRule.replace_variable(graph, y_new.transpose_like(y), y)
        flag_changed = True

    return graph, flag_changed