def __call__(self, inputs: List[Variable]) -> Tuple[Variable]:
    # noinspection PyUnresolvedReferences
    linear_opr = Linear(generate_unique_name(self.cfunc.label))
    x = inputs[0]
    w = inputs[1]
    if x.ndim == 4 and w.ndim == 2:
        # expand w to 4 dimensions (NC -> NCHW)
        x_shape_dict = x.shape_dict
        w_shape_dict = w.shape_dict
        assert x_shape_dict[Axis.C] * x_shape_dict[Axis.H] * x_shape_dict[Axis.W] == w_shape_dict[Axis.C]
        assert w.order is OrderNC
        w.order = OrderNCHW
        w_new_shape = [w_shape_dict[Axis.N], x_shape_dict[Axis.C],
                       x_shape_dict[Axis.H], x_shape_dict[Axis.W]]
        w.shape = w_new_shape
        w.data = w.data.reshape(w_new_shape)

    opr_out, = linear_opr(inputs[0], inputs[1])

    if len(inputs) == 3:
        # with bias
        # noinspection PyUnresolvedReferences
        bias_opr = AxiswiseBias(generate_unique_name(self.cfunc.label), axis=Axis.C)
        self.hidden_vars.append(opr_out)
        opr_out, = bias_opr(opr_out, inputs[2])

    return opr_out,
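# A minimal numpy sketch (shapes made up for illustration) of why the NC -> NCHW
# weight reshape above preserves the Linear computation: the reshape only
# reinterprets the (n_out, C*H*W) buffer, so contracting the 4-D weight against
# the 4-D input over C, H, W matches the flattened matmul.
import numpy as np

batch, C, H, W, n_out = 2, 3, 4, 5, 7
x = np.random.rand(batch, C, H, W).astype(np.float32)
w2d = np.random.rand(n_out, C * H * W).astype(np.float32)  # OrderNC weight

y_flat = x.reshape(batch, -1) @ w2d.T                      # flattened matmul

w4d = w2d.reshape(n_out, C, H, W)                          # the converter's reshape
y_4d = np.tensordot(x, w4d, axes=([1, 2, 3], [1, 2, 3]))   # contract over C, H, W

assert np.allclose(y_flat, y_4d)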
def convert_layer_dense(self, layer_config: Dict[str, object],
                        inputs: List[Variable]) -> List[Variable]:
    assert len(inputs) == 1
    input = inputs[0]
    name: str = layer_config["name"]
    weight_array = self.weights[f"{name}/{name}/kernel:0"].value
    weight_var = ConstantVariable(weight_array, OrderCN)  # shape: (in, out)
    linear_opr = Linear(name)
    y, = linear_opr(input, weight_var)

    if layer_config["use_bias"]:
        bias_array = self.weights[f"{name}/{name}/bias:0"].value
        bias_var = ConstantVariable(bias_array, OrderC)
        bias_opr = AxiswiseBias(name + "_bias", Axis.C)
        y, = bias_opr(y, bias_var)

    act_opr: Optional[Operator] = None
    activation_type: str = layer_config["activation"]
    if activation_type == "relu":
        act_opr = Relu(name + "_activation")
    elif activation_type == "softmax":
        warn("omitting softmax activation")
    else:
        raise NotImplementedError(f"Unknown activation {activation_type}")

    if act_opr is not None:
        y, = act_opr(y)

    return [y]
def _convert_linear_function(converter: ChainerConverter,
                             c_op: "chainer.functions.connection.linear.LinearFunction"):
    linear_opr = Linear(None)

    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])

    if x.ndim == 4 and w.ndim == 2:
        # expand w to 4 dimensions (NC -> NCHW)
        x_shape_dict = x.shape_dict
        w_shape_dict = w.shape_dict
        assert x_shape_dict[Axis.C] * x_shape_dict[Axis.H] * x_shape_dict[Axis.W] == w_shape_dict[Axis.C]
        assert w.order is OrderNC
        w.order = OrderNCHW
        w_new_shape = [w_shape_dict[Axis.N], x_shape_dict[Axis.C],
                       x_shape_dict[Axis.H], x_shape_dict[Axis.W]]
        w.shape = w_new_shape
        w.data = w.data.reshape(w_new_shape)

    y, = linear_opr(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        bias_opr = AxiswiseBias(None, axis=Axis.C)
        bias = converter.get_variable(c_op.inputs[2])
        y, = bias_opr(y, bias)

    converter.set_variable(c_op.outputs[0](), y)
def convert_layer_batchnormalization(self, layer_config: Dict[str, object],
                                     inputs: List[Variable]) -> List[Variable]:
    """
    Example:
        {'class_name': 'BatchNormalization',
         'config': {'axis': 3,
                    'beta_constraint': None,
                    'beta_initializer': {'class_name': 'Zeros', 'config': {}},
                    'beta_regularizer': None,
                    'center': True,
                    'epsilon': 0.001,
                    'gamma_constraint': None,
                    'gamma_initializer': {'class_name': 'Ones', 'config': {}},
                    'gamma_regularizer': None,
                    'momentum': 0.99,
                    'moving_mean_initializer': {'class_name': 'Zeros', 'config': {}},
                    'moving_variance_initializer': {'class_name': 'Ones', 'config': {}},
                    'name': 'bn2a_branch2a',
                    'scale': True,
                    'trainable': True},
         'inbound_nodes': [[['res2a_branch2a', 0, 0, {}]]],
         'name': 'bn2a_branch2a'},

    :param layer_config:
    :param inputs:
    :return:
    """
    assert len(inputs) == 1
    input = inputs[0]
    name: str = layer_config["name"]
    axis = input.order.axes[layer_config["axis"]]

    mean = self.weights[f"{name}/{name}/moving_mean:0"].value
    variance = self.weights[f"{name}/{name}/moving_variance:0"].value

    if layer_config["scale"]:
        gamma = self.weights[f"{name}/{name}/gamma:0"].value
    else:
        gamma = np.ones_like(variance)

    if layer_config["center"]:
        beta = self.weights[f"{name}/{name}/beta:0"].value
    else:
        beta = np.zeros_like(mean)

    # y = (x - mean) / sqrt(var + eps) * gamma + beta
    #   = x * gamma_div_std + beta_scaled
    # gamma_div_std = gamma / sqrt(var + eps)
    # beta_scaled = beta - mean * gamma_div_std
    gamma_div_std = gamma / np.sqrt(variance + layer_config["epsilon"])
    beta_scaled = beta - mean * gamma_div_std

    scale_opr = AxiswiseScale(name + "_scale", axis=axis)
    bias_opr = AxiswiseBias(name + "_bias", axis=axis)
    scale_out, = scale_opr(input, ConstantVariable(gamma_div_std, OrderC))
    y, = bias_opr(scale_out, ConstantVariable(beta_scaled, OrderC))

    return [y]
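# A quick numpy check (values made up) of the batch-normalization folding used
# above: the direct inference-mode formula and the folded scale-plus-bias form
# are algebraically identical.
import numpy as np

eps = 1e-3  # matches the Keras epsilon in the example config
x = np.random.rand(2, 5)
mean = np.random.rand(5)
variance = np.random.rand(5) + 0.1
gamma = np.random.rand(5)
beta = np.random.rand(5)

y_direct = (x - mean) / np.sqrt(variance + eps) * gamma + beta

gamma_div_std = gamma / np.sqrt(variance + eps)
beta_scaled = beta - mean * gamma_div_std
y_folded = x * gamma_div_std + beta_scaled

assert np.allclose(y_direct, y_folded)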
def test_axiswise_bias():
    """
    before)

    b -+
       +-{AxiswiseBias}- y
    x -+

    after)

    b -+
       +-{ElementwiseAdd}- y
    x -+
    """
    b = ConstantVariable(np.random.rand(3), OrderC)
    x = Variable((2, 3, 4, 5), OrderNCHW)
    y, = AxiswiseBias(None, axis=Axis.C)(x, b)

    assert isinstance(y.output_from, AxiswiseBias)

    UpgradeOperatorType().optimize(Graph([x], [y]))

    assert isinstance(y.output_from, ElementwiseAdd)
def test_conv_bias():
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        bias = AxiswiseBias(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w_shape = [4, 3, 3, 5]
        w = ConstantVariable(arange_shaped(w_shape), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        b_shape = [h.shape_dict[Axis.C]]
        b = ConstantVariable(arange_shaped(b_shape), OrderC)
        b_data = b.data.copy()

        y, = bias(h, b)

        graph = Graph([x], [y])
        graph, _ = ConcatAffine().optimize(graph)

        # A lone bias cannot be folded into the convolution weight,
        # so both operators must survive with their data unchanged
        w_data_expected = w_data
        b_data_expected = b_data

        ops = listup_operators(graph)
        assert len(ops) == 2 and isinstance(ops[0], Convolution2D) and isinstance(ops[1], AxiswiseBias)
        assert np.all(np.equal(ops[0].inputs["w"].data, w_data_expected))
        assert np.all(np.equal(ops[1].inputs["b"].data, b_data_expected))
def _convert_batch_normalization(converter: KerasConverter,
                                 k_op: keras.layers.BatchNormalization):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])

    axis = x.order.axes[k_op.axis]

    variance_data, mean_data = K.batch_get_value([k_op.moving_variance, k_op.moving_mean])

    if k_op.scale:
        gamma_data, = K.batch_get_value([k_op.gamma])
    else:
        gamma_data = np.ones_like(variance_data)

    if k_op.center:
        beta_data, = K.batch_get_value([k_op.beta])
    else:
        beta_data = np.zeros_like(mean_data)

    gamma_div_std_data = gamma_data / np.sqrt(variance_data + k_op.epsilon)
    beta_scaled_data = beta_data - mean_data * gamma_div_std_data

    gamma_div_std = ConstantVariable(gamma_div_std_data, Order([axis]))
    beta_scaled = ConstantVariable(beta_scaled_data, Order([axis]))

    y, = AxiswiseScale(None, axis=axis)(x, gamma_div_std)
    y, = AxiswiseBias(None, axis=axis)(y, beta_scaled)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def square_converter_handler(converter, keras_layer):
    keras_x = converter.get_input_tensor(keras_layer)[0]
    webdnn_x = converter.get_variable(keras_x)

    webdnn_b = converter.convert_to_constant_variable(keras_layer.bias, OrderC)

    webdnn_operator = AxiswiseBias(None, axis=Axis.C)

    webdnn_y, = webdnn_operator(webdnn_x, webdnn_b)
    keras_y = converter.get_output_tensor(keras_layer)[0]

    converter.set_variable(keras_y, webdnn_y)
def _convert_dense(converter: KerasConverter, k_op: keras.layers.Dense):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    w = converter.convert_to_constant_variable(k_op.kernel, OrderCN)
    y, = Linear(None)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y, = AxiswiseBias(None, Axis.C)(y, b)

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def test_major_axis():
    vx = np.random.rand(10, 6, 4, 8)
    vb = np.random.rand(10)
    vy = vx + vb[:, None, None, None]

    x = Variable(vx.shape, order=OrderCNHW)
    b = ConstantVariable(vb, order=OrderC)
    y, = AxiswiseBias(None, axis=Axis.C)(x, b)

    generate_kernel_test_case(
        description="AxiswiseBias for major axis",
        backend=["webgpu", "fallback"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
def generate_graph_model2(caption_net, hidden_num):
    # inputs
    var_input_img = Variable([1, 1, hidden_num], OrderNTC)
    var_input_word = Variable([1, 1], OrderNT)
    var_switch_img = Variable([1, 1, hidden_num], OrderNTC)
    var_switch_word = Variable([1, 1, hidden_num], OrderNTC)
    var_last_h = Variable([1, hidden_num], OrderNC)
    var_last_c = Variable([1, hidden_num], OrderNC)

    # prepare for lstm
    var_emb_word, = Embedding(None)(var_input_word,
                                    ConstantVariable(caption_net.word_vec.W.data, OrderCN))  # OrderNTC
    var_lstm_input = (var_emb_word * var_switch_word) + (var_input_img * var_switch_img)

    # lstm
    lstm_opr = LSTM(None,
                    use_bias=True,
                    return_sequences=False,
                    activation="tanh",
                    recurrent_activation="sigmoid",
                    use_initial_h=True,
                    use_initial_c=True)
    w_input = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.W.data.T)
    w_hidden = _convert_lstm_to_webdnn_order(caption_net.lstm.lateral.W.data.T)
    b = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.b.data[None, :])[0]
    var_lstm_h, var_lstm_c = lstm_opr(x=var_lstm_input,
                                      w_input=ConstantVariable(w_input, OrderCN),
                                      w_hidden=ConstantVariable(w_hidden, OrderCN),
                                      b=ConstantVariable(b, OrderC),
                                      initial_h=var_last_h,
                                      initial_c=var_last_c)

    # word probability
    var_word_score, = Linear(None)(var_lstm_h,
                                   ConstantVariable(caption_net.out_word.W.data.T, OrderCN))
    var_word_score_biased, = AxiswiseBias(None, axis=Axis.C)(
        var_word_score, ConstantVariable(caption_net.out_word.b.data, OrderC))
    var_word_prob, = Softmax(None, axis=Axis.C)(var_word_score_biased)

    return Graph([var_input_img, var_input_word, var_switch_img, var_switch_word,
                  var_last_h, var_last_c],
                 [var_word_prob, var_lstm_h, var_lstm_c])
def _compress_sequence(self, seq: List[Operator]):
    # Reduce the sequence to just Convolution2D|Linear followed by AxiswiseBias
    conv_op = seq[0]
    conv_out = conv_op.outputs["y"]
    n_channels = conv_out.shape_dict[Axis.C]

    # Accumulate scale and bias over the sequence:
    # applying a scale s to (x * S + B) yields x * (S * s) + (B * s)
    merged_scale = np.ones((n_channels,), dtype=np.float32)
    merged_bias = np.zeros((n_channels,), dtype=np.float32)
    bias_found = False
    for op in seq[1:]:
        if isinstance(op, AxiswiseScale):
            weight_var = op.inputs["s"]
            merged_scale *= weight_var.data
            merged_bias *= weight_var.data
        elif isinstance(op, AxiswiseBias):
            weight_var = op.inputs["b"]
            merged_bias += weight_var.data
            bias_found = True
        else:
            raise NotImplementedError()

    # Multiply the merged scale into the output-channel axis (N) of the Conv/Linear weight
    conv_weight_var = conv_op.inputs["w"]
    out_channel_pos = conv_weight_var.order.axes_dict[Axis.N]
    broadcast = [None] * conv_weight_var.order.ndim
    broadcast[out_channel_pos] = slice(None)
    # e.g. for HWNC, broadcast == [None, None, slice(None), None]
    conv_weight_var.data *= merged_scale[tuple(broadcast)]

    final_out = seq[-1].outputs["y"]
    if bias_found:
        # Remove the Scale/Bias layers and connect a new Bias layer to the original output
        for op in seq[1:]:
            op.remove_all()
        const_bias = ConstantVariable(merged_bias, OrderC)
        bias_op = AxiswiseBias(conv_op.name + "_tail_bias", axis=Axis.C)
        bias_op.append_input("x", conv_out)
        bias_op.append_input("b", const_bias)
        bias_op.append_output("y", final_out)
    else:
        # No bias remains, so the Conv layer's output becomes the output of the whole block
        for op in seq[1:]:
            op.remove_all()
        conv_op.remove_output(conv_out)
        conv_op.append_output("y", final_out)
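# A minimal numpy sketch of the folding performed by _compress_sequence, with a
# 1x1 "convolution" modeled as a plain matmul and all values made up: the
# trailing scale is absorbed into the weight's output-channel axis and only one
# bias remains.
import numpy as np

n_in, n_out = 3, 4
x = np.random.rand(5, n_in)
w = np.random.rand(n_in, n_out)   # weight, output channels on the last axis
s = np.random.rand(n_out)         # AxiswiseScale weight
b = np.random.rand(n_out)         # AxiswiseBias weight

y_sequence = (x @ w) * s + b              # conv -> scale -> bias
y_compressed = x @ (w * s[None, :]) + b   # scaled conv -> bias

assert np.allclose(y_sequence, y_compressed)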
def test_HWNC():
    vx = np.random.rand(6, 4, 10, 8)
    vb = np.random.rand(8)
    vy = vx + vb[None, None, None, :]

    x = Variable(vx.shape, order=OrderHWNC)
    b = ConstantVariable(vb, order=OrderC)
    y, = AxiswiseBias(None, axis=Axis.C)(x, b)

    generate_kernel_test_case(
        description="AxiswiseBias for input OrderHWNC",
        backend=["webgpu", "webassembly", "fallback"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
def _convert_batch_normalization_function(
        converter: ChainerConverter,
        c_op: chainer.functions.normalization.batch_normalization.BatchNormalizationFunction):
    x = converter.get_variable(c_op.inputs[0])
    gamma = converter.get_variable(c_op.inputs[1])
    beta = converter.get_variable(c_op.inputs[2])

    if len(c_op.inputs) == 5:
        # noinspection PyUnresolvedReferences
        mean_data = converter.get_variable(c_op.inputs[3]).data
        # noinspection PyUnresolvedReferences
        variance_data = converter.get_variable(c_op.inputs[4]).data
    elif len(c_op.inputs) == 3:
        variance_data = c_op.running_var
        mean_data = c_op.running_mean
    else:
        raise ValueError("BatchNormalizationFunction must have 3 or 5 inputs.")

    # Simplify scale and bias
    #
    # from:
    #   y = (x - mean) / sqrt(var + eps) * gamma + beta
    #
    # to:
    #   y = x * gamma_div_std + beta_scaled
    #
    #   gamma_div_std = gamma / sqrt(var + eps)
    #   beta_scaled = beta - mean * gamma_div_std

    # noinspection PyUnresolvedReferences
    gamma_div_std = gamma.data / np.sqrt(variance_data + c_op.eps)
    # noinspection PyUnresolvedReferences
    beta_scaled = beta.data - mean_data * gamma_div_std

    scale_opr = AxiswiseScale(None, axis=Axis.C)
    gamma_div_std_const = ConstantVariable(gamma_div_std, OrderC)
    scale_out, = scale_opr(x, gamma_div_std_const)

    offset_opr = AxiswiseBias(None, axis=Axis.C)
    beta_scaled_const = ConstantVariable(beta_scaled, OrderC)
    offset_out, = offset_opr(scale_out, beta_scaled_const)

    converter.set_variable(c_op.outputs[0](), offset_out)
def test_middle_axis():
    vx = np.random.rand(10, 6, 4, 8)
    vb = np.random.rand(6)
    vy = vx + vb[None, :, None, None]

    x = Variable(vx.shape, order=OrderNCHW)
    b = Variable(vb.shape, order=OrderC)
    y, = AxiswiseBias(None, axis=Axis.C)(x, b)

    generate_kernel_test_case(
        description="AxiswiseBias for middle axis",
        backend=["webgpu", "webassembly", "fallback"],
        graph=Graph([x, b], [y]),
        inputs={x: vx, b: vb},
        expected={y: vy}
    )
def test_mix_order():
    vx = np.random.rand(10, 6, 4, 8)
    vb = np.random.rand(10)
    vy = vx + vb[:, None, None, None]

    x = Variable(vx.shape, order=OrderCNHW)
    b = ConstantVariable(vb, order=OrderC)
    y, = AxiswiseBias(None, axis=Axis.C)(x, b)

    # Change the graph input to NHWC after construction and permute the test data to match
    x.change_order(OrderNHWC)
    vx = np.rollaxis(vx, 0, 4)

    generate_kernel_test_case(
        description="AxiswiseBias for mix order",
        backend=["webgpu"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
def __call__(self, inputs: List[Variable]) -> Tuple[Variable]:
    w = inputs[1]
    w_shape_dict = w.shape_dict
    conv_opr = Deconvolution2D(generate_unique_name(self.cfunc.label),
                               ksize=(w_shape_dict[Axis.H], w_shape_dict[Axis.W]),
                               stride=(self.cfunc.sy, self.cfunc.sx),
                               padding=(self.cfunc.ph, self.cfunc.pw))
    opr_out, = conv_opr(inputs[0], inputs[1])
    opr_out.change_order(OrderNCHW)

    if len(inputs) == 3:
        # with bias
        bias_opr = AxiswiseBias(generate_unique_name(self.cfunc.label), axis=Axis.C)
        self.hidden_vars.append(opr_out)
        opr_out, = bias_opr(opr_out, inputs[2])

    return opr_out,
def _convert_deconvolution_2d(
        converter: ChainerConverter,
        c_op: "chainer.functions.connection.deconvolution_2d.Deconvolution2DFunction"):
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])

    deconv_opr = Deconvolution2D(None,
                                 ksize=(w.shape_dict[Axis.H], w.shape_dict[Axis.W]),
                                 stride=(c_op.sy, c_op.sx),
                                 padding=(c_op.ph, c_op.pw))

    y, = deconv_opr(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        bias_opr = AxiswiseBias(None, axis=Axis.C)
        bias = converter.get_variable(c_op.inputs[2])
        y, = bias_opr(y, bias)

    converter.set_variable(c_op.outputs[0](), y)
def test_every_order():
    orders_x = [OrderNHWC, OrderHWNC, OrderHWCN, OrderNCHW, OrderCNHW, OrderCHWN]
    axes = [Axis.C]
    default_order = {1: OrderC, 2: OrderNC, 4: OrderNHWC, Axis.C: OrderC}

    for order_x, axis in itertools.product(orders_x, axes):
        if axis not in order_x.axes:
            continue

        op = AxiswiseBias(None, axis=axis)
        x = Variable(np.arange(order_x.ndim) + 1, default_order[order_x.ndim])
        x.change_order(order_x)
        w = Variable((x.shape_dict[axis],), default_order[axis])
        y, = op(x, w)

        for axis in y.order.axes:
            assert y.shape_dict[axis] == x.shape_dict[axis]
def test_conv_scale_bias():
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        scale = AxiswiseScale(None, axis=Axis.C)
        bias = AxiswiseBias(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w_shape = [4, 3, 3, 5]
        w = ConstantVariable(arange_shaped(w_shape), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        s_shape = [h.shape_dict[Axis.C]]
        s = ConstantVariable(arange_shaped(s_shape), OrderC)
        s_data = s.data.copy()
        h, = scale(h, s)

        b_shape = [h.shape_dict[Axis.C]]
        b = ConstantVariable(arange_shaped(b_shape), OrderC)
        b_data = b.data.copy()
        y, = bias(h, b)

        graph = Graph([x], [y])
        graph, _ = ConcatAffine().optimize(graph)

        # Index that broadcasts s_data along every axis except the weight's output-channel (N) axis
        # noinspection PyTypeChecker
        expander = (None,) * order_w.axes_dict[Axis.N] + (Ellipsis,) + (None,) * (3 - order_w.axes_dict[Axis.N])
        w_data_expected = w_data * s_data[expander]
        b_data_expected = b_data

        ops = listup_operators(graph)
        assert len(ops) == 2 and isinstance(ops[0], Convolution2D) and isinstance(ops[1], AxiswiseBias)
        assert np.all(np.equal(ops[0].inputs["w"].data, w_data_expected))
        assert np.all(np.equal(ops[1].inputs["b"].data, b_data_expected))
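# For reference, a small numpy illustration (axis position and sizes made up) of
# what the expander tuple above produces: broadcast dimensions everywhere except
# the weight's output-channel (N) axis.
import numpy as np

s_data = np.arange(4, dtype=np.float32)  # per-output-channel scale
n_pos = 2                                # position of Axis.N, e.g. in OrderHWNC
expander = (None,) * n_pos + (Ellipsis,) + (None,) * (3 - n_pos)

assert s_data[expander].shape == (1, 1, 4, 1)  # broadcasts against an HWNC weight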
def _convert_conv2d(converter: KerasConverter, k_op: keras.layers.Conv2D):
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    if k_op.data_format == "channels_first":
        assert x.order == OrderNCHW
    elif k_op.data_format == "channels_last":
        assert x.order == OrderNHWC
    else:
        raise ValueError(f"[KerasConverter] Unknown data format detected: {k_op.data_format}")

    w = converter.convert_to_constant_variable(k_op.kernel, OrderHWCN)

    ksize = tuple(k_op.kernel_size)
    stride = tuple(k_op.strides)
    dilation_rate = tuple(k_op.dilation_rate)
    if k_op.padding == "valid":
        padding = (0, 0)
    elif k_op.padding == "same":
        padding = (ksize[0] // 2, ksize[1] // 2)
    else:
        raise ValueError(f"[KerasConverter] Unknown padding: {k_op.padding}")

    y, = Convolution2D(None, ksize=ksize, stride=stride, padding=padding,
                       dilation_rate=dilation_rate)(x, w)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
        y, = AxiswiseBias(None, Axis.C)(y, b)

    y = do_activation(k_op.activation, y)
    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def _convert_dilated_convolution_2d(
        converter: ChainerConverter,
        c_op: chainer.functions.connection.dilated_convolution_2d.DilatedConvolution2DFunction):
    x = converter.get_variable(c_op.inputs[0])
    w = converter.get_variable(c_op.inputs[1])

    # when the dilation rate is 1, this is an ordinary convolution
    conv_opr = Convolution2D(None,
                             ksize=(w.shape_dict[Axis.H], w.shape_dict[Axis.W]),
                             stride=(c_op.sy, c_op.sx),
                             padding=(c_op.ph, c_op.pw),
                             dilation_rate=(c_op.dx, c_op.dy))

    y, = conv_opr(x, w)

    if len(c_op.inputs) == 3:
        # with bias
        bias_opr = AxiswiseBias(None, axis=Axis.C)
        bias = converter.get_variable(c_op.inputs[2])
        y, = bias_opr(y, bias)

    converter.set_variable(c_op.outputs[0](), y)
def __call__(self, inputs: List[Variable]) -> Tuple[Variable]:
    assert len(inputs) == 5
    x, gamma, beta, mean, variance = inputs

    # All variables other than x are processed offline and reused as new constants:
    # y = (x - mean) / sqrt(var + eps) * gamma + beta
    #   = x * gamma_div_std + beta_scaled
    # gamma_div_std = gamma / sqrt(var + eps)
    # beta_scaled = beta - mean * gamma_div_std
    gamma_div_std = gamma.data / np.sqrt(variance.data + self.cfunc.eps)
    beta_scaled = beta.data - mean.data * gamma_div_std

    scale_opr = AxiswiseScale(generate_unique_name(self.cfunc.label), axis=Axis.C)
    gamma_div_std_const = ConstantVariable(gamma_div_std, OrderC)
    scale_out, = scale_opr(x, gamma_div_std_const)
    self.hidden_vars.append(scale_out)
    self.hidden_consts.append(gamma_div_std_const)

    offset_opr = AxiswiseBias(generate_unique_name(self.cfunc.label), axis=Axis.C)
    beta_scaled_const = ConstantVariable(beta_scaled, OrderC)
    offset_out, = offset_opr(scale_out, beta_scaled_const)
    self.hidden_consts.append(beta_scaled_const)

    return offset_out,
def convert_layer_conv2d(self, layer_config: Dict[str, object],
                         inputs: List[Variable]) -> List[Variable]:
    """
    Example:
        {'class_name': 'Conv2D',
         'config': {'activation': 'relu',
                    'activity_regularizer': None,
                    'bias_constraint': None,
                    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
                    'bias_regularizer': None,
                    'data_format': 'channels_last',
                    'dilation_rate': [1, 1],
                    'filters': 64,
                    'kernel_constraint': None,
                    'kernel_initializer': {'class_name': 'VarianceScaling',
                                           'config': {'distribution': 'uniform',
                                                      'mode': 'fan_avg',
                                                      'scale': 1.0,
                                                      'seed': None}},
                    'kernel_regularizer': None,
                    'kernel_size': [3, 3],
                    'name': 'conv2d_2',
                    'padding': 'valid',
                    'strides': [1, 1],
                    'trainable': True,
                    'use_bias': True}},

    :param layer_config:
    :param inputs:
    :return:
    """
    assert len(inputs) == 1
    input = inputs[0]
    name: str = layer_config["name"]
    weight_array = self.weights[f"{name}/{name}/kernel:0"].value

    assert layer_config["data_format"] == "channels_last"
    weight_var = ConstantVariable(weight_array, OrderHWCN)  # order does not depend on data_format

    ksize: Tuple[int, int] = tuple(layer_config["kernel_size"])
    stride: Tuple[int, int] = tuple(layer_config["strides"])
    padding_keras: str = layer_config["padding"]  # valid or same

    if isinstance(padding_keras, tuple):
        # preprocess_zeropadding2d
        padding = padding_keras
    elif padding_keras == "valid":
        padding = (0, 0)
    elif padding_keras == "same":
        padding = (ksize[0] // 2, ksize[1] // 2)
    else:
        raise ValueError("Unknown padding")

    conv2d_opr = Convolution2D(name, ksize=ksize, stride=stride, padding=padding)
    y, = conv2d_opr(input, weight_var)

    if layer_config["use_bias"]:
        bias_array = self.weights[f"{name}/{name}/bias:0"].value
        bias_var = ConstantVariable(bias_array, OrderC)
        bias_opr = AxiswiseBias(name + "_bias", Axis.C)
        y, = bias_opr(y, bias_var)

    act_opr: Optional[Operator] = None
    activation_type: str = layer_config["activation"]
    if activation_type == "relu":
        act_opr = Relu(name + "_activation")
    elif activation_type == "softmax":
        warn("omitting softmax activation")
    elif activation_type == "linear":
        pass
    else:
        raise NotImplementedError(f"Unknown activation {activation_type}")

    if act_opr is not None:
        y, = act_opr(y)

    return [y]
def test_invalid_size():
    op = AxiswiseBias(None, axis=Axis.C)

    x = Variable((2, 3, 4, 5), OrderNHWC)
    w = Variable((6,), OrderC)  # size 6 does not match the C axis of x (size 5)

    # The mismatched bias size should be rejected when the operator is applied
    # (assuming AxiswiseBias validates input shapes with an assertion)
    with pytest.raises(AssertionError):
        y, = op(x, w)