def get_qconv2d_batchnorm_model(input_shape, kernel_size, folding_mode,
                                kernel_quantizer=None):
  num_class = 2
  x = x_in = layers.Input(input_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=kernel_size,
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      name="foldconv2d")(x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(num_class, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  x = layers.Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model
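# Illustrative usage sketch, not part of the original tests: the argument
# values below (input shape, folding mode, quantizer string) are assumptions
# chosen only to show the expected call shape of the helper above.
def _example_qconv2d_batchnorm_model():
  model = get_qconv2d_batchnorm_model(
      input_shape=(4, 4, 1),
      kernel_size=(2, 2),
      folding_mode="ema_stats_folding",
      kernel_quantizer="quantized_bits(4, 0, 1)")
  # With a (4, 4, 1) input, the (2, 2) kernel and (4, 4) strides produce a
  # (1, 1, 2) feature map that is flattened into the dense/softmax head.
  model.summary()
  return model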
def _get_nonseq_folded_model(x_shape):
  # kernel_quantizer, folding_mode, ema_freeze_delay and the weight tensors
  # (kernel, gamma, beta, iteration, moving_mean, moving_variance) are bound
  # in the enclosing scope; see the driver sketch after
  # _get_sequantial_folded_model below.
  x = x_in = layers.Input(x_shape, name="input")
  x1 = layers.Conv2D(
      filters=1, kernel_size=(1, 1), strides=(1, 1), name="conv2d_1")(x)
  x2 = layers.Conv2D(
      filters=1, kernel_size=(1, 1), strides=(1, 1), name="conv2d_2")(x)
  x = layers.Maximum()([x1, x2])
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="foldconv2d")(x)
  x = layers.Flatten(name="flatten")(x)
  x = layers.Dense(2, use_bias=False, kernel_initializer="ones",
                   name="dense")(x)
  model = Model(inputs=[x_in], outputs=[x])
  # layers[4] is the QConv2DBatchnorm layer (input, conv2d_1, conv2d_2,
  # Maximum, foldconv2d).
  model.layers[4].set_weights(
      [kernel, gamma, beta, iteration, moving_mean, moving_variance])
  return model
def _get_sequantial_folded_model(x_shape):
  # Reads the same enclosing-scope variables as _get_nonseq_folded_model
  # above; see the driver sketch below.
  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(2, 2),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="foldconv2d")(x)
  x = QDepthwiseConv2DBatchnorm(
      kernel_size=(2, 2),
      strides=(1, 1),
      use_bias=False,
      depthwise_quantizer=kernel_quantizer,
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="folddepthwiseconv2d")(x)
  model = Model(inputs=[x_in], outputs=[x])
  model.layers[1].set_weights(
      [kernel, gamma, beta, iteration, moving_mean, moving_variance])
  return model
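# Illustrative driver sketch, not part of the original tests: the two helpers
# above read kernel_quantizer, folding_mode, ema_freeze_delay and the weight
# tensors from the enclosing scope. When the helpers sit at module level as
# printed here, those free names resolve as globals, so one hypothetical way
# to drive them is to bind module-level values first. All shapes and values
# below are assumptions for filters=2, kernel_size=(2, 2) and a
# single-channel input.
def _example_drive_folded_model_helpers():
  global kernel_quantizer, folding_mode, ema_freeze_delay
  global kernel, gamma, beta, iteration, moving_mean, moving_variance
  import numpy as np  # numpy is assumed available in this test file

  kernel_quantizer = "quantized_bits(4, 0, 1)"
  folding_mode = "ema_stats_folding"
  ema_freeze_delay = 10
  kernel = np.ones((2, 2, 1, 2), dtype=np.float32)
  gamma = np.ones((2,), dtype=np.float32)
  beta = np.zeros((2,), dtype=np.float32)
  iteration = np.array(-1)  # scalar EMA iteration counter
  moving_mean = np.zeros((2,), dtype=np.float32)
  moving_variance = np.ones((2,), dtype=np.float32)

  seq_model = _get_sequantial_folded_model((4, 4, 1))
  nonseq_model = _get_nonseq_folded_model((4, 4, 1))
  return seq_model, nonseq_model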
def test_populate_bias_quantizer_from_accumulator():
  """Test populate_bias_quantizer_from_accumulator function.

  Define a qkeras model with a QConv2DBatchnorm layer and set the bias
  quantizer in that layer to None. Call
  populate_bias_quantizer_from_accumulator to automatically derive the bias
  quantizer type from the MAC accumulator type and set it in the model. Then
  call populate_bias_quantizer_from_accumulator again on the same model; this
  time, since the bias quantizer is already set, the function should not
  change it.
  """

  x_shape = (2, 2, 1)

  # Get a qkeras model with a QConv2DBatchnorm layer. Set the bias quantizer
  # in the layer to None.
  x = x_in = layers.Input(x_shape, name="input")
  x1 = QConv2D(
      filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False,
      kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_1")(x)
  x2 = QConv2D(
      filters=1, kernel_size=(1, 1), strides=(1, 1), use_bias=False,
      kernel_quantizer="quantized_bits(4, 0, 1)", name="conv2d_2")(x)
  x = layers.Maximum()([x1, x2])
  x = QActivation("quantized_relu(4, 1)")(x)
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer="quantized_bits(4, 0, 1)",
      bias_quantizer=None,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode="batch_stats_folding",
      ema_freeze_delay=10,
      name="foldconv2d")(x)
  x1 = x
  x2 = layers.Flatten(name="flatten")(x)
  x2 = QDense(2, use_bias=False, kernel_initializer="ones",
              kernel_quantizer="quantized_bits(6, 2, 1)", name="dense")(x2)
  model = Model(inputs=[x_in], outputs=[x1, x2])
  # layers[5] is the QConv2DBatchnorm layer; its second quantizer is the
  # (still unset) bias quantizer.
  assert_equal(model.layers[5].get_quantizers()[1], None)

  # Call populate_bias_quantizer_from_accumulator to automatically derive the
  # bias quantizer from the MAC accumulator type.
  _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(
      model, ["quantized_bits(8, 0, 1)"])
  q = model.layers[5].get_quantizers()[1]
  assert_equal(q.__str__(), "quantized_bits(10,3,1)")

  # Call populate_bias_quantizer_from_accumulator again; the bias quantizer
  # should not change.
  _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(
      model, ["quantized_bits(8, 0, 1)"])
  q = model.layers[5].get_quantizers()[1]
  assert_equal(q.__str__(), "quantized_bits(10,3,1)")
def get_models_with_one_layer(kernel_quantizer, folding_mode,
                              ema_freeze_delay):
  x_shape = (2, 2, 1)
  loss_fn = tf.keras.losses.MeanSquaredError()
  optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

  # Define a model with separate conv2d and bn layers.
  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2D(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      bias_quantizer=None,
      name="conv2d")(x)
  x = layers.BatchNormalization(
      axis=-1,
      momentum=0.99,
      epsilon=0.001,
      center=True,
      scale=True,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      beta_regularizer=None,
      gamma_regularizer=None,
      beta_constraint=None,
      gamma_constraint=None,
      renorm=False,
      renorm_clipping=None,
      renorm_momentum=0.99,
      fused=None,
      trainable=True,
      virtual_batch_size=None,
      adjustment=None,
      name="bn")(x)
  unfold_model = Model(inputs=[x_in], outputs=[x])
  unfold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  # Define the equivalent model with a single folded layer.
  x = x_in = layers.Input(x_shape, name="input")
  x = QConv2DBatchnorm(
      filters=2,
      kernel_size=(2, 2),
      strides=(4, 4),
      kernel_initializer="ones",
      bias_initializer="zeros",
      use_bias=False,
      kernel_quantizer=kernel_quantizer,
      beta_initializer="zeros",
      gamma_initializer="ones",
      moving_mean_initializer="zeros",
      moving_variance_initializer="ones",
      folding_mode=folding_mode,
      ema_freeze_delay=ema_freeze_delay,
      name="foldconv2d")(x)
  fold_model = Model(inputs=[x_in], outputs=[x])
  fold_model.compile(loss=loss_fn, optimizer=optimizer, metrics="acc")

  return (unfold_model, fold_model)
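# Illustrative sketch, not part of the original tests: how the model pair
# returned above could be exercised side by side. The random data, batch
# size, and argument values are assumptions; the target shape (1, 1, 2)
# follows from the (2, 2, 1) input with a (2, 2) kernel and (4, 4) strides.
def _example_compare_folded_and_unfolded():
  import numpy as np  # numpy is assumed available in this test file

  unfold_model, fold_model = get_models_with_one_layer(
      kernel_quantizer="quantized_bits(4, 0, 1)",
      folding_mode="ema_stats_folding",
      ema_freeze_delay=10)
  x = np.random.rand(8, 2, 2, 1).astype(np.float32)
  y = np.random.rand(8, 1, 1, 2).astype(np.float32)
  unfold_model.fit(x, y, batch_size=4, epochs=1, verbose=0)
  fold_model.fit(x, y, batch_size=4, epochs=1, verbose=0)
  # Compare predictions rather than raw weights: after folding, the conv
  # kernel and the batchnorm statistics live in a single layer.
  return unfold_model.predict(x), fold_model.predict(x)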