def test_qpooling_in_model_quantize():
  input_size = (16, 16, 3)
  pool_size = (2, 2)

  x = Input(input_size)
  xin = x
  x = AveragePooling2D(pool_size=pool_size, name="pooling")(x)
  x = GlobalAveragePooling2D(name="global_pooling")(x)
  model = Model(inputs=xin, outputs=x)

  quantize_config = {
      "QAveragePooling2D": {
          "average_quantizer": "binary",
          "activation_quantizer": "binary"
      },
      "QGlobalAveragePooling2D": {
          "average_quantizer": "quantized_bits(4, 0, 1)",
          "activation_quantizer": "ternary"
      }
  }
  qmodel = model_quantize(model, quantize_config, 4)
  print_qstats(qmodel)

  assert_equal(str(qmodel.layers[1].average_quantizer_internal), "binary()")
  assert_equal(str(qmodel.layers[1].activation), "binary()")
  assert_equal(str(qmodel.layers[2].average_quantizer_internal),
               "quantized_bits(4,0,1)")
  assert_equal(str(qmodel.layers[2].activation), "ternary()")
def test_birnn_subrnn():
  model = Sequential([Bidirectional(LSTM(16)), LSTM(8)])
  d = {
      'QLSTM': {
          'activation_quantizer': 'ternary',
          'recurrent_activation_quantizer': 'ternary',
          'kernel_quantizer': 'ternary',
          'recurrent_quantizer': 'ternary',
          'bias_quantizer': 'ternary',
          'state_quantizer': 'ternary',
      },
      'QBidirectional': {
          'activation_quantizer': 'binary',
          'recurrent_activation_quantizer': 'binary',
          'kernel_quantizer': 'binary',
          'recurrent_quantizer': 'binary',
          'bias_quantizer': 'binary',
          'state_quantizer': 'binary',
      }
  }
  qmodel = model_quantize(model, d, 4)

  # The standalone LSTM (not wrapped in Bidirectional) should pick up the
  # QLSTM config rather than the QBidirectional one.
  layer = qmodel.layers[1]
  assert str(layer.kernel_quantizer) == 'ternary'
  assert str(layer.recurrent_quantizer) == 'ternary'
  assert str(layer.bias_quantizer) == 'ternary'
  assert str(layer.state_quantizer) == 'ternary'
  assert str(layer.activation) == 'ternary()'
def getQuantizedFromMaps(full_model, fold, input_shape,
                         full_model_path="one_hot_v2/full_0/saved_model.h5"):
  # `allQDictionaries` (name -> quantization config) and `bitwidth` are
  # assumed to be defined at module scope.
  qmodels = []
  transferWeights = False
  try:
    model = tf.keras.models.load_model(full_model_path)
    transferWeights = True
  except Exception:
    model = full_model
  for name, config in allQDictionaries.items():
    # Workaround for deserialization from JSON (used by model_quantize) not
    # setting _USE_V2_BEHAVIOR=True, which falls back to the old V1
    # implementation.
    custom_objects = {
        'BatchNormalization': tf.keras.layers.BatchNormalization
    }
    qmodel = model_quantize(model, config, bitwidth,
                            custom_objects=custom_objects,
                            transfer_weights=transferWeights)
    qmodel._name = 'quantized_%s_%i' % (name, fold)
    qmodels.append(qmodel)
  return qmodels
def test_birnn_conversion(rnn):
  m = create_network_birnn(rnn)
  name = 'Q' + m.layers[1].layer.__class__.__name__
  d = {
      'QBidirectional': {
          'kernel_quantizer': 'binary',
          'recurrent_quantizer': 'binary',
          'bias_quantizer': 'binary',
          'activation_quantizer': 'binary',
      }
  }
  if name != 'QSimpleRNN':
    d['QBidirectional']['recurrent_activation_quantizer'] = 'binary'
  qq = model_quantize(m, d, 4)

  layer = qq.layers[1].layer
  assert str(layer.kernel_quantizer) == 'binary'
  assert str(layer.recurrent_quantizer) == 'binary'
  assert str(layer.bias_quantizer) == 'binary'
  assert str(layer.activation) == 'binary()'
  if name != 'QSimpleRNN':
    assert str(layer.recurrent_activation) == 'binary()'

  backward_layer = qq.layers[1].backward_layer
  # The backward layer's weight quantizers are dicts because of
  # constraints.serialize.
  assert str(backward_layer.kernel_quantizer['class_name']) == 'binary'
  assert str(backward_layer.recurrent_quantizer['class_name']) == 'binary'
  assert str(backward_layer.bias_quantizer['class_name']) == 'binary'
  assert str(backward_layer.activation) == 'binary()'
  if name != 'QSimpleRNN':
    assert str(backward_layer.recurrent_activation) == 'binary()'
def test_new_forgiving_factor():
  """Tests forgiving factor."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = {
      "QDense": ["parameters", "activations"],
      "Dense": ["parameters", "activations"],
      "QConv2D": ["parameters", "activations"],
      "Conv2D": ["parameters", "activations"],
      "DepthwiseConv2D": ["parameters", "activations"],
      "QDepthwiseConv2D": ["parameters", "activations"],
      "Activation": ["activations"],
      "QActivation": ["activations"],
      "QBatchNormalization": ["parameters"],
      "BatchNormalization": ["parameters"],
      "default": ["activations"]
  }

  model = get_model()
  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits, config)

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]
  ref_size_dict = cached_result[3]
  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824

  q_dict = {
      "c1": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "quantized_bits(4)"
      }
  }
  q_model = model_quantize(model, q_dict, 4)
  cached_result = ffb.compute_model_size(q_model)
  trial_size_dict = cached_result[3]

  # Only the quantized layer "c1" should change size; all other entries must
  # match the reference.
  for name in trial_size_dict:
    if name != "c1":
      assert trial_size_dict[name] == ref_size_dict[name]
  assert trial_size_dict["c1"]["parameters"] == 416
def test_automatic_conversion_from_relu_to_qr():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }
  }
  qq = model_quantize(m, d, 4)
  # A "relu" activation is converted automatically to quantized_relu at the
  # requested activation bitwidth.
  assert str(qq.layers[3].activation) == "quantized_relu(4,0)"
def test_conversion_print_qstats():
  # This tests whether references in TensorFlow are working properly.
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  qq = model_quantize(m, d, 4)
  qq.summary()
  print_qstats(qq)

  # Test whether print_qstats works with unquantized layers.
  print_qstats(m)

  # Test whether print_qstats works with a mixture of quantized and
  # unquantized layers.
  m1 = create_mix_network()
  print_qstats(m1)

  m2 = create_network_with_bn()
  d2 = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QActivation": {
          "relu": "ternary"
      },
      "QConv2DBatchnorm": {
          "kernel_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "QDepthwiseConv2DBatchnorm": {
          "depthwise_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
  }
  m2 = model_quantize(m2, d2, 4, enable_bn_folding=True)
  m2.summary()
  print_qstats(m2)
def test_linear_activation_conversion():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary",
          "activation_quantizer": "binary"
      }
  }
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[1].activation) == "binary()"
def test_network_quantization(rnn):
  model = Sequential([rnn(16)])
  jm = copy.deepcopy(json.loads(model.to_json()))
  config = jm["config"]
  layers = config["layers"]
  d = {
      f"Q{layers[0]['class_name']}": {
          "kernel_quantizer": "binary",
          "recurrent_quantizer": "binary",
          "bias_quantizer": "binary"
      }
  }
  qmodel = model_quantize(model, d, 4)
  assert str(qmodel.layers[0].activation) == "quantized_tanh(4,0)"
def test_conversion_qadaptiveactivation_with_preference():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # Test with QActivation preference.
  qq1 = model_quantize(m, d, 4, prefer_qadaptiveactivation=False)
  assert qq1.layers[2].__class__.__name__ == "QActivation"
  assert str(qq1.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq1.layers[4].__class__.__name__ == "Activation"

  # Test with QAdaptiveActivation preference.
  qq2 = model_quantize(m, d, 4, prefer_qadaptiveactivation=True)
  assert qq2.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq2.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq2.layers[4].__class__.__name__ == "Activation"
def test_conversion_from_relu_activation_to_qadaptiveactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QAdaptiveActivation": {
          "relu": "quantized_relu(8)"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QAdaptiveActivation"
  assert str(qq.layers[2].quantizer).startswith("quantized_relu(8,")
  assert qq.layers[4].__class__.__name__ == "Activation"
def test_conversion_from_relu_activation_to_qr_qactivation():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "QActivation"
  assert str(qq.layers[2].quantizer) == "ternary()"
  assert qq.layers[4].__class__.__name__ == "Activation"
def getQuantizedModel(precision, model, weightfile):
  model.load_weights(weightfile)
  config = build_config(precision)
  custom_objects = {'BatchNormalization': tf.keras.layers.BatchNormalization}
  qmodel = model_quantize(model, config, precision,
                          custom_objects=custom_objects,
                          transfer_weights=True)
  qmodel._name = 'quantized_%i' % precision
  for layer in qmodel.layers:
    if hasattr(layer, "kernel_quantizer"):
      print(layer.name,
            "kernel:", str(layer.kernel_quantizer_internal),
            "bias:", str(layer.bias_quantizer_internal))
    elif hasattr(layer, "quantizer"):
      print(layer.name, "quantizer:", str(layer.quantizer))
  return qmodel
def test_rnn_conversion(rnn):
  m = create_network_rnn(rnn)
  name = 'Q' + m.layers[1].__class__.__name__
  d = {
      name: {
          'kernel_quantizer': 'binary',
          'recurrent_quantizer': 'binary',
          'bias_quantizer': 'binary',
          'activation_quantizer': 'binary',
      }
  }
  if name != 'QSimpleRNN':
    d[name]['recurrent_activation_quantizer'] = 'binary'
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[1].kernel_quantizer) == 'binary'
  assert str(qq.layers[1].recurrent_quantizer) == 'binary'
  assert str(qq.layers[1].bias_quantizer) == 'binary'
  assert str(qq.layers[1].activation) == 'binary()'
  if name != 'QSimpleRNN':
    assert str(qq.layers[1].recurrent_activation) == 'binary()'
def create_quantized_network():
  """Creates a simple quantized conv net model."""
  # Create a simple model.
  xi = Input((28, 28, 1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu")(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  model = Model(inputs=xi, outputs=x)

  # Quantize the model.
  quantizer_config = {
      "QConv2D": {
          "kernel_quantizer": "quantized_bits(4)",
          "bias_quantizer": "quantized_bits(4)"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  activation_bits = 4
  qmodel = model_quantize(model, quantizer_config, activation_bits)
  return qmodel
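
# A minimal usage sketch (not part of the original tests; the optimizer and
# loss are illustrative assumptions): the model returned by
# create_quantized_network() is a regular Keras model, so it can be compiled
# and run on dummy data as-is.
def example_run_quantized_network():
  import numpy as np

  qmodel = create_quantized_network()
  qmodel.compile(optimizer="adam", loss="categorical_crossentropy")
  preds = qmodel.predict(np.random.rand(1, 28, 28, 1))
  print(preds.shape)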
if Prune:
  train.setModel(model_pruned)
  if fullModel:
    setWeights(train.keras_model, fullModel)
  print_model_sparsity(train.keras_model)
  additionalCallbacks = pruning_callbacks.UpdatePruningStep()
elif Quantize:
  try:
    train.keras_model = keras.models.load_model(fullModel)
    transferWeights = True
  except Exception:
    print("No pretrained model found! Building new model without "
          "pretrained weights")
    transferWeights = False
  # Available quantization configs currently: 'dense2_binary',
  # 'conv2d_binary', '4_bit'.
  train.keras_model = model_quantize(train.keras_model, qDicts['4_bit'], 4,
                                     transfer_weights=transferWeights)
  print_qstats(train.keras_model)

train.compileModel(learningrate=0.0001,
                   loss='binary_crossentropy')  # or binary_cross_entropy_with_extras
print(train.keras_model.summary())

model, history = train.trainModel(nepochs=30,
                                  batchsize=50,
                                  checkperiod=1,  # saves a checkpoint model every N epochs
                                  verbose=1,
                                  additional_callbacks=additionalCallbacks)

train.change_learning_rate(0.0003)
model, history = train.trainModel(nepochs=30,
                                  batchsize=50,
def quantize_model(self, hp):
  """Quantizes the model by hyperparameter search, extracting a size schema."""
  # Configuration for quantization.
  q_dict = {}

  model = clone_model(self.model, self.custom_objects)

  fanin = []

  filter_range = [0.5, 0.75, 1.0, 1.5, 2.0]

  # network_filters=hp.Choice(...) should only be defined if we are sure the
  # current block has a layer that needs a filter sweep. Otherwise, when no
  # layer needs a filter sweep but an hp variable is defined, there will be
  # ineffective trials that loop over the network filter range even though
  # the filter sweep is never applied to any layer. Therefore, we use
  # filter_sweep_enabled to mark whether any layer in the current block needs
  # a filter sweep.
  kernel_quantizer_dict = {}
  filter_sweep_enabled = False
  for layer in model.layers:
    if layer.__class__.__name__ in REGISTERED_LAYERS:
      kernel_quantizer, bits = self._get_quantizer(
          hp, layer.name + "_kernel", layer.name, layer.__class__.__name__,
          is_kernel=True)

      kernel_quantizer_dict[layer.name] = (kernel_quantizer, bits)

      # kernel_quantizer is not None -> the layer in the current block needs
      # to be quantized.
      if kernel_quantizer:
        if (not filter_sweep_enabled and
            self.tune_filters in ["layer", "block"] and
            not self.tune_filters_exceptions.search(layer.name) and
            layer.__class__.__name__ in ["Dense", "Conv1D", "Conv2D"]):
          filter_sweep_enabled = True

      if layer.__class__.__name__ in SEQUENCE_LAYERS:
        recurrent_quantizer, _ = self._get_quantizer(
            hp, layer.name + "_recurrent_kernel", layer.name,
            layer.__class__.__name__, is_kernel=True)

  if self.tune_filters == "block" and filter_sweep_enabled:
    network_filters = hp.Choice(
        "network_filters", values=filter_range, default=1.0)
  else:
    network_filters = 1.0

  for layer_id, layer in enumerate(model.layers):

    # We can use these indexes to disable some layers, like the last layer.
    if self.layer_indexes is not None and layer_id not in self.layer_indexes:
      continue

    layer_d = {}

    if layer.__class__.__name__ in Q_LAYERS:
      weights = layer.get_weights()[0]
      if (layer.get_quantizers()[0] and
          hasattr(layer.get_quantizers()[0], "bits")):
        bits = layer.get_quantizers()[0].bits
      else:
        bits = 8
      fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.)

    if layer.__class__.__name__ in REGISTERED_LAYERS:
      # The difference between depthwise and the rest is just the name of
      # the kernel.
      if layer.__class__.__name__ == "DepthwiseConv2D":
        kernel_name = "depthwise_quantizer"
      else:
        kernel_name = "kernel_quantizer"

      # Sample the kernel quantizer.
      (kernel_quantizer, bits) = kernel_quantizer_dict[layer.name]
      if not kernel_quantizer:
        continue

      # Process fanin here.
      if bits < 8:
        weights = layer.get_weights()[0]
        fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.)

      # We only want to do that if we are going to quantize the layer.
      if (self.tune_filters in ["layer", "block"] and
          not self.tune_filters_exceptions.search(layer.name) and
          layer.__class__.__name__ in ["Dense", "Conv1D", "Conv2D"]):
        if self.tune_filters == "layer":
          layer_filters = hp.Choice(
              "network_filters_" + layer.name, values=filter_range,
              default=1.0)
        else:
          layer_filters = network_filters

        if layer.__class__.__name__ == "Dense":
          layer.units = max(int(layer.units * layer_filters), 1)
        elif layer.__class__.__name__ in ["Conv1D", "Conv2D"]:
          layer.filters = max(int(layer.filters * layer_filters), 1)

      layer_d[kernel_name] = kernel_quantizer

      if layer.__class__.__name__ in SEQUENCE_LAYERS:
        layer_d['recurrent_quantizer'] = recurrent_quantizer

      if layer.__class__.__name__ in ["LSTM", "GRU", "Bidirectional"]:
        layer_d['recurrent_activation'], _ = self._get_quantizer(
            hp, layer.name + "_recurrent_activation", layer.name,
            layer.__class__.__name__, is_kernel=False)

      # If we use bias, sample its quantizer.
      if layer.__class__.__name__ == "Bidirectional":
        layer_d["bias_quantizer"], bits = self._get_quantizer(
            hp, layer.name + "_bias", layer.name, layer.__class__.__name__,
            is_kernel=False)
        layer_d["activation"], bits = self._get_quantizer(
            hp, layer.name + "_activation", layer.name,
            layer.__class__.__name__, is_kernel=False)
        q_dict[layer.name] = layer_d
      else:
        if layer.use_bias:
          layer_d["bias_quantizer"], bits = self._get_quantizer(
              hp, layer.name + "_bias", layer.name,
              layer.__class__.__name__, is_kernel=False)

        # If the activation is not linear/softmax, we need to process it.
        if layer.activation is None:
          is_softmax = False
          is_linear = False
        else:
          if isinstance(layer.activation, six.string_types):
            is_softmax = layer.activation == "softmax"
            is_linear = layer.activation == "linear"
          else:
            is_softmax = layer.activation.__name__ == "softmax"
            is_linear = layer.activation.__name__ == "linear"

        if not is_softmax and not is_linear:
          layer_d["activation"], bits = self._get_quantizer(
              hp, layer.name + "_activation", layer.name,
              layer.__class__.__name__, is_kernel=False)

        q_dict[layer.name] = layer_d

    elif layer.__class__.__name__ in ["Reshape"]:
      # We cannot handle fine-tuning filters per layer right now.
      assert self.tune_filters in ["none", "block"]

      # We need to make sure this pattern exists; this should only occur for
      # "scheduler", so the name will be complete and not a pattern.
      if (self.tune_filters == "none" or
          layer.name not in self.limit or
          self.tune_filters_exceptions.search(layer.name)):
        continue

      if K.image_data_format() == "channels_last":
        layer.target_shape = layer.target_shape[:-1] + (
            min(int(layer.target_shape[-1] * network_filters), 1),)
      else:
        layer.target_shape = (
            int(layer.target_shape[0] * network_filters),
        ) + layer.target_shape[1:]

    elif layer.__class__.__name__ in ["Activation"]:
      if isinstance(layer.activation, six.string_types):
        is_linear = layer.activation == "linear"
        is_softmax = layer.activation == "softmax"
      else:
        is_linear = layer.activation.__name__ == "linear"
        is_softmax = layer.activation.__name__ == "softmax"

      # If it is a linear activation, we notify the quantizer that we are
      # searching for linear types of quantizers.
      if not is_softmax:
        activation, bits = self._get_quantizer(
            hp, layer.name + "_activation", layer.name,
            layer.__class__.__name__, is_kernel=False, is_linear=is_linear)

        if not activation:
          continue

        # See the documentation of model_quantize.
        q_dict[layer.name] = activation

    elif layer.__class__.__name__ in self.limit:
      # Mark it for conversion.
      q_dict[layer.name] = {}
    else:
      for pattern in self.limit:
        if re.match(pattern, layer.name):
          q_dict[layer.name] = {}
          break

  q_model = model_quantize(
      model, q_dict, self.activation_bits,
      custom_objects=self.custom_objects,
      transfer_weights=self.transfer_weights)

  return q_model, fanin
def test_folded_layer_conversion():
  # Create a sequential model with a conv2d layer and activation layers.
  m1 = create_network()
  # Create a sequential model with a conv2d layer followed by a bn layer.
  m2 = create_network_with_bn()

  # Quantization config.
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QConv2DBatchnorm": {
          "kernel_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "QDepthwiseConv2DBatchnorm": {
          "depthwise_quantizer": "ternary",
          "bias_quantizer": "ternary",
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }

  # Test when the model has no layer to fold.
  # Desired behavior: layers stay un-folded.
  qq1 = model_quantize(m1, d, 4, enable_bn_folding=True)
  assert qq1.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq1.layers[1].quantizers[0]).startswith("binary")

  # Test when the 1st conv2d layer needs to fold but the 2nd conv2d layer
  # does not (it is not followed by a bn layer).
  # Desired behavior: the 1st conv2d is folded, the 2nd conv2d stays
  # unfolded. Also tests that the depthwiseconv2d layer folds.
  qq2 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq2.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq2.layers[1].quantizers[0]).startswith("ternary")
  assert qq2.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq2.layers[3].quantizers[0]).startswith("binary")
  assert qq2.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq2.layers[5].quantizers[0]).startswith("ternary")

  # Test when there are layers to fold but folding is disabled.
  # Desired behavior: no conv2d/depthwiseconv2d layer is folded.
  qq3 = model_quantize(m2, d, 4, enable_bn_folding=False)
  assert qq3.layers[1].__class__.__name__ == "QConv2D"
  assert str(qq3.layers[1].quantizers[0]).startswith("binary")
  assert qq3.layers[2].__class__.__name__ == "BatchNormalization"
  assert str(qq3.layers[3].quantizer).startswith("quantized_relu")
  assert qq3.layers[6].__class__.__name__ == "QDepthwiseConv2D"
  assert str(qq3.layers[6].quantizers[0]).startswith("binary")

  # Test when, e.g., the QConv2DBatchnorm quantizer is not given in the
  # config.
  # Desired behavior: quantizers for the QConv2DBatchnorm layer fall back to
  # the QConv2D quantizers.
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "QDepthwiseConv2D": {
          "depthwise_quantizer": "binary",
          "bias_quantizer": "binary"
      },
      "relu_act": {
          "relu": "quantized_relu(8)"
      }
  }
  qq4 = model_quantize(m2, d, 4, enable_bn_folding=True)
  assert qq4.layers[1].__class__.__name__ == "QConv2DBatchnorm"
  assert str(qq4.layers[1].quantizers[0]).startswith("binary")
  assert qq4.layers[3].__class__.__name__ == "QConv2D"
  assert str(qq4.layers[3].quantizers[0]).startswith("binary")
  assert qq4.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
  assert str(qq4.layers[5].quantizers[0]).startswith("binary")
def test_no_activation_conversion_to_quantized():
  m = create_network()
  d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
  qq = model_quantize(m, d, 4)
  assert qq.layers[2].__class__.__name__ == "Activation"
  assert qq.layers[4].__class__.__name__ == "Activation"
model.summary()

q_dict = {
    "conv2d_0_m": {
        "kernel_quantizer": "binary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "conv2d_1_m": {
        "kernel_quantizer": "ternary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "act2_m": "quantized_relu(6,2)",
    "QActivation": {
        "relu": "quantized_relu(4,0)"
    },
    "QConv2D": {
        "kernel_quantizer": "quantized_bits(4,0,1)",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_bits(3,0,1)",
        "bias_quantizer": "quantized_bits(3,0,1)"
    }
}

qmodel = model_quantize(model, q_dict, 4)
qmodel.summary()
print_qstats(qmodel)
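
# A small inspection sketch (not in the original snippet): per-layer-name
# entries such as "conv2d_0_m" are expected to take precedence over the
# class-wide "QConv2D"/"QDense" settings; this can be checked by printing
# each layer's internal kernel quantizer.
for layer in qmodel.layers:
  if hasattr(layer, "kernel_quantizer_internal"):
    print(layer.name, str(layer.kernel_quantizer_internal))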
def test_sequential_model_conversion():
  m = create_network_sequential()
  d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[2].activation) == "quantized_relu(4,0)"