Example #1
def test_qpooling_in_model_quantize():
    input_size = (16, 16, 3)
    pool_size = (2, 2)

    x = Input(input_size)
    xin = x
    x = AveragePooling2D(pool_size=pool_size, name="pooling")(x)
    x = GlobalAveragePooling2D(name="global_pooling")(x)
    model = Model(inputs=xin, outputs=x)

    quantize_config = {
        "QAveragePooling2D": {
            "average_quantizer": "binary",
            "activation_quantizer": "binary"
        },
        "QGlobalAveragePooling2D": {
            "average_quantizer": "quantized_bits(4, 0, 1)",
            "activation_quantizer": "ternary"
        }
    }

    qmodel = model_quantize(model, quantize_config, 4)
    print_qstats(qmodel)
    assert_equal(str(qmodel.layers[1].average_quantizer_internal), "binary()")
    assert_equal(str(qmodel.layers[1].activation), "binary()")
    assert_equal(str(qmodel.layers[2].average_quantizer_internal),
                 "quantized_bits(4,0,1)")
    assert_equal(str(qmodel.layers[2].activation), "ternary()")
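In these examples the third positional argument to model_quantize (4 throughout) is the activation bit width: it is spelled out as activation_bits in the create_quantized_network example further down, and it drives defaults such as the quantized_relu(4,0) and quantized_tanh(4,0) conversions asserted in several of the tests below.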
Example #2
def test_birnn_subrnn():
    model = Sequential([Bidirectional(LSTM(16)), LSTM(8)])
    d = {
        'QLSTM': {
            'activation_quantizer': 'ternary',
            'recurrent_activation_quantizer': 'ternary',
            'kernel_quantizer': 'ternary',
            'recurrent_quantizer': 'ternary',
            'bias_quantizer': 'ternary',
            'state_quantizer': 'ternary',
        },
        'QBidirectional': {
            'activation_quantizer': 'binary',
            'recurrent_activation_quantizer': 'binary',
            'kernel_quantizer': 'binary',
            'recurrent_quantizer': 'binary',
            'bias_quantizer': 'binary',
            'state_quantizer': 'binary',
        }
    }
    qmodel = model_quantize(model, d, 4)
    layer = qmodel.layers[1]
    assert str(layer.kernel_quantizer) == 'ternary'
    assert str(layer.recurrent_quantizer) == 'ternary'
    assert str(layer.bias_quantizer) == 'ternary'
    assert str(layer.state_quantizer) == 'ternary'
    assert str(layer.activation) == 'ternary()'
Example #3
def getQuantizedFromMaps(full_model,
                         fold,
                         input_shape,
                         full_model_path="one_hot_v2/full_0/saved_model.h5"):
    qmodels = []
    transferWeights = False
    try:
        model = tf.keras.models.load_model(full_model_path)
        transferWeights = True
    except Exception:
        model = full_model
    for name, dict_ in allQDictionaries.items():
        # Workaround for deserialization from JSON (used by model_quantize) not
        # setting _USE_V2_BEHAVIOR=True thus using old V1 implementation
        custom_objects = {
            'BatchNormalization': tf.keras.layers.BatchNormalization
        }
        qmodel = model_quantize(model,
                                dict_,
                                bitwidth,  # assumed defined at module level
                                custom_objects=custom_objects,
                                transfer_weights=transferWeights)
        qmodel._name = 'quantized_%s_%i' % (name, fold)
        qmodels.append(qmodel)
    return qmodels
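Note that getQuantizedFromMaps relies on a module-level allQDictionaries mapping and a bitwidth value that are not shown here. A minimal sketch of what such globals might look like, with purely illustrative run names and quantizers:

bitwidth = 4  # illustrative: bit width passed to model_quantize above
allQDictionaries = {
    # illustrative: each entry maps a run name to a quantization config
    'binary': {
        'QConv2D': {'kernel_quantizer': 'binary', 'bias_quantizer': 'binary'}
    },
    'ternary': {
        'QConv2D': {'kernel_quantizer': 'ternary', 'bias_quantizer': 'ternary'}
    },
}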
Example #4
def test_birnn_conversion(rnn):
    m = create_network_birnn(rnn)
    name = 'Q' + m.layers[1].layer.__class__.__name__
    d = {
        'QBidirectional': {
            'kernel_quantizer': 'binary',
            'recurrent_quantizer': 'binary',
            'bias_quantizer': 'binary',
            'activation_quantizer': 'binary',
        }
    }
    if name != 'QSimpleRNN':
        d['QBidirectional']['recurrent_activation_quantizer'] = 'binary'

    qq = model_quantize(m, d, 4)
    layer = qq.layers[1].layer
    assert str(layer.kernel_quantizer) == 'binary'
    assert str(layer.recurrent_quantizer) == 'binary'
    assert str(layer.bias_quantizer) == 'binary'
    assert str(layer.activation) == 'binary()'
    if name != 'QSimpleRNN':
        assert str(layer.recurrent_activation) == 'binary()'
    backward_layer = qq.layers[1].backward_layer
    # backward-layer weight quantizers are dicts because of constraints.serialize
    assert str(backward_layer.kernel_quantizer['class_name']) == 'binary'
    assert str(backward_layer.recurrent_quantizer['class_name']) == 'binary'
    assert str(backward_layer.bias_quantizer['class_name']) == 'binary'
    assert str(backward_layer.activation) == 'binary()'
    if name != 'QSimpleRNN':
        assert str(backward_layer.recurrent_activation) == 'binary()'
def test_new_forgiving_factor():
  """Tests forgiving factor."""
  delta_p = 8.0
  delta_n = 8.0
  rate = 2.0
  stress = 1.0
  input_bits = 8
  output_bits = 8
  ref_bits = 8

  config = {
      "QDense": ["parameters", "activations"],
      "Dense": ["parameters", "activations"],
      "QConv2D": ["parameters", "activations"],
      "Conv2D": ["parameters", "activations"],
      "DepthwiseConv2D": ["parameters", "activations"],
      "QDepthwiseConv2D": ["parameters", "activations"],
      "Activation": ["activations"],
      "QActivation": ["activations"],
      "QBatchNormalization": ["parameters"],
      "BatchNormalization": ["parameters"],
      "default": ["activations"]
  }

  model = get_model()

  ffb = ForgivingFactorBits(
      delta_p, delta_n, rate, stress,
      input_bits, output_bits, ref_bits,
      config
  )

  cached_result = ffb.compute_model_size(model)
  ref_size = cached_result[0]
  ref_p = cached_result[1]
  ref_a = cached_result[2]
  ref_size_dict = cached_result[3]

  assert ref_size == 258544
  assert ref_p == 43720
  assert ref_a == 214824

  q_dict = {
      "c1": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "quantized_bits(4)"
      }
  }

  q_model = model_quantize(model, q_dict, 4)

  cached_result = ffb.compute_model_size(q_model)
  trial_size_dict = cached_result[3]

  for name in trial_size_dict:
    if name != "c1":
      assert trial_size_dict[name] == ref_size_dict[name]
  assert trial_size_dict["c1"]["parameters"] == 416
def test_automatic_conversion_from_relu_to_qr():
  m = create_network()
  d = {
      "QConv2D": {
          "kernel_quantizer": "binary",
          "bias_quantizer": "binary"
      }}
  qq = model_quantize(m, d, 4)
  assert str(qq.layers[3].activation) == "quantized_relu(4,0)"
def test_conversion_print_qstats():
    # this tests if references in tensorflow are working properly.
    m = create_network()
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QActivation": {
            "relu": "ternary"
        }
    }
    qq = model_quantize(m, d, 4)
    qq.summary()
    print_qstats(qq)

    # test if print_qstats works with unquantized layers
    print_qstats(m)

    # test if print_qstats works with mixture of quantized and unquantized layers
    m1 = create_mix_network()
    print_qstats(m1)

    m2 = create_network_with_bn()
    d2 = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QActivation": {
            "relu": "ternary"
        },
        "QConv2DBatchnorm": {
            "kernel_quantizer": "ternary",
            "bias_quantizer": "ternary",
        },
        "QDepthwiseConv2DBatchnorm": {
            "depthwise_quantizer": "ternary",
            "bias_quantizer": "ternary",
        },
    }
    m2 = model_quantize(m2, d2, 4, enable_bn_folding=True)
    m2.summary()
    print_qstats(m2)
def test_linear_activation_conversion():
    m = create_network()

    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary",
            "activation_quantizer": "binary"
        }
    }
    qq = model_quantize(m, d, 4)

    assert str(qq.layers[1].activation) == "binary()"
Example #9
def test_network_quantization(rnn):
    model = Sequential([rnn(16)])
    jm = copy.deepcopy(json.loads(model.to_json()))
    config = jm["config"]
    layers = config["layers"]
    d = {
        f"Q{layers[0]['class_name']}": {
            "kernel_quantizer": "binary",
            "recurrent_quantizer": "binary",
            "bias_quantizer": "binary"
        }
    }
    qmodel = model_quantize(model, d, 4)
    assert str(qmodel.layers[0].activation) == "quantized_tanh(4,0)"
def test_conversion_qadaptiveactivation_with_preference():
    m = create_network()
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "relu_act": {
            "relu": "quantized_relu(8)"
        }
    }

    # Test with QActivation preference
    qq1 = model_quantize(m, d, 4, prefer_qadaptiveactivation=False)
    assert qq1.layers[2].__class__.__name__ == "QActivation"
    assert str(qq1.layers[2].quantizer).startswith("quantized_relu(8,")
    assert qq1.layers[4].__class__.__name__ == "Activation"

    # Test with QAdaptiveActivation preference
    qq2 = model_quantize(m, d, 4, prefer_qadaptiveactivation=True)
    assert qq2.layers[2].__class__.__name__ == "QAdaptiveActivation"
    assert str(qq2.layers[2].quantizer).startswith("quantized_relu(8,")
    assert qq2.layers[4].__class__.__name__ == "Activation"
def test_conversion_from_relu_activation_to_qadaptiveactivation():
    m = create_network()
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QAdaptiveActivation": {
            "relu": "quantized_relu(8)"
        }
    }
    qq = model_quantize(m, d, 4)
    assert qq.layers[2].__class__.__name__ == "QAdaptiveActivation"
    assert str(qq.layers[2].quantizer).startswith("quantized_relu(8,")
    assert qq.layers[4].__class__.__name__ == "Activation"
Example #12
def test_conversion_print_qstats():
    # this tests if references in tensorflow are working properly.
    m = create_network()
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QActivation": {
            "relu": "ternary"
        }
    }
    qq = model_quantize(m, d, 4)
    qq.summary()
    print_qstats(qq)
def test_conversion_from_relu_activation_to_qr_qactivation():
    m = create_network()
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QActivation": {
            "relu": "ternary"
        }
    }
    qq = model_quantize(m, d, 4)
    assert qq.layers[2].__class__.__name__ == "QActivation"
    assert str(qq.layers[2].quantizer) == "ternary()"
    assert qq.layers[4].__class__.__name__ == "Activation"
Example #14
def getQuantizedModel(precision, model, weightfile):

    model.load_weights(weightfile)
    config = build_config(precision)
    custom_objects = {'BatchNormalization': tf.keras.layers.BatchNormalization}
    qmodel = model_quantize(model,
                            config,
                            precision,
                            custom_objects=custom_objects,
                            transfer_weights=True)
    qmodel._name = 'quantized_%i' % (precision)
    for layer in qmodel.layers:
        if hasattr(layer, "kernel_quantizer"):
            print(layer.name, "kernel:", str(layer.kernel_quantizer_internal),
                  "bias:", str(layer.bias_quantizer_internal))
        elif hasattr(layer, "quantizer"):
            print(layer.name, "quantizer:", str(layer.quantizer))
    return qmodel
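getQuantizedModel above calls a build_config(precision) helper that is not shown. A minimal sketch of such a helper, returning a per-class config in the same format used by the other examples (the quantizer choices are illustrative):

def build_config(precision):
    # illustrative only: per-class quantizers sized to the requested precision
    q = 'quantized_bits(%i,0,1)' % precision
    return {
        'QConv2D': {'kernel_quantizer': q, 'bias_quantizer': q},
        'QDense': {'kernel_quantizer': q, 'bias_quantizer': q},
        'QActivation': {'relu': 'quantized_relu(%i,0)' % precision},
    }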
Example #15
def test_rnn_conversion(rnn):
    m = create_network_rnn(rnn)
    name = 'Q' + m.layers[1].__class__.__name__
    d = {
        name: {
            'kernel_quantizer': 'binary',
            'recurrent_quantizer': 'binary',
            'bias_quantizer': 'binary',
            'activation_quantizer': 'binary',
        }
    }
    if name != 'QSimpleRNN':
        d[name]['recurrent_activation_quantizer'] = 'binary'

    qq = model_quantize(m, d, 4)
    assert str(qq.layers[1].kernel_quantizer) == 'binary'
    assert str(qq.layers[1].recurrent_quantizer) == 'binary'
    assert str(qq.layers[1].bias_quantizer) == 'binary'
    assert str(qq.layers[1].activation) == 'binary()'
    if name != 'QSimpleRNN':
        assert str(qq.layers[1].recurrent_activation) == 'binary()'
Example #16
def create_quantized_network():
  """Creates a simple quantized conv net model."""
  # Create a simple model
  xi = Input((28, 28, 1))
  x = Conv2D(32, (3, 3))(xi)
  x = Activation("relu")(x)
  x = Conv2D(32, (3, 3), activation="relu")(x)
  x = Activation("softmax")(x)
  model = Model(inputs=xi, outputs=x)

  # Quantize the model
  quantizer_config = {
      "QConv2D": {
          "kernel_quantizer": "quantized_bits(4)",
          "bias_quantizer": "quantized_bits(4)"
      },
      "QActivation": {
          "relu": "ternary"
      }
  }
  activation_bits = 4
  qmodel = model_quantize(model, quantizer_config, activation_bits)
  return qmodel
    if Prune:
      train.setModel(model_pruned)
      if fullModel:
        setWeights(train.keras_model,fullModel)
      print_model_sparsity(train.keras_model)
      additionalCallbacks = pruning_callbacks.UpdatePruningStep()
      
    elif Quantize:
      try:
        train.keras_model = keras.models.load_model(fullModel)
        transferWeights = True 
      except:  
        print("No pretrained model found! Building new model without pretrained weights")
        transferWeights = False 
      
      train.keras_model = model_quantize(train.keras_model, qDicts['4_bit'], 4, transfer_weights=transferWeights)  # currently '4_bit'; other options: 'dense2_binary', 'conv2d_binary'
      print_qstats(train.keras_model)
    
    train.compileModel(learningrate=0.0001,
                       loss='binary_crossentropy')  # alternative: binary_cross_entropy_with_extras
print(train.keras_model.summary())

model,history = train.trainModel(nepochs=30,
                                 batchsize=50,
                                 checkperiod=1, # saves a checkpoint model every N epochs
                                 verbose=1,
                                 additional_callbacks = additionalCallbacks )
train.change_learning_rate(0.0003)
model,history = train.trainModel(nepochs=30,
                                 batchsize=50,
    def quantize_model(self, hp):
        """Quantize model by hyperparameter search and extracting size schema."""

        # configuration for quantization.
        q_dict = {}

        model = clone_model(self.model, self.custom_objects)

        fanin = []

        filter_range = [0.5, 0.75, 1.0, 1.5, 2.0]

        # network_filters=hp.Choice(...) should only be defined if we are sure
        # the current block has any layer that needs a filter sweep.
        # Otherwise, when no layer needs a filter sweep and an hp variable is
        # defined, there will be ineffective trials that loop over the network
        # filter range, even though the filter sweep is never applied to any
        # layer. Therefore, we use filter_sweep_enabled to mark whether any
        # layer in the current block needs a filter sweep.
        kernel_quantizer_dict = {}
        filter_sweep_enabled = False
        for layer in model.layers:
            if layer.__class__.__name__ in REGISTERED_LAYERS:
                kernel_quantizer, bits = self._get_quantizer(
                    hp,
                    layer.name + "_kernel",
                    layer.name,
                    layer.__class__.__name__,
                    is_kernel=True)

                kernel_quantizer_dict[layer.name] = (kernel_quantizer, bits)

                # kernel_quantizer is not None -> the layer in the current
                # block needs to be quantized
                if kernel_quantizer:
                    if (not filter_sweep_enabled
                            and self.tune_filters in ["layer", "block"] and
                            not self.tune_filters_exceptions.search(layer.name)
                            and layer.__class__.__name__
                            in ["Dense", "Conv1D", "Conv2D"]):
                        filter_sweep_enabled = True

                if layer.__class__.__name__ in SEQUENCE_LAYERS:
                    recurrent_quantizer, _ = self._get_quantizer(
                        hp,
                        layer.name + "_recurrent_kernel",
                        layer.name,
                        layer.__class__.__name__,
                        is_kernel=True)

        if self.tune_filters == "block" and filter_sweep_enabled:
            network_filters = hp.Choice("network_filters",
                                        values=filter_range,
                                        default=1.0)
        else:
            network_filters = 1.0

        for layer_id, layer in enumerate(model.layers):

            # we can use these indexes to disable some layers, like the last
            # layer

            if self.layer_indexes is not None and layer_id not in self.layer_indexes:
                continue

            layer_d = {}

            if layer.__class__.__name__ in Q_LAYERS:
                weights = layer.get_weights()[0]
                if (layer.get_quantizers()[0]
                        and hasattr(layer.get_quantizers()[0], "bits")):
                    bits = layer.get_quantizers()[0].bits
                else:
                    bits = 8
                fanin.append(np.prod(weights.shape[:-1]) * (8. - bits) / 8.)

            if layer.__class__.__name__ in REGISTERED_LAYERS:
                # difference between depthwise and the rest is just the name
                # of the kernel.
                if layer.__class__.__name__ == "DepthwiseConv2D":
                    kernel_name = "depthwise_quantizer"
                else:
                    kernel_name = "kernel_quantizer"

                # sample kernel quantizer.
                (kernel_quantizer, bits) = kernel_quantizer_dict[layer.name]

                if not kernel_quantizer:
                    continue

                # process fanin here

                if bits < 8:
                    weights = layer.get_weights()[0]
                    fanin.append(
                        np.prod(weights.shape[:-1]) * (8. - bits) / 8.)

                # we only want to do that if we are going to quantize layer
                if (self.tune_filters in ["layer", "block"]
                        and not self.tune_filters_exceptions.search(layer.name)
                        and layer.__class__.__name__
                        in ["Dense", "Conv1D", "Conv2D"]):
                    if self.tune_filters == "layer":
                        layer_filters = hp.Choice("network_filters_" +
                                                  layer.name,
                                                  values=filter_range,
                                                  default=1.0)
                    else:
                        layer_filters = network_filters

                    if layer.__class__.__name__ == "Dense":
                        layer.units = max(int(layer.units * layer_filters), 1)
                    elif layer.__class__.__name__ in ["Conv1D", "Conv2D"]:
                        layer.filters = max(int(layer.filters * layer_filters),
                                            1)

                layer_d[kernel_name] = kernel_quantizer

                if layer.__class__.__name__ in SEQUENCE_LAYERS:
                    layer_d['recurrent_quantizer'] = recurrent_quantizer

                if layer.__class__.__name__ in [
                        "LSTM", "GRU", "Bidirectional"
                ]:
                    layer_d['recurrent_activation'], _ = self._get_quantizer(
                        hp,
                        layer.name + "_recurrent_activation",
                        layer.name,
                        layer.__class__.__name__,
                        is_kernel=False)

                # if we use bias, sample quantizer.
                if layer.__class__.__name__ == "Bidirectional":
                    layer_d["bias_quantizer"], bits = self._get_quantizer(
                        hp,
                        layer.name + "_bias",
                        layer.name,
                        layer.__class__.__name__,
                        is_kernel=False)
                    layer_d["activation"], bits = self._get_quantizer(
                        hp,
                        layer.name + "_activation",
                        layer.name,
                        layer.__class__.__name__,
                        is_kernel=False)
                    q_dict[layer.name] = layer_d
                else:
                    if layer.use_bias:
                        layer_d["bias_quantizer"], bits = self._get_quantizer(
                            hp,
                            layer.name + "_bias",
                            layer.name,
                            layer.__class__.__name__,
                            is_kernel=False)

                    # if activation is not linear/softmax we need to process it.
                    if layer.activation is None:
                        is_softmax = False
                        is_linear = False
                    else:
                        if isinstance(layer.activation, six.string_types):
                            is_softmax = layer.activation == "softmax"
                            is_linear = layer.activation == "linear"
                        else:
                            is_softmax = layer.activation.__name__ == "softmax"
                            is_linear = layer.activation.__name__ == "linear"

                    if not is_softmax and not is_linear:
                        layer_d["activation"], bits = self._get_quantizer(
                            hp,
                            layer.name + "_activation",
                            layer.name,
                            layer.__class__.__name__,
                            is_kernel=False)

                    q_dict[layer.name] = layer_d

            elif layer.__class__.__name__ in ["Reshape"]:
                # we cannot handle fine tuning filters per layer right now.
                assert self.tune_filters in ["none", "block"]

                # we need to make sure this pattern exists; this should only
                # occur for "scheduler", so the name will be complete and not
                # a pattern.

                if (self.tune_filters == "none" or layer.name not in self.limit
                        or self.tune_filters_exceptions.search(layer.name)):
                    continue

                if K.image_data_format() == "channels_last":
                    layer.target_shape = layer.target_shape[:-1] + (min(
                        int(layer.target_shape[-1] * network_filters), 1), )
                else:
                    layer.target_shape = (int(
                        layer.target_shape[0] *
                        network_filters), ) + layer.target_shape[1:]

            elif layer.__class__.__name__ in ["Activation"]:
                if isinstance(layer.activation, six.string_types):
                    is_linear = layer.activation == "linear"
                    is_softmax = layer.activation == "softmax"
                else:
                    is_linear = layer.activation.__name__ == "linear"
                    is_softmax = layer.activation.__name__ == "softmax"

                # if it is a linear activation, we will notify the
                # quantizer that we are searching for a linear type of
                # quantizer

                if not is_softmax:
                    activation, bits = self._get_quantizer(
                        hp,
                        layer.name + "_activation",
                        layer.name,
                        layer.__class__.__name__,
                        is_kernel=False,
                        is_linear=is_linear)

                    if not activation:
                        continue

                    # look at documentation on model_quantize
                    q_dict[layer.name] = activation
            elif layer.__class__.__name__ in self.limit:
                # mark it for conversion
                q_dict[layer.name] = {}
            else:
                for pattern in self.limit:
                    if re.match(pattern, layer.name):
                        q_dict[layer.name] = {}
                        break

        q_model = model_quantize(model,
                                 q_dict,
                                 self.activation_bits,
                                 custom_objects=self.custom_objects,
                                 transfer_weights=self.transfer_weights)

        return q_model, fanin
def test_folded_layer_conversion():
    # create a sequential model with conv2d layer and activation layers
    m1 = create_network()

    # create a sequential model with conv2d layer followed by bn layer
    m2 = create_network_with_bn()

    # quantization config
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QDepthwiseConv2D": {
            "depthwise_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QConv2DBatchnorm": {
            "kernel_quantizer": "ternary",
            "bias_quantizer": "ternary",
        },
        "QDepthwiseConv2DBatchnorm": {
            "depthwise_quantizer": "ternary",
            "bias_quantizer": "ternary",
        },
        "relu_act": {
            "relu": "quantized_relu(8)"
        }
    }

    # test when model has no layer to fold
    # desired behavior: un-folded layers
    qq1 = model_quantize(m1, d, 4, enable_bn_folding=True)
    assert qq1.layers[1].__class__.__name__ == "QConv2D"
    assert str(qq1.layers[1].quantizers[0]).startswith("binary")

    # test when the 1st conv2d layer needs to fold but the 2nd conv2d layer
    # does not (not followed by a bn layer)
    # desired behavior: 1st conv2d is folded, 2nd conv2d unfolded
    # also test the depthwiseconv2d layer should fold
    qq2 = model_quantize(m2, d, 4, enable_bn_folding=True)
    assert qq2.layers[1].__class__.__name__ == "QConv2DBatchnorm"
    assert str(qq2.layers[1].quantizers[0]).startswith("ternary")
    assert qq2.layers[3].__class__.__name__ == "QConv2D"
    assert str(qq2.layers[3].quantizers[0]).startswith("binary")
    assert qq2.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
    assert str(qq2.layers[5].quantizers[0]).startswith("ternary")

    # test when there are layers to fold but folding is disabled
    # desired behavior: all conv2d/depthwise2d layers are not folded
    qq3 = model_quantize(m2, d, 4, enable_bn_folding=False)
    assert qq3.layers[1].__class__.__name__ == "QConv2D"
    assert str(qq3.layers[1].quantizers[0]).startswith("binary")
    assert qq3.layers[2].__class__.__name__ == "BatchNormalization"
    assert str(qq3.layers[3].quantizer).startswith("quantized_relu")
    assert qq3.layers[6].__class__.__name__ == "QDepthwiseConv2D"
    assert str(qq3.layers[6].quantizers[0]).startswith("binary")

    # test when QConv2DBatchnorm quantizer, e.g., is not given in config
    # desired behavior: quantizers for QConv2DBatchnorm layer fall back to QConv2D
    #   quantizers
    d = {
        "QConv2D": {
            "kernel_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "QDepthwiseConv2D": {
            "depthwise_quantizer": "binary",
            "bias_quantizer": "binary"
        },
        "relu_act": {
            "relu": "quantized_relu(8)"
        }
    }
    qq4 = model_quantize(m2, d, 4, enable_bn_folding=True)
    assert qq4.layers[1].__class__.__name__ == "QConv2DBatchnorm"
    assert str(qq4.layers[1].quantizers[0]).startswith("binary")
    assert qq4.layers[3].__class__.__name__ == "QConv2D"
    assert str(qq4.layers[3].quantizers[0]).startswith("binary")
    assert qq4.layers[5].__class__.__name__ == "QDepthwiseConv2DBatchnorm"
    assert str(qq4.layers[5].quantizers[0]).startswith("binary")
def test_no_activation_conversion_to_quantized():
    m = create_network()
    d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
    qq = model_quantize(m, d, 4)
    assert qq.layers[2].__class__.__name__ == "Activation"
    assert qq.layers[4].__class__.__name__ == "Activation"
Example #21
model.summary()

q_dict = {
    "conv2d_0_m": {
        "kernel_quantizer": "binary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "conv2d_1_m": {
        "kernel_quantizer": "ternary()",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "act2_m": "quantized_relu(6,2)",
    "QActivation": {
        "relu": "quantized_relu(4,0)"
    },
    "QConv2D": {
        "kernel_quantizer": "quantized_bits(4,0,1)",
        "bias_quantizer": "quantized_bits(4,0,1)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_bits(3,0,1)",
        "bias_quantizer": "quantized_bits(3,0,1)"
    }
}

qmodel = model_quantize(model, q_dict, 4)

qmodel.summary()

print_qstats(qmodel)
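Note that this q_dict mixes layer-name keys ("conv2d_0_m", "conv2d_1_m", "act2_m") with class-name defaults ("QActivation", "QConv2D", "QDense"), so the named layers get their own quantizers while the remaining layers of those classes fall back to the class-level settings.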
def test_sequential_model_conversion():
    m = create_network_sequential()
    d = {"QConv2D": {"kernel_quantizer": "binary", "bias_quantizer": "binary"}}
    qq = model_quantize(m, d, 4)
    assert str(qq.layers[2].activation) == "quantized_relu(4,0)"