Beispiel #1
0
def getEnergy(model):
    """Estimate and print the energy of a floating-point reference model.

    Quantizes all internal variables and accumulators as fp32 so the
    resulting number is a baseline to compare quantized models against.
    """
    internal_dtype = "fp32"
    accumulator_dtype = "fp32"

    qtool = run_qtools.QTools(
        model,
        process="horowitz",
        source_quantizers=[quantized_bits(16, 6, 1)],
        is_inference=False,
        weights_path=None,
        keras_quantizer=internal_dtype,
        keras_accumulator=accumulator_dtype,
        for_reference=True)

    # Energy estimate with parameters/activations in "fixed" storage and a
    # minimum SRAM of 16 MB expressed in bits (8 * 16 * 1024 * 1024).
    per_layer_energy = qtool.pe(
        weights_on_memory="fixed",
        activations_on_memory="fixed",
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False)

    profile = qtool.extract_energy_profile(
        qtools_settings.cfg.include_energy, per_layer_energy)
    total = qtool.extract_energy_sum(
        qtools_settings.cfg.include_energy, per_layer_energy)

    pprint.pprint(profile)
    print()
    print("Total energy: {:.2f} uJ".format(total / 1000000.0))
Beispiel #2
0
def GetEnergy(model, verbose=False):
    """Estimate the energy of a floating-point reference model.

    Args:
      model: (q)keras model to profile.
      verbose: if True, also pretty-print the per-layer energy profile.

    Returns:
      The total estimated energy as returned by QTools (the uJ print below
      divides by 1e6, so the raw value is presumably in pJ — previously this
      function discarded the value and returned None).
    """
    # Use fp32 for all intermediate variables/accumulators so the result is
    # a floating-point baseline to compare quantized models against.
    reference_internal = 'fp32'
    reference_accumulator = 'fp32'
    proc = 'horowitz'

    q = run_qtools.QTools(
        model,
        process=proc,
        source_quantizers=[quantized_bits(8, 0, 1)],
        is_inference=False,
        weights_path=None,
        keras_quantizer=reference_internal,
        keras_accumulator=reference_accumulator,
        # whether to calculate baseline energy
        for_reference=True)

    energy_dict = q.pe(
        weights_on_memory="sram",
        activations_on_memory="sram",
        # minimum sram size in number of bits. Let's assume a 16MB SRAM.
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False)  # assuming data already in SRAM

    energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)
    total_energy = q.extract_energy_sum(qtools_settings.cfg.include_energy,
                                        energy_dict)
    if verbose:
        pprint.pprint(energy_profile)
        print()
    print("Total energy: {:.2f} uJ".format(total_energy / 1000000.0))
    return total_energy
Beispiel #3
0
    def get_trial(self, model):
        """Computes the size (total energy) of the quantization trial model.

        Caches the raw energy dict, the summed energy and the energy
        profile on the instance, then returns the summed energy.
        """
        qtool = run_qtools.QTools(
            model,
            process=self.process,
            source_quantizers=self.source_quantizers,
            is_inference=self.trained_model,
            weights_path=None,
            keras_quantizer=self.reference_internal,
            keras_accumulator=self.reference_accumulator,
            for_reference=False)

        # The second entry ([1]) of each memory setting is used for the
        # trial model (the first entry is used by get_reference).
        self.trial_energy_dict = qtool.pe(
            weights_on_memory=self.parameters_on_memory[1],
            activations_on_memory=self.activations_on_memory[1],
            min_sram_size=self.min_sram_size[1],
            rd_wr_on_io=self.rd_wr_on_io[1])

        include = qtools_settings.cfg.include_energy
        self.trial_size = qtool.extract_energy_sum(
            include, self.trial_energy_dict)
        self.trial_energy_profile = qtool.extract_energy_profile(
            include, self.trial_energy_dict)

        return self.trial_size
Beispiel #4
0
    def get_reference(self, model):
        """Returns the stressed reference-model energy, computing it once.

        The reference energy is cached on the instance; subsequent calls
        return the cached value multiplied by the stress factor.
        """
        if self.reference_size is not None:
            # Already computed — serve from cache.
            return self.reference_size * self.stress

        qtool = run_qtools.QTools(
            model,
            process=self.process,
            source_quantizers=self.reference_internal,
            is_inference=self.trained_model,
            weights_path=None,
            keras_quantizer=self.reference_internal,
            keras_accumulator=self.reference_accumulator,
            for_reference=True)

        # The first entry ([0]) of each memory setting is used for the
        # reference model (the second entry is used by get_trial).
        self.ref_energy_dict = qtool.pe(
            weights_on_memory=self.parameters_on_memory[0],
            activations_on_memory=self.activations_on_memory[0],
            min_sram_size=self.min_sram_size[0],
            rd_wr_on_io=self.rd_wr_on_io[0])

        include = qtools_settings.cfg.include_energy
        self.reference_size = qtool.extract_energy_sum(
            include, self.ref_energy_dict)
        self.reference_energy_profile = qtool.extract_energy_profile(
            include, self.ref_energy_dict)

        return self.reference_size * self.stress
Beispiel #5
0
    def get_qenergy(model, qenergy_config, for_reference):
        """Builds a QTools object and computes the model's total energy.

        Args:
          model: (q)keras model to evaluate.
          qenergy_config: dict holding the QTools/pe configuration values.
          for_reference: True for baseline energy, False for trial energy.

        Returns:
          Tuple of (QTools instance, total energy sum).
        """
        cfg = qenergy_config
        qtool = run_qtools.QTools(
            model,
            process=cfg["process"],
            source_quantizers=cfg["reference_internal"],
            is_inference=cfg["trained_model"],
            weights_path=None,
            keras_quantizer=cfg["reference_internal"],
            keras_accumulator=cfg["reference_accumulator"],
            for_reference=for_reference)

        # Calculate the energy of the derived data-type map.
        energy_dict = qtool.pe(
            weights_on_memory=cfg["parameters_on_memory"],
            activations_on_memory=cfg["activations_on_memory"],
            min_sram_size=cfg["min_sram_size"],
            rd_wr_on_io=cfg["rd_wr_on_io"])

        total_energy = qtool.extract_energy_sum(
            qtools_settings.cfg.include_energy, energy_dict)

        return qtool, total_energy
Beispiel #6
0
    reference_accumulator = "int32"

    # By setting for_reference=True, we create QTools object which uses
    # keras_quantizer to quantize weights/bias and
    # keras_accumulator to quantize MAC variables for all layers. Obviously, this
    # overwrites any quantizers that user specified in the qkeras layers. The
    # purpose of doing so is to enable user to calculate a baseline energy number
    # for a given model architecture and compare it against quantized models.
    q = run_qtools.QTools(
        model,
        # energy calculation using a given process
        process="horowitz",
        # quantizers for model input
        source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
        is_inference=False,
        # absolute path (including filename) of the model weights
        weights_path=None,
        # keras_quantizer to quantize weight/bias in un-quantized keras layers
        keras_quantizer=reference_internal,
        # keras_quantizer to quantize MAC in un-quantized keras layers
        keras_accumulator=reference_accumulator,
        # whether calculate baseline energy
        for_reference=True)

    # caculate energy of the derived data type map.
    ref_energy_dict = q.pe(
        # whether to store parameters in dram, sram, or fixed
        weights_on_memory="sram",
        # store activations in dram or sram
        activations_on_memory="sram",
        # minimum sram size in number of bits
Beispiel #7
0
def test_qenergy():
    """Checks QTools per-layer and total energy for a small quantized MLP."""
    # Build a 784-300-100-10 MLP with binary weights in the first layer and
    # 4-bit weights in the remaining dense layers.
    x = x_in = keras.layers.Input((784, ), name="input")
    x = QDense(300,
               kernel_quantizer=quantizers.binary(),
               bias_quantizer=quantizers.binary(),
               name="d0")(x)
    x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x)
    x = QDense(100,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d1")(x)
    x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x)
    x = QDense(10,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])

    reference_internal = "int8"
    reference_accumulator = "int32"

    def run_energy(for_reference):
        # Helper: run the QTools energy estimate with SRAM memory settings.
        qtool = run_qtools.QTools(model,
                                  process="horowitz",
                                  source_quantizers=reference_internal,
                                  is_inference=False,
                                  weights_path=None,
                                  keras_quantizer=reference_internal,
                                  keras_accumulator=reference_accumulator,
                                  for_reference=for_reference)
        energy = qtool.pe(weights_on_memory="sram",
                          activations_on_memory="sram",
                          min_sram_size=8 * 16 * 1024 * 1024,
                          rd_wr_on_io=False)
        total = qtool.extract_energy_sum(qtools_settings.cfg.include_energy,
                                         energy)
        return energy, total

    # Reference (baseline) cost first, then trial (user-quantized) cost.
    ref_energy_dict, reference_size = run_energy(True)
    trial_energy_dict, trial_size = run_energy(False)

    # Expected values per layer: (inputs, outputs, parameters, op_cost).
    # Reference numbers reflect keras_accumulator as the output quantizer.
    expected_ref = {
        "d0": (372.77, 570.57, 111975.96, 70560.0),
        "d1": (570.57, 190.19, 14313.66, 26500.0),
        "d2": (190.19, 19.02, 483.08, 883.33),
    }
    expected_trial = {
        "d0": (372.77, 342.34, 13997.95, 15729.0),
        "d1": (72.27, 110.31, 7158.73, 3250.0),
        "d2": (26.63, 11.41, 243.44, 102.08),
    }
    for expected, energy_dict in ((expected_ref, ref_energy_dict),
                                  (expected_trial, trial_energy_dict)):
        for layer, (e_in, e_out, e_param, e_op) in expected.items():
            tmp = energy_dict[layer]["energy"]
            assert tmp["inputs"] == pytest.approx(e_in, abs=0.1)
            assert tmp["outputs"] == pytest.approx(e_out, abs=0.1)
            assert tmp["parameters"] == pytest.approx(e_param, abs=0.1)
            assert tmp["op_cost"] == pytest.approx(e_op, abs=0.1)

    assert int(reference_size) == 226629
    assert int(trial_size) == 41070
Beispiel #8
0
    layer_name = layer.__class__.__name__
    parameters = aq._param_size(layer)
    activations = aq._act_size(layer)
    print("Parameters {}:{}".format(layer.name,parameters))
    print("Activations {}:{}".format(layer.name,activations))
    total_size_params += parameters
    total_size_acts += activations


# Model size statistics: total, parameter and activation sizes per layer.
total_size, p_size, a_size, model_size_dict = aq.compute_model_size(model)

flops = get_flops(model, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")


# Build the QTools object for the actual (non-reference) quantized model,
# falling back to fp32 for any un-quantized weights/accumulators.
q = run_qtools.QTools(
    model,
    process="horowitz",
    source_quantizers=[quantized_bits(16, 6, 1)],
    is_inference=False,
    weights_path=None,
    keras_quantizer="fp32",
    keras_accumulator="fp32",
    for_reference=False)
q.qtools_stats_print()

# Calculate the energy of the derived data-type map: parameters and
# activations both live in SRAM (minimum size 16 MB, expressed in bits);
# rd_wr_on_io=False assumes the data is already resident in SRAM rather
# than being loaded from DRAM through a cache.
energy_dict = q.pe(
    weights_on_memory="sram",
    activations_on_memory="sram",
    min_sram_size=8 * 16 * 1024 * 1024,
    rd_wr_on_io=False)

# get stats of energy distribution in each layer
def generate_json(in_model):
    """Example: generate the data type map for a given model.

    Args:
      in_model: qkeras model object.

    Usage:
      input_quantizer_list:
        A list of input quantizers for the model. It could be in the form of:
          1. a list of quantizers, one quantizer for each of the model inputs
          2. one single quantizer, which will be used for all of the model
             inputs
          3. None. The default input quantizer defined in config_xxx.py will
             be used for all of the model inputs

      for_reference: get energy for a reference model / trial model
        1. True: get baseline energy for a given model. Use keras_quantizer/
           keras_accumulator (or default_interm_quantizer in config_xxx.py if
           keras_quantizer/keras_accumulator not given) to quantize all layers
           in a model in order to calculate its energy. It serves the purpose
           of setting up a baseline energy for a given model architecture.
        2. False: get "real" energy for a given model using user-specified
           quantizers. For layers that are not quantized (keras layers) or
           have no user-specified quantizers (qkeras layers without quantizers
           specified), keras_quantizer and keras_accumulator (or
           default_interm_quantizer in config_xxx.py if keras_quantizer/
           keras_accumulator not given) will be used as their quantizers.

      process: technology process to use in configuration (horowitz, ...)

      weights_path: absolute path to the model weights

      is_inference: whether the model has been trained already, which is
          needed to compute tighter bounds for QBatchNormalization power
          estimation

      Other parameters (defined in config_xxx.py):
        1. "default_source_quantizer" is used as the default input quantizer
           if the user does not specify any input quantizers,
        2. "default_interm_quantizer" is used as the default quantizer for
           any intermediate variables such as multiplier, accumulator,
           weight/bias in a qkeras layer if the user does not specify the
           corresponding variable
        3. process_name: energy calculation parameters for different
           processes. "horowitz" is the process we use by default.
        4. "include_energy": which energy to include at each layer when
           calculating the total energy of the entire model.
           "parameters": memory access energy for loading model parameters.
           "inputs": memory access energy for reading inputs
           "outputs": memory access energy for writing outputs
           "op_cost": operation energy for multiplication and accumulation
    """

    input_quantizer_list = [quantizers.quantized_bits(8, 0, 1)]
    reference_internal = "int8"
    reference_accumulator = "int32"

    # generate QTools object which contains model data type map in json format
    q = run_qtools.QTools(
        in_model,
        # energy calculation using a given process
        process="horowitz",
        # quantizers for model inputs
        source_quantizers=input_quantizer_list,
        # training or inference with a pre-trained model
        is_inference=False,
        # path to pre-trained model weights
        weights_path=None,
        # keras_quantizer to quantize weight/bias in non-quantized keras layers
        keras_quantizer=reference_internal,
        # keras_accumulator to quantize MAC in un-quantized keras layers
        keras_accumulator=reference_accumulator,
        # calculating baseline energy or not
        for_reference=False)

    # print data type map
    q.qtools_stats_print()