Example #1
def GenerateInputQuantizerList(input_quantizers, inputs_length,
                               default_source_quantizer):
    """Generates the list of input quantizers."""
    # generate a list of input quantizers
    input_quantizer_list = []
    quantizer_factory = quantizer_factory_module.QuantizerFactory()
    if input_quantizers is None:
        logging.warning("************ SOURCE has no quantizer type;"
                        " using the default quantizer instead")

        for _ in range(inputs_length):
            input_quantizer_list.append(
                quantizer_factory.make_default_quantizer(
                    mode=default_source_quantizer))
    else:
        # A single (non-list) quantizer was passed; use it for all inputs.
        # (Checked first: calling len() on a single quantizer would fail.)
        if not isinstance(input_quantizers, list):
            for _ in range(inputs_length):
                input_quantizer_list.append(
                    quantizer_factory.make_quantizer(input_quantizers))
        elif inputs_length == len(input_quantizers):
            for quantizer in input_quantizers:
                input_quantizer_list.append(
                    quantizer_factory.make_quantizer(quantizer))
        else:
            raise WrongInputQuantizerError(
                "ERROR: Numer of input (%d) must be the same as number of source"
                " quantizers (%d)" % (inputs_length, len(input_quantizers)))

    return input_quantizer_list
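A brief usage sketch (hedged: it assumes QKeras' quantized_bits is a supported quantizer and that default_source_quantizer takes a mode string such as "quantized_bits(8, 0, 1)"; both are assumptions, not shown above):

from qkeras import quantized_bits

# No quantizers given -> one default quantizer per model input.
qlist = GenerateInputQuantizerList(
    None, inputs_length=2,
    default_source_quantizer="quantized_bits(8, 0, 1)")

# A single (non-list) quantizer is replicated across all inputs.
qlist = GenerateInputQuantizerList(
    quantized_bits(8, 0, 1), inputs_length=2,
    default_source_quantizer="quantized_bits(8, 0, 1)")

# A list must match inputs_length; otherwise WrongInputQuantizerError
# is raised.
qlist = GenerateInputQuantizerList(
    [quantized_bits(8, 0, 1), quantized_bits(4, 0, 1)], inputs_length=2,
    default_source_quantizer="quantized_bits(8, 0, 1)")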
Example #2
def generate_layer_data_type_map(graph,
                                 source_quantizer_list,
                                 is_inference,
                                 keras_quantizer=None,
                                 keras_accumulator=None,
                                 for_reference=False,
                                 debug=False):
    """main funciton to generate datatype for each layer.

  For each type of layer, this function calculates the sizes and minimum
  number of bits required to represent the parameters and variables (e.g.,
  weights, bias, multiplier and accumulator (MAC)) embedded in
  these layers.

  Args:
    graph: input graph that traverses the model
    source_quantizer_list: a list of quantizers for model inputs
    is_inference: whether model is pre-trained with weights available
    keras_quantizer: default quantizer used to quantize un-quantized layers
    keras_accumulator: default MAC quantizer to quantize un-quantized layers
    for_reference: whether to generate a map for a baseline model
    debug: whether to print debug messages

  Returns:
    a result dict containing the following fields:
    source_quantizer_list: same as the input argument
    output_layers: names of the layers that are output layers
    input_layers: names of the layers that are input layers
    layer_data_type_map: data type map of each layer
  """

    quantizer_factory = quantizer_factory_module.QuantizerFactory()
    layer_data_type_map = collections.OrderedDict()

    # get the output and input layers

    output_layers = []
    input_layers = []
    predecessors = list(graph.predecessors(qgraph.SINK))
    successors = list(graph.successors(qgraph.SOURCE))

    for u in predecessors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        output_layers.append(graph.nodes[u]["layer"][0])

    for u in successors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        input_layers.append(graph.nodes[u]["layer"][0])

    for node_id in nx.topological_sort(graph):
        node = graph.nodes[node_id]
        node_type = node["type"][-1]
        layer = node["layer"][0]
        is_input_layer = layer in input_layers

        w_shapes = None
        b_shapes = None
        output_shapes = None

        if hasattr(layer, "output_shape"):
            output_shapes = layer.output_shape

        if hasattr(layer, "get_weights"):
            weights = layer.get_weights()
            if len(weights) != 0:
                w_shapes = weights[0].shape
                b_shapes = weights[0].shape[-1]

        if debug:
            print("########")
            if layer is not None:
                print(layer.name)
            else:
                print("None")

        # Deal with a Keras layer or a missing input quantizer in a QKeras layer.
        input_qe_list = qtools_util.get_input_quantizers_advanced(
            graph, node_id, is_input_layer, quantizer_factory, cfg)

        if input_qe_list and node_id != qgraph.SINK:
            input_quantizer_list = []
            for node in input_qe_list:
                input_quantizer_list.append(node[0])

            # calculate number of operations (multiplication/accumulation)
            (_, edge_0) = input_qe_list[0]
            input_shape = edge_0["shape"]
            # for merge layers, all input shapes are identical
            operation_count = qtools_util.get_operation_count(
                layer, input_shape)

        # Merge Layers with multiple inputs
        if qtools_util.is_merge_layers(layer):
            merge_factory = quantized_operators.MergeFactory()
            merge_quantizer = merge_factory.make_quantizer(
                input_qe_list, layer.__class__.__name__)

            if hasattr(layer, "get_quantizers"):
                # QMerge layer (future QKeras feature)
                qkeras_quantizer = layer.get_quantizers()[0]
                merge_quantizer.output = quantizer_factory.make_quantizer(
                    qkeras_quantizer)
            else:
                # merge layer is a Keras layer
                if for_reference:
                    merge_quantizer.output = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

                    if keras_accumulator:
                        # gate_factor and gate_bits remain the same as previously
                        # calculated; only the output quantizer is changed to the
                        # keras_accumulator
                        merge_quantizer.output = quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, merge_quantizer.output,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, merge_quantizer, None, None, None, None,
                None, output_quantizer, output_shapes, operation_count)

        # pooling/reshape/flatten
        elif qtools_util.is_shape_alternation_layers(layer):
            input_quantizer = input_quantizer_list[0]

            # output quantizer
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, input_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, None, None, None,
                output_quantizer, output_shapes, operation_count)

        # if Quantized Activation layer
        elif node_type in ["QActivation", "Activation"]:

            if for_reference or not hasattr(layer, "quantizer"):
                # Keras activation layer -> use default_interm_quantizer
                layer_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                layer_quantizer = layer.quantizer

                if not quantizer_factory.is_quantizer_supported(
                        layer_quantizer):
                    raise TagMissingError(
                        "Unsupported activation quantizer {} on this layer: {}"
                        .format(layer_quantizer, layer))

                if not layer_quantizer:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, w_shapes, None,
                b_shapes, output_quantizer, output_shapes, operation_count)

        elif node_type in ["QBatchNormalization", "BatchNormalization"]:

            (input_quantizer, _) = input_qe_list[0]

            # qkeras layers might be mixed with keras layers
            if for_reference or not hasattr(layer, "get_quantizers"):
                # Reference mode, or a Keras BatchNorm layer mixed into a
                # quantized model -> use default quantizers
                gamma_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                beta_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                mean_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                variance_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    gamma_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    beta_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    mean_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    variance_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                (qkeras_gamma_quantizer, qkeras_beta_quantizer,
                 qkeras_mean_quantizer,
                 qkeras_variance_quantizer) = layer.get_quantizers()

                if not qkeras_beta_quantizer:
                    beta_quantizer = quantizer_factory.clone_quantizer(
                        input_quantizer)
                else:
                    beta_quantizer = quantizer_factory.make_quantizer(
                        qkeras_beta_quantizer)

                if not qkeras_mean_quantizer:
                    mean_quantizer = quantizer_factory.clone_quantizer(
                        input_quantizer)
                else:
                    mean_quantizer = quantizer_factory.make_quantizer(
                        qkeras_mean_quantizer)

                if not qkeras_variance_quantizer:
                    variance_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)
                else:
                    # Use the variance quantizer specified in the layer.
                    variance_quantizer = quantizer_factory.make_quantizer(
                        qkeras_variance_quantizer)

                if not qkeras_gamma_quantizer:
                    gamma_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)
                else:
                    gamma_quantizer = quantizer_factory.make_quantizer(
                        qkeras_gamma_quantizer)

            # During inference, gamma, beta and variance are constants.
            # If they are po2 quantizers, we need to update their bits with
            # the actual values and also update the graph with the
            # corresponding output_quantizer on the edge.
            if is_inference:
                weights = qtools_util.get_weights(layer)
                # If scale (gamma) is disabled, there is one fewer weight;
                # likewise if center (beta) is disabled.
                num_weights = 4
                if not layer.scale:
                    num_weights -= 1
                if not layer.center:
                    num_weights -= 1

                if layer.scale and gamma_quantizer.is_po2:
                    gamma_quantizer.update_inference_values(weights[0])
                if variance_quantizer.is_po2:
                    variance_quantizer.update_inference_values(
                        weights[num_weights - 1])

            qbn = quantized_operators.QBNFactory()
            qbn.make_quantizer(input_quantizer, gamma_quantizer,
                               beta_quantizer, mean_quantizer,
                               variance_quantizer, layer.scale, layer.center)

            def set_output(op, output):
                if op:
                    op.output = output

            if for_reference or not hasattr(layer, "get_quantizers"):
                set_output(
                    qbn.internal_divide_quantizer,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_multiplier,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_accumulator,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_output,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                if keras_accumulator:
                    set_output(
                        qbn.internal_divide_quantizer,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_multiplier,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_accumulator,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_output,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

            layer_quantizer = qbn.internal_output.output

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            gamma_range = None
            if hasattr(layer, "gamma_range"):
                gamma_range = layer.gamma_range

            beta_range = None
            if hasattr(layer, "beta_range"):
                beta_range = layer.beta_range

            if not layer.center:
                qbn.beta_quantizer = None

            if not layer.scale:
                qbn.gamma_quantizer = None

            layer_data_type_map[layer] = {
                "input_quantizer_list": input_quantizer_list,
                "gamma_quantizer": gamma_quantizer,
                "beta_quantizer": beta_quantizer,
                "mean_quantizer": mean_quantizer,
                "variance_quantizer": variance_quantizer,
                "gamma_range": gamma_range,
                "beta_range": beta_range,
                "internal_divide_quantizer": qbn.internal_divide_quantizer,
                "internal_multiplier": qbn.internal_multiplier,
                "internal_accumulator": qbn.internal_accumulator,
                "output_quantizer": output_quantizer,
                "output_shapes": input_shape,
                "operation_count": operation_count
            }

        # if qdense, qconv, qpool, qoctave
        elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:

            (input_quantizer, _) = input_qe_list[0]

            if for_reference or not hasattr(layer, "get_quantizers"):
                # for_reference: force all quantizers to keras_quantizer
                weight_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                bias_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    weight_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    bias_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                # qkeras layer
                qkeras_weight_quantizer = layer.get_quantizers()[0]
                qkeras_bias_quantizer = layer.get_quantizers()[1]

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_weight_quantizer):
                    raise TagMissingError(
                        "Unsupported weight quantizer {} on this layer: {}".
                        format(qkeras_weight_quantizer, layer))

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_bias_quantizer):
                    raise TagMissingError(
                        "Unsupported bias quantizer {} on this layer: {}".
                        format(qkeras_bias_quantizer, layer))

                weight_quantizer = quantizer_factory.make_quantizer(
                    qkeras_weight_quantizer)
                bias_quantizer = quantizer_factory.make_quantizer(
                    qkeras_bias_quantizer)

            # TODO(lishanok): during inference, if weight and bias are po2,
            #  we need to update the corresponding quantizer type with the min
            #  and max of the constant values
            if is_inference:
                weights = qtools_util.get_weights(layer)
                if weight_quantizer.is_po2:
                    weight_quantizer.update_inference_values(weights[0])

                if bias_quantizer.is_po2:
                    bias_quantizer.update_inference_values(weights[1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            multiplier = multiplier_factory.make_multiplier(
                weight_quantizer, input_quantizer)

            weights = layer.get_weights()
            kernel = weights[0]


            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(
                kernel.shape, multiplier)

            if not layer.use_bias:
                bias_quantizer = None

            if debug:
                print(layer.name or "None")
                print("weight_quantizer:", weight_quantizer.bits)
                print("input_quantizer:", input_quantizer.bits)
                print("multiplier_quantizer:", multiplier.output.bits)
                print("multiplier_gate_bits:", multiplier.gate_bits)
                print("accumulator:", accumulator.output.bits)

            if for_reference or not hasattr(layer, "get_quantizers"):
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                multiplier.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, multiplier, accumulator,
                weight_quantizer, w_shapes, bias_quantizer, b_shapes,
                output_quantizer, output_shapes, operation_count)

        elif node_type:
            # any other unsupported layer types -> pass the input quantizer
            # type to output in graph
            (input_quantizer, _) = input_qe_list[0]

            if for_reference and keras_quantizer:
                input_quantizer = quantizer_factory.make_default_quantizer(
                    mode=keras_quantizer)

            update_output_quantizer_in_graph(graph, node_id, quantizer_factory,
                                             input_quantizer, for_reference)

    result = {
        "source_quantizer_list": source_quantizer_list,
        "output_layers": output_layers,
        "input_layers": input_layers,
        "layer_data_type_map": layer_data_type_map
    }

    return result
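A hedged end-to-end sketch of producing this map (the qgraph helper names below follow QKeras' qtools conventions but are assumptions, not confirmed by the snippet above):

# Hypothetical driver; qgraph.CreateGraph and
# qgraph.GraphPropagateActivationsToEdges are assumed helper names.
(graph, source_quantizer_list) = qgraph.CreateGraph(
    model, input_quantizers=None)
qgraph.GraphPropagateActivationsToEdges(graph)

result = generate_layer_data_type_map(
    graph, source_quantizer_list, is_inference=False)

for layer, data_type in result["layer_data_type_map"].items():
    print(getattr(layer, "name", layer))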
Example #3
def GenerateGraphFromModel(model, input_quantizers, default_source_quantizer):
    """Generates single source, single sink graph from model."""

    # A node represents a layer, with attributes [layer, type (class_name)].
    # An edge represents the tensor flowing between two layers, with
    # attributes [tensor, output_shape, QA (activation quantizer)].

    # input_quantizers are tagged on the edge between the input
    # layer and the following layer

    # generate a list of input quantizers
    input_quantizer_list = []
    quantizer_factory = quantizer_factory_module.QuantizerFactory()
    if input_quantizers is None:
        logging.warning("************ SOURCE has no quantizer type;"
                        " using the default quantizer instead")

        for _ in range(len(model.inputs)):
            input_quantizer_list.append(
                quantizer_factory.make_default_quantizer(
                    mode=default_source_quantizer))
    else:
        # A single (non-list) quantizer was passed; use it for all inputs.
        # (Checked first: calling len() on a single quantizer would fail.)
        if not isinstance(input_quantizers, list):
            for _ in range(len(model.inputs)):
                input_quantizer_list.append(
                    quantizer_factory.make_quantizer(input_quantizers))
        elif len(model.inputs) == len(input_quantizers):
            for quantizer in input_quantizers:
                input_quantizer_list.append(
                    quantizer_factory.make_quantizer(quantizer))
        else:
            raise WrongInputQuantizerError(
                "ERROR: Numer of input (%d) must be the same as number of source"
                " quantizers (%d)" %
                (len(model.inputs), len(input_quantizers)))

    # dict that maps each input tensor to its quantizer
    input_quantizer_map = {}
    for (idx, tensor) in enumerate(model.inputs):
        input_quantizer_map[
            tensor.experimental_ref()] = input_quantizer_list[idx]

    graph = nx.DiGraph()

    source = SOURCE
    sink = SINK

    node_list = [(source, {
        "layer": [None],
        "type": [None],
        "out_quantizer": None
    }), (sink, {
        "layer": [None],
        "type": [None],
        "out_quantizer": None
    })]

    graph.add_nodes_from(node_list)

    node_list = []

    for i, layer in enumerate(model.layers):

        node_type = layer.__class__.__name__

        node = (i, {
            "layer": [layer],
            "type": [node_type],
            "out_quantizer": None
        })
        node_list.append(node)

    node_dict = {layer: i for i, layer in enumerate(model.layers)}

    graph.add_nodes_from(node_list)

    # Map each tensor to the layers that consume it (in_nodes)
    # and the layers that produce it (out_nodes).
    in_nodes = {}
    out_nodes = {}
    for layer in model.layers:
        i_list = layer.input
        if not isinstance(layer.input, list):
            i_list = [i_list.experimental_ref()]
        else:
            i_list = [tmp.experimental_ref() for tmp in i_list]

        for i in i_list:
            # dict: tensor -> layers that have this tensor as input
            if i not in in_nodes.keys():
                in_nodes[i] = [layer]
            else:
                in_nodes[i].append(layer)

        o_list = layer.output
        if not isinstance(layer.output, list):
            o_list = [o_list.experimental_ref()]
        else:
            o_list = [tmp.experimental_ref() for tmp in o_list]

        for o in o_list:
            # dict: tensor -> layers that have this tensor as output
            if o not in out_nodes.keys():
                out_nodes[o] = [layer]
            else:
                out_nodes[o].append(layer)

    # union of all tensors; non-redundant
    attr_set = set(in_nodes.keys()) | set(out_nodes.keys())

    # Add edges; we want edges annotated with tensors and shapes.

    edge_list = []

    for a in attr_set:
        # for a given tensor a, find the layer u that outputs this tensor
        # and the layer v that has this tensor as input
        u_list = out_nodes.get(a, [None])
        v_list = in_nodes.get(a, [None])

        for u in u_list:
            for v in v_list:
                if not u or not v:
                    continue

                o_shape = u.output_shape

                # layer -> layer_id
                u_id = node_dict[u]
                v_id = node_dict[v]

                # insert input_quantizers on the edge between
                # input layer and its next layer
                if a in input_quantizer_map.keys():
                    edge_list.append((u_id, v_id, {
                        "shape": o_shape,
                        "tensor": a,
                        "quantizer": input_quantizer_map[a]
                    }))
                else:
                    edge_list.append((u_id, v_id, {
                        "shape": o_shape,
                        "tensor": a,
                        "quantizer": None
                    }))

    graph.add_edges_from(edge_list)

    return (graph, input_quantizer_list)
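A minimal sketch of calling this on a small Keras model (hedged: it assumes tensorflow.keras and, as above, a mode-string default_source_quantizer):

import tensorflow as tf

inp = tf.keras.Input(shape=(16,))
out = tf.keras.layers.Dense(4)(inp)
model = tf.keras.Model(inp, out)

graph, input_quantizer_list = GenerateGraphFromModel(
    model, input_quantizers=None,
    default_source_quantizer="quantized_bits(8, 0, 1)")

# Each edge carries the tensor, its shape, and, on edges leaving an
# input layer, the input quantizer.
for u, v, attr in graph.edges(data=True):
    print(u, "->", v, attr["shape"], attr["quantizer"])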
Example #4
def generate_layer_data_type_map(graph,
                                 source_quantizer_list,
                                 is_inference,
                                 keras_quantizer=None,
                                 keras_accumulator=None,
                                 for_reference=False,
                                 debug=False):
    """main funciton to generate datatype for each layer.

  For each type of layer, this function calculates the sizes and minimum
  number of bits required to represent the parameters and variables (e.g.,
  weights, bias, multiplier and accumulator (MAC)) embedded in
  these layers.

  Args:
    graph: input graph that traverses the model
    source_quantizer_list: a list of quantizers for model inputs
    is_inference: whether model is pre-trained with weights available
    keras_quantizer: default quantizer used to quantize un-quantized layers
    keras_accumulator: default MAC quantizer to quantize un-quantized layers
    for_reference: whether to generate a map for a baseline model
    debug: whether to print debug messages

  Returns:
    a result dict containing the following fields:
    source_quantizer_list: same as the input argument
    output_layers: names of the layers that are output layers
    input_layers: names of the layers that are input layers
    layer_data_type_map: data type map of each layer
  """

    quantizer_factory = quantizer_factory_module.QuantizerFactory()
    layer_data_type_map = collections.OrderedDict()

    # get the output and input layers

    output_layers = []
    input_layers = []
    predecessors = list(graph.predecessors(qgraph.SINK))
    successors = list(graph.successors(qgraph.SOURCE))

    for u in predecessors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        output_layers.append(graph.nodes[u]["layer"][0])

    for u in successors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        input_layers.append(graph.nodes[u]["layer"][0])

    for node_id in nx.topological_sort(graph):
        node = graph.nodes[node_id]
        node_type = node["type"][-1]
        layer = node["layer"][0]
        is_input_layer = layer in input_layers

        w_shapes = None
        b_shapes = None
        output_shapes = None

        if hasattr(layer, "output_shape"):
            output_shapes = layer.output_shape

        if hasattr(layer, "get_weights"):
            weights = layer.get_weights()
            if len(weights) != 0:
                w_shapes = weights[0].shape
                b_shapes = weights[0].shape[-1]

        if debug:
            print("########")
            if layer is not None:
                print(layer.name)
            else:
                print("None")

        # Deals with a Keras layer or a missing input quantizer in a QKeras layer.
        input_qe_list = qtools_util.get_input_quantizers_advanced(
            graph, node_id, is_input_layer, quantizer_factory, cfg)

        if input_qe_list and node_id != qgraph.SINK:
            input_quantizer_list = []
            for node in input_qe_list:
                input_quantizer_list.append(node[0])

            # Calculates number of operations (multiplication/accumulation).
            (_, edge_0) = input_qe_list[0]
            input_shape = edge_0["shape"]
            # for merge layers, all input shapes are identical
            operation_count = qtools_util.get_operation_count(
                layer, input_shape)

        # Merge layers with multiple inputs.
        if qtools_util.is_merge_layers(layer):
            merge_factory = quantized_operators.MergeFactory()
            merge_quantizer = merge_factory.make_quantizer(
                input_qe_list, layer.__class__.__name__)

            if hasattr(layer, "get_quantizers"):
                # QMerge layer (future QKeras feature).
                qkeras_quantizer = layer.get_quantizers()[0]
                merge_quantizer.output = quantizer_factory.make_quantizer(
                    qkeras_quantizer)
            else:
                # Merge layer is a Keras layer.
                if for_reference:
                    merge_quantizer.output = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

                    if keras_accumulator:
                        # gate_factor and gate_bits remain the same as previously
                        # calculated; only the output quantizer is changed to the
                        # keras_accumulator
                        merge_quantizer.output = quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, merge_quantizer.output,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, merge_quantizer, None, None, None, None,
                None, output_quantizer, output_shapes, operation_count)

        # MaxPooling/reshape/flatten/UpSampling1D/2D/3D
        elif (qtools_util.is_shape_alternation_layers(layer)
              or "UpSampling" in layer.__class__.__name__):
            input_quantizer = input_quantizer_list[0]

            # Output quantizer
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, input_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, None, None, None,
                output_quantizer, output_shapes, operation_count)

        # AveragePooling and GlobalAveragePooling
        elif layer.__class__.__name__ in [
                "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
                "GlobalAveragePooling2D", "QAveragePooling2D",
                "QGlobalAveragePooling2D"
        ]:
            (input_quantizer, _) = input_qe_list[0]

            # This is a hack. We don't want to implement a new accumulator class
            # just for average pooling, so we re-use the accumulator type from
            # conv/dense layers, which needs a multiplier and a kernel as input
            # parameters. To do so, we fake a multiplier that treats the
            # pool_size as the kernel. Since a kernel needs four dimensions
            # (k_h, k_w, C_in, C_out), we set the last two dimensions to [1, 1].
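            # Illustrative (assumed) shapes: AveragePooling2D with
            # pool_size=(2, 2) yields a fake kernel of shape (2, 2, 1, 1);
            # GlobalAveragePooling2D on an input of shape (None, 7, 7, 64)
            # yields (7, 7, 1, 1).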
            if layer.__class__.__name__ in [
                    "AveragePooling2D", "AvgPool2D", "QAveragePooling2D"
            ]:
                pool_size = tuple(list(layer.pool_size) + [1, 1])
            else:
                pool_size = tuple(list(input_shape)[1:-1] + [1, 1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            fake_multiplier = multiplier_factory.make_multiplier(
                input_quantizer, input_quantizer)
            fake_multiplier.output = input_quantizer
            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(pool_size,
                                                               fake_multiplier,
                                                               use_bias=False)

            if layer.__class__.__name__ in [
                    "QAveragePooling2D", "QGlobalAveragePooling2D"
            ]:
                # For the quantized layer, an average_quantizer implements
                # the division as a multiplication by its inverse.
                qkeras_average_quantizer = layer.get_quantizers()[0]
                qtools_average_quantizer = quantizer_factory.make_quantizer(
                    qkeras_average_quantizer)
                multiplier = multiplier_factory.make_multiplier(
                    accumulator.output, qtools_average_quantizer)
            else:
                multiplier = None
            if debug:
                print("accumulator:", accumulator.output.bits)

            if for_reference:
                if multiplier:
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    if multiplier:
                        multiplier.output = quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator)
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            if layer.__class__.__name__ in [
                    "QAveragePooling2D", "QGlobalAveragePooling2D"
            ]:
                # If it is a quantized layer, the last operation is the
                # multiply (averaging).
                layer_quantizer = multiplier.output
            else:
                layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, multiplier, accumulator, None, None,
                None, None, output_quantizer, output_shapes, operation_count)

        # If it's a Quantized Activation layer.
        elif node_type in ["QActivation", "QAdaptiveActivation", "Activation"]:

            if for_reference or not hasattr(layer, "quantizer"):
                # Keras activation layer -> use default_interm_quantizer
                layer_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                layer_quantizer = layer.quantizer

                if not quantizer_factory.is_quantizer_supported(
                        layer_quantizer):
                    raise TagMissingError(
                        "Unsupported activation quantizer {} on this layer: {}"
                        .format(layer_quantizer, layer))

                if not layer_quantizer:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, w_shapes, None,
                b_shapes, output_quantizer, output_shapes, operation_count)

        elif node_type in ["QBatchNormalization", "BatchNormalization"]:

            (input_quantizer, _) = input_qe_list[0]

            # QKeras layers might be mixed with keras layers.
            if for_reference or not hasattr(layer, "get_quantizers"):
                # Reference mode, or a Keras BatchNorm layer mixed into a
                # quantized model -> use default quantizers
                gamma_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                beta_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                mean_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                variance_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    gamma_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    beta_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    mean_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    variance_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                (qkeras_gamma_quantizer, qkeras_beta_quantizer,
                 qkeras_mean_quantizer,
                 qkeras_variance_quantizer) = layer.get_quantizers()

                if not qkeras_beta_quantizer:
                    beta_quantizer = quantizer_factory.clone_quantizer(
                        input_quantizer)
                else:
                    beta_quantizer = quantizer_factory.make_quantizer(
                        qkeras_beta_quantizer)

                if not qkeras_mean_quantizer:
                    mean_quantizer = quantizer_factory.clone_quantizer(
                        input_quantizer)
                else:
                    mean_quantizer = quantizer_factory.make_quantizer(
                        qkeras_mean_quantizer)

                if not qkeras_variance_quantizer:
                    variance_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)
                else:
                    # Use the variance quantizer specified in the layer.
                    variance_quantizer = quantizer_factory.make_quantizer(
                        qkeras_variance_quantizer)

                if not qkeras_gamma_quantizer:
                    gamma_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)
                else:
                    gamma_quantizer = quantizer_factory.make_quantizer(
                        qkeras_gamma_quantizer)

            # During inference, gamma, beta and variance are constants.
            # If they are po2 quantizers, we need to update their bits with
            # the actual values and also update the graph with the
            # corresponding output_quantizer on the edge.
            if is_inference:
                weights = qtools_util.get_weights(layer)
                # If scale (gamma) is disabled, there is one fewer weight;
                # likewise if center (beta) is disabled.
                num_weights = 4
                if not layer.scale:
                    num_weights -= 1
                if not layer.center:
                    num_weights -= 1

                if layer.scale and gamma_quantizer.is_po2:
                    gamma_quantizer.update_inference_values(weights[0])
                if variance_quantizer.is_po2:
                    variance_quantizer.update_inference_values(
                        weights[num_weights - 1])

            qbn = quantized_operators.QBNFactory()
            qbn.make_quantizer(input_quantizer, gamma_quantizer,
                               beta_quantizer, mean_quantizer,
                               variance_quantizer, layer.scale, layer.center)

            def set_output(op, output):
                if op:
                    op.output = output

            if for_reference or not hasattr(layer, "get_quantizers"):
                set_output(
                    qbn.internal_divide_quantizer,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_multiplier,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_accumulator,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                set_output(
                    qbn.internal_output,
                    quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer))

                if keras_accumulator:
                    set_output(
                        qbn.internal_divide_quantizer,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_multiplier,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_accumulator,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

                    set_output(
                        qbn.internal_output,
                        quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator))

            layer_quantizer = qbn.internal_output.output

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            gamma_range = None
            if hasattr(layer, "gamma_range"):
                gamma_range = layer.gamma_range

            beta_range = None
            if hasattr(layer, "beta_range"):
                beta_range = layer.beta_range

            if not layer.center:
                qbn.beta_quantizer = None

            if not layer.scale:
                qbn.gamma_quantizer = None

            layer_data_type_map[layer] = {
                "input_quantizer_list": input_quantizer_list,
                "gamma_quantizer": gamma_quantizer,
                "beta_quantizer": beta_quantizer,
                "mean_quantizer": mean_quantizer,
                "variance_quantizer": variance_quantizer,
                "gamma_range": gamma_range,
                "beta_range": beta_range,
                "internal_divide_quantizer": qbn.internal_divide_quantizer,
                "internal_multiplier": qbn.internal_multiplier,
                "internal_accumulator": qbn.internal_accumulator,
                "output_quantizer": output_quantizer,
                "output_shapes": input_shape,
                "operation_count": operation_count
            }

        # If qdense, qconv, qpool, qoctave
        elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:

            (input_quantizer, _) = input_qe_list[0]

            if for_reference or not hasattr(layer, "get_quantizers"):
                # for_reference: force all quantizers to keras_quantizer
                weight_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                bias_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    weight_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    bias_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                # qkeras layer
                qkeras_weight_quantizer = layer.get_quantizers()[0]
                qkeras_bias_quantizer = layer.get_quantizers()[1]

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_weight_quantizer):
                    raise TagMissingError(
                        "Unsupported weight quantizer {} on this layer: {}".
                        format(qkeras_weight_quantizer, layer))

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_bias_quantizer):
                    raise TagMissingError(
                        "Unsupported bias quantizer {} on this layer: {}".
                        format(qkeras_bias_quantizer, layer))

                weight_quantizer = quantizer_factory.make_quantizer(
                    qkeras_weight_quantizer)
                bias_quantizer = quantizer_factory.make_quantizer(
                    qkeras_bias_quantizer)

            # TODO(lishanok): During inference, if weight and bias are po2,
            #  we need to update the corresponding quantizer type with the min
            #  and max of the constant values.
            if is_inference:
                weights = qtools_util.get_weights(layer)
                if weight_quantizer.is_po2:
                    weight_quantizer.update_inference_values(weights[0])

                if bias_quantizer.is_po2:
                    bias_quantizer.update_inference_values(weights[1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            multiplier = multiplier_factory.make_multiplier(
                weight_quantizer, input_quantizer)

            weights = layer.get_weights()
            kernel = weights[0]

            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(
                kernel.shape, multiplier)

            if not layer.use_bias:
                bias_quantizer = None

            if debug:
                print(layer.name or "None")
                print("weight_quantizer:", weight_quantizer.bits)
                print("input_quantizer:", input_quantizer.bits)
                print("multiplier_quantizer:", multiplier.output.bits)
                print("multiplier_gate_bits:", multiplier.gate_bits)
                print("accumulator:", accumulator.output.bits)

            if for_reference or not hasattr(layer, "get_quantizers"):
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                multiplier.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, multiplier, accumulator,
                weight_quantizer, w_shapes, bias_quantizer, b_shapes,
                output_quantizer, output_shapes, operation_count)

        # Folded conv/dense/depthwiseconv layer
        elif node_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:

            (input_quantizer, _) = input_qe_list[0]
            if for_reference or not hasattr(layer, "get_quantizers"):
                # For_reference: force all quantizers to keras_quantizer.
                weight_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                bias_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    weight_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    bias_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                # QKeras layer
                qkeras_weight_quantizer = layer.get_quantizers()[0]
                qkeras_bias_quantizer = layer.get_quantizers()[1]
                if not quantizer_factory.is_quantizer_supported(
                        qkeras_weight_quantizer):
                    raise TagMissingError(
                        "Unsupported weight quantizer {} on this layer: {}".
                        format(qkeras_weight_quantizer, layer))

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_bias_quantizer):
                    raise TagMissingError(
                        "Unsupported bias quantizer {} on this layer: {}".
                        format(qkeras_bias_quantizer, layer))

                weight_quantizer = quantizer_factory.make_quantizer(
                    qkeras_weight_quantizer)

                if qkeras_bias_quantizer:
                    bias_quantizer = quantizer_factory.make_quantizer(
                        qkeras_bias_quantizer)
                else:
                    bias_quantizer = None

            # TODO(lishanok): During inference, if weight and bias are po2,
            #  we need to update the corresponding quantizer type with the min
            #  and max of the constant values
            if is_inference:
                weights = qtools_util.get_weights(layer)
                if weight_quantizer.is_po2:
                    weight_quantizer.update_inference_values(weights[0])

                if bias_quantizer and bias_quantizer.is_po2:
                    bias_quantizer.update_inference_values(weights[1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            multiplier = multiplier_factory.make_multiplier(
                weight_quantizer, input_quantizer)

            weights = layer.get_weights()
            kernel = weights[0]

            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(
                kernel.shape,
                multiplier,
                use_bias=True if bias_quantizer else False)

            if not bias_quantizer:
                # Set the bias to the same type as the accumulator.
                bias_quantizer = copy.deepcopy(accumulator.output)
                if not accumulator.output.is_floating_point:
                    # For a fixed-point accumulator, add 1 to its bits to avoid
                    # possible saturation.
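                    # (Illustrative, assumed values: a 16-bit accumulator with
                    # 6 integer bits would become 17 bits with 7 integer bits.)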
                    accumulator.output.bits += 1
                    accumulator.output.int_bits += 1
            if for_reference or not hasattr(layer, "get_quantizers"):
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                multiplier.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, multiplier, accumulator,
                weight_quantizer, w_shapes, bias_quantizer, b_shapes,
                output_quantizer, output_shapes, operation_count)

        elif node_type:
            # Any other unsupported layer types -> pass the input quantizer
            # type to output in graph
            (input_quantizer, _) = input_qe_list[0]

            if for_reference and keras_quantizer:
                input_quantizer = quantizer_factory.make_default_quantizer(
                    mode=keras_quantizer)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, input_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, None, None, None,
                output_quantizer, output_shapes, operation_count)

    result = {
        "source_quantizer_list": source_quantizer_list,
        "output_layers": output_layers,
        "input_layers": input_layers,
        "layer_data_type_map": layer_data_type_map
    }

    return result
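For a baseline comparison, the same function is typically called in reference mode; a hedged sketch follows (the "fp32" mode string is an assumption, not a verified qtools mode name):

# Hypothetical reference-map call: for_reference forces every layer's
# quantizers to the defaults, and keras_accumulator overrides the MAC
# output quantizers.
reference = generate_layer_data_type_map(
    graph, source_quantizer_list, is_inference=False,
    keras_quantizer="fp32", keras_accumulator="fp32",
    for_reference=True)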
Example #5
def generate_layer_data_type_map(graph,
                                 source_quantizer_list,
                                 is_inference,
                                 keras_quantizer=None,
                                 keras_accumulator=None,
                                 for_reference=False,
                                 debug=False,
                                 model_weights_already_quantized=True,
                                 hw_weight_dict=None):
    """main funciton to generate datatype for each layer.

  For each type of layer, this function calculates the sizes and minimum
  number of bits required to represent the parameters and variables (e.g.,
  weights, bias, multiplier and accumulator (MAC)) embedded in
  these layers.

  Args:
    graph: input graph that traverses the model
    source_quantizer_list: a list of quantizers for model inputs
    is_inference: whether model is pre-trained with weights available
    keras_quantizer: default quantizer used to quantize weights and bias
    keras_accumulator: default MAC quantizer to quantize multiplier,
      accumulator and output
    for_reference: whether to generate a map for a baseline model
    debug: whether to print debug messages
    model_weights_already_quantized: bool. If model weights are already
      quantized, no need to apply quantizer to weights here in this function.
    hw_weight_dict: weight dictionary for hardware inference. For example, fused
      bn op inference in hardware will need additional fused weights, which
      can be extracted from this dictionary. This dictionary is the output of
      the utils.py/model_save_quantized_weights function.

  Returns:
    a result dict containing the following fields:
    source_quantizer_list: same as the input argument
    output_layers: names of the layers that are output layers
    input_layers: names of the layers that are input layers
    layer_data_type_map: data type map of each layer
  """

    quantizer_factory = quantizer_factory_module.QuantizerFactory()
    layer_data_type_map = collections.OrderedDict()

    # get the output and input layers

    output_layers = []
    input_layers = []
    predecessors = list(graph.predecessors(qgraph.SINK))
    successors = list(graph.successors(qgraph.SOURCE))

    for u in predecessors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        output_layers.append(graph.nodes[u]["layer"][0])

    for u in successors:
        if u == qgraph.SOURCE or u == qgraph.SINK:
            continue
        input_layers.append(graph.nodes[u]["layer"][0])

    for node_id in nx.topological_sort(graph):
        node = graph.nodes[node_id]
        node_type = node["type"][-1]
        layer = node["layer"][0]
        is_input_layer = layer in input_layers

        w_shapes = None
        b_shapes = None
        output_shapes = None
        qkeras_weight_quantizer = None

        if hasattr(layer, "output_shape"):
            output_shapes = layer.output_shape

        if hasattr(layer, "get_weights"):
            weights = layer.get_weights()
            if weights:
                w_shapes = weights[0].shape
                b_shapes = weights[0].shape[-1]

        if debug:
            print("########")
            if layer is not None:
                print(layer.name)
            else:
                print("None")

        # Deals with Keras layers or QKeras layers that lack an input quantizer.
        input_qe_list = qtools_util.get_input_quantizers_advanced(
            graph, node_id, is_input_layer, quantizer_factory, cfg)

        if input_qe_list and node_id != qgraph.SINK:
            input_quantizer_list = [qe[0] for qe in input_qe_list]

            # Calculates the number of operations (multiply/accumulate).
            # Previously, Merge layers' inputs all had the same shape, but in
            # MobileNetV3 the Keras Multiply layer broadcasts input shapes,
            # so we use the input shape with the largest size. input_qe_list
            # is guaranteed non-empty here (checked above), so no fallback
            # branch is needed.
            maxsize = -1
            max_id = 0
            for (idx, item) in enumerate(input_qe_list):
                shape = item[1]["shape"]
                size = np.prod(shape[1:])
                if size > maxsize:
                    maxsize = size
                    max_id = idx
            input_shape = input_qe_list[max_id][1]["shape"]

            operation_count = qtools_util.get_operation_count(
                layer, input_shape)
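            # Illustrative example: a broadcasted Multiply of inputs with
            # shapes (None, 8, 8, 32) and (None, 1, 1, 32) has per-sample
            # sizes 2048 and 32, so (None, 8, 8, 32) is used as input_shape.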

        # Merge layers, which have multiple inputs.
        if qtools_util.is_merge_layers(layer):

            # merge_factory.make_quantizer automatically calculates the merge output
            # quantizer bitwidth according to input quantizer type.
            merge_factory = quantized_operators.MergeFactory()
            merge_quantizer = merge_factory.make_quantizer(
                input_qe_list, layer.__class__.__name__)
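            # Illustrative example: adding two 8-bit fixed-point inputs needs
            # roughly one extra output bit to avoid overflow; the factory
            # derives the exact width from the input quantizer types.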

            if for_reference:
                # The for_reference option overwrites the auto-calculated merge output
                # quantizer
                if keras_accumulator:
                    # gate_factor and gate_bits remain as previously
                    # calculated; only the output quantizer is changed to
                    # keras_accumulator.
                    merge_quantizer.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                else:
                    merge_quantizer.output = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, merge_quantizer.output,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, merge_quantizer, None, None, None, None,
                None, output_quantizer, output_shapes, operation_count)

        # MaxPooling/reshape/flatten/UpSampling1D/2D/3D
        elif (qtools_util.is_shape_alternation_layers(layer)
              or "UpSampling" in layer.__class__.__name__):
            input_quantizer = input_quantizer_list[0]

            # Output quantizer
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, input_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, None, None, None,
                output_quantizer, output_shapes, operation_count)

        # AveragePooling and GlobalAveragePooling
        elif layer.__class__.__name__ in [
                "AveragePooling2D", "AvgPool2D", "GlobalAvgPool2D",
                "GlobalAveragePooling2D", "QAveragePooling2D",
                "QGlobalAveragePooling2D"
        ]:
            (input_quantizer, _) = input_qe_list[0]
            qtools_average_quantizer = None
            # This is a hack: rather than implementing a new accumulator class
            # just for average pooling, we reuse the accumulator type from
            # conv/dense layers, which needs a multiplier and a kernel as
            # input parameters. To do so, we fake a multiplier that treats the
            # pool_size as the kernel. Since a kernel needs 4 dimensions
            # (k_h, k_w, C_in, C_out), we set the last two dimensions to [1, 1].
            if layer.__class__.__name__ in [
                    "AveragePooling2D", "AvgPool2D", "QAveragePooling2D"
            ]:
                pool_size = tuple(list(layer.pool_size) + [1, 1])
            else:
                pool_size = tuple(list(input_shape)[1:-1] + [1, 1])
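            # Illustrative example: AveragePooling2D with pool_size=(2, 2)
            # yields a fake kernel shape of (2, 2, 1, 1), so the accumulator
            # is sized for 2 * 2 = 4 additions per output element.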

            # Automatically calculates the accumulator bitwidth according to input
            # quantizer type for both quantized pooling and regular pooling layers
            multiplier_factory = quantized_operators.MultiplierFactory()
            fake_multiplier = multiplier_factory.make_multiplier(
                input_quantizer, input_quantizer)
            fake_multiplier.output = input_quantizer
            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(pool_size,
                                                               fake_multiplier,
                                                               use_bias=False)

            # For quantized pooling layers, we also need to consider the
            # precision of the division, which is controlled by the average
            # quantizer.
            if layer.__class__.__name__ in [
                    "QAveragePooling2D", "QGlobalAveragePooling2D"
            ]:
                # For the quantized layer, an average_quantizer quantizes the
                # multiplication by the inverse of the divisor.
                qkeras_average_quantizer = layer.get_quantizers()[0]
                qtools_average_quantizer = quantizer_factory.make_quantizer(
                    qkeras_average_quantizer)
                multiplier = multiplier_factory.make_multiplier(
                    accumulator.output, qtools_average_quantizer)
            else:
                multiplier = None
            if debug:
                print("accumulator:", accumulator.output.bits)

            # Recalculate the accumulator/multiplier type when using the
            # for_reference option.
            if for_reference:
                if keras_accumulator:
                    # If keras_accumulator exists, use keras_accumulator as multiplier
                    # or accumulator type
                    if multiplier:
                        # Quantized layers need to define multiplier type
                        multiplier.output = quantizer_factory.make_default_quantizer(
                            mode=keras_accumulator)
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                else:
                    # If the user didn't provide keras_accumulator, use the
                    # default settings in cfg for the multiplier/accumulator
                    # type.
                    if multiplier:
                        multiplier.output = quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer)
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

            # set the output quantizer
            if layer.__class__.__name__ in [
                    "QAveragePooling2D", "QGlobalAveragePooling2D"
            ]:
                # For quantized layers, the last operation is the multiply
                # (averaging).
                layer_quantizer = multiplier.output
            else:
                layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = {
                "input_quantizer_list": input_quantizer_list,
                "average_quantizer": qtools_average_quantizer,
                "pool_sum_accumulator": accumulator,
                "pool_avg_multiplier": multiplier,
                "output_quantizer": output_quantizer,
                "output_shapes": output_shapes,
                "operation_count": operation_count
            }

        # Quantized or plain activation layers.
        elif node_type in ["QActivation", "QAdaptiveActivation", "Activation"]:

            if for_reference or not hasattr(layer, "quantizer"):
                # Keras activation layer -> use default_interm_quantizer
                layer_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
            else:
                layer_quantizer = layer.quantizer

                if not quantizer_factory.is_quantizer_supported(
                        layer_quantizer):
                    raise TagMissingError(
                        "Unsupported activation quantizer {} on this layer: {}"
                        .format(layer_quantizer, layer))

                if not layer_quantizer:
                    layer_quantizer = quantizer_factory.make_default_quantizer(
                        mode=cfg.default_interm_quantizer)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, w_shapes, None,
                b_shapes, output_quantizer, output_shapes, operation_count)

        elif node_type in ["QBatchNormalization", "BatchNormalization"]:
            # If this batchnorm layer needs to be fused with the previous
            # layer, we pass the input quantizer type through as the output
            # type in qgraph.

            (input_quantizer, _) = input_qe_list[0]

            if (hw_weight_dict is not None and hw_weight_dict.get(
                    layer.name, {}).get("enable_bn_fusing")):
                if for_reference and keras_accumulator and not is_input_layer:
                    input_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                output_quantizer = update_output_quantizer_in_graph(
                    graph, node_id, quantizer_factory, input_quantizer,
                    for_reference)
                layer_data_type_map[layer] = {
                    "input_quantizer_list": input_quantizer_list,
                    "output_quantizer": output_quantizer,
                    "output_shapes": input_shape,
                    "operation_count": operation_count
                }
            else:
                (gamma_quantizer, beta_quantizer, mean_quantizer,
                 variance_quantizer,
                 _) = get_bn_quantizers(layer, quantizer_factory, cfg,
                                        keras_quantizer, input_quantizer,
                                        is_inference, for_reference,
                                        model_weights_already_quantized)

                qbn = quantized_operators.QBNFactory()
                qbn.make_quantizer(input_quantizer, gamma_quantizer,
                                   beta_quantizer, mean_quantizer,
                                   variance_quantizer, layer.scale,
                                   layer.center)
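                # BatchNorm computes gamma * (x - mean) / sqrt(variance + eps)
                # + beta, so the QBN factory models an internal divide (the
                # 1/sqrt term), multiplier, and accumulator, each with its own
                # derived quantizer.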

                def set_output(op, output):
                    if op:
                        op.output = output

                if for_reference or not hasattr(layer, "get_quantizers"):
                    set_output(
                        qbn.internal_divide_quantizer,
                        quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer))

                    set_output(
                        qbn.internal_multiplier,
                        quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer))

                    set_output(
                        qbn.internal_accumulator,
                        quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer))

                    set_output(
                        qbn.internal_output,
                        quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer))

                    if keras_accumulator:
                        set_output(
                            qbn.internal_divide_quantizer,
                            quantizer_factory.make_default_quantizer(
                                mode=keras_accumulator))

                        set_output(
                            qbn.internal_multiplier,
                            quantizer_factory.make_default_quantizer(
                                mode=keras_accumulator))

                        set_output(
                            qbn.internal_accumulator,
                            quantizer_factory.make_default_quantizer(
                                mode=keras_accumulator))

                        set_output(
                            qbn.internal_output,
                            quantizer_factory.make_default_quantizer(
                                mode=keras_accumulator))

                gamma_range = None
                if hasattr(layer, "gamma_range"):
                    gamma_range = layer.gamma_range

                beta_range = None
                if hasattr(layer, "beta_range"):
                    beta_range = layer.beta_range

                if not layer.center:
                    qbn.beta_quantizer = None

                if not layer.scale:
                    qbn.gamma_quantizer = None

                layer_quantizer = qbn.internal_output.output
                output_quantizer = update_output_quantizer_in_graph(
                    graph, node_id, quantizer_factory, layer_quantizer,
                    for_reference)
                layer_data_type_map[layer] = {
                    "input_quantizer_list": input_quantizer_list,
                    "gamma_quantizer": gamma_quantizer,
                    "beta_quantizer": beta_quantizer,
                    "mean_quantizer": mean_quantizer,
                    "variance_quantizer": variance_quantizer,
                    "gamma_range": gamma_range,
                    "beta_range": beta_range,
                    "internal_divide_quantizer": qbn.internal_divide_quantizer,
                    "internal_multiplier": qbn.internal_multiplier,
                    "internal_accumulator": qbn.internal_accumulator,
                    "output_quantizer": output_quantizer,
                    "output_shapes": input_shape,
                    "operation_count": operation_count
                }
        # If qdense, qconv, qpool, qoctave
        elif node_type in QKERAS_LAYERS or node_type in KERAS_LAYERS:
            (input_quantizer, _) = input_qe_list[0]

            if for_reference or not hasattr(layer, "get_quantizers"):
                # for_reference: force quantizers to the cfg default, or to
                # keras_quantizer if provided.
                weight_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                bias_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    weight_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    bias_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                # qkeras layer
                qkeras_weight_quantizer = layer.get_quantizers()[0]
                qkeras_bias_quantizer = layer.get_quantizers()[1]

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_weight_quantizer):
                    raise TagMissingError(
                        "Unsupported weight quantizer {} on this layer: {}".
                        format(qkeras_weight_quantizer, layer))

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_bias_quantizer):
                    raise TagMissingError(
                        "Unsupported bias quantizer {} on this layer: {}".
                        format(qkeras_bias_quantizer, layer))

                weight_quantizer = quantizer_factory.make_quantizer(
                    qkeras_weight_quantizer)
                bias_quantizer = quantizer_factory.make_quantizer(
                    qkeras_bias_quantizer)

            # TODO(lishanok): During inference, if weight and bias are po2,
            #  update the corresponding quantizer type with the min and max
            #  of the constant values.
            if is_inference:
                weights = qtools_util.get_weights(
                    layer, model_weights_already_quantized)
                if weight_quantizer.is_po2:
                    weight_quantizer.update_inference_values(weights[0])

                if bias_quantizer.is_po2:
                    bias_quantizer.update_inference_values(weights[1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            multiplier = multiplier_factory.make_multiplier(
                weight_quantizer, input_quantizer)
            if qkeras_weight_quantizer:
                qtools_util.adjust_multiplier_for_auto_po2(
                    multiplier, qkeras_weight_quantizer)
            weights = layer.get_weights()
            kernel = weights[0]

            kernel_accumulator_factory = quantized_operators.AccumulatorFactory()
            # Set use_bias=False so that the accumulator doesn't account for
            # the bias bitwidth.
            kernel_accumulator = kernel_accumulator_factory.make_accumulator(
                kernel.shape, multiplier, use_bias=False)

            if not layer.use_bias:
                bias_quantizer = None
                accumulator = kernel_accumulator
            else:
                # Add bias quantizer bitwidth to the overall accumulator
                bias_accumulator_instance = adder_factory.IAdder()
                accumulator = bias_accumulator_instance.make_quantizer(
                    kernel_accumulator.output, bias_quantizer)
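            # Rough bit-growth sketch (illustrative numbers): an 8-bit input
            # times an 8-bit weight yields a 16-bit product; accumulating over
            # a 3x3x64 kernel adds ceil(log2(576)) = 10 bits, and folding in
            # the bias via the adder can add one more bit.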
            if debug:
                print(layer.name or "None")
                print("weight_quantizer:", weight_quantizer.bits)
                print("input_quantizer:", input_quantizer.bits)
                print("multiplier_quantizer:", multiplier.output.bits)
                print("multiplier_gate_bits:", multiplier.gate_bits)
                print("accumulator:", accumulator.output.bits)

            if for_reference or not hasattr(layer, "get_quantizers"):
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                multiplier.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            if (hw_weight_dict is not None
                    and hw_weight_dict.get(layer.name, None) and
                    hw_weight_dict[layer.name].get("enable_bn_fusing", None)):
                bn_layer_name = hw_weight_dict[
                    layer.name]["fused_bn_layer_name"]
                successor_ids = list(graph.successors(node_id))
                bn_layer = graph.nodes[successor_ids[0]]["layer"][0]
                assert bn_layer.name == bn_layer_name, (
                    "Batchnorm layer in the graph has different name from hw_weight"
                    f"_dict: {layer.name} vs {bn_layer_name}. Check both places to "
                    "ensure they are matching.")

                # Add additional datatype for bn fused weights
                (gamma_quantizer, beta_quantizer, mean_quantizer,
                 variance_quantizer, inverse_quantizer) = get_bn_quantizers(
                     bn_layer, quantizer_factory, cfg, keras_quantizer,
                     input_quantizer, is_inference, for_reference,
                     model_weights_already_quantized)

                fused_bn = FusedBNFactory()
                fused_bn.make_quantizer(
                    prev_output_quantizer=kernel_accumulator.output,
                    prev_bias_quantizer=bias_quantizer,
                    beta_quantizer=beta_quantizer,
                    mean_quantizer=mean_quantizer,
                    inverse_quantizer=inverse_quantizer,
                    use_beta=bn_layer.center,
                    use_bias=layer.use_bias,
                )
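                # Sketch of what fusing models (illustrative): the batchnorm
                # is folded into the conv output as roughly
                # inverse * (conv_out + bias - mean) + beta, with
                # inverse ~ gamma / sqrt(variance + eps), so the fused
                # accumulator must cover the conv accumulator plus these terms.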
                if for_reference or not hasattr(layer, "get_quantizers"):
                    fused_bn.internal_accumulator.output = (
                        quantizer_factory.make_default_quantizer(
                            mode=cfg.default_interm_quantizer))
                    if keras_accumulator:
                        fused_bn.internal_accumulator.output = (
                            quantizer_factory.make_default_quantizer(
                                mode=keras_accumulator))
                    fused_bn.internal_output.output = fused_bn.internal_accumulator.output

                layer_quantizer = fused_bn.internal_accumulator.output
                output_quantizer = update_output_quantizer_in_graph(
                    graph, node_id, quantizer_factory, layer_quantizer,
                    for_reference)
                layer_data_type_map[layer] = {
                    "input_quantizer_list": input_quantizer_list,
                    "multiplier": multiplier,
                    "accumulator": accumulator,
                    "weight_quantizer": weight_quantizer,
                    "w_shapes": w_shapes,
                    "bias_quantizer": bias_quantizer,
                    "b_shapes": b_shapes,
                    "bn_inverse_quantizer": inverse_quantizer,
                    "bn_mean_quantizer": mean_quantizer,
                    "bn_beta_quantizer": beta_quantizer,
                    "fused_accumulator": fused_bn.internal_accumulator,
                    "output_quantizer": output_quantizer,
                    "output_shapes": output_shapes,
                    "operation_count": operation_count
                }
            else:
                layer_quantizer = accumulator.output
                output_quantizer = update_output_quantizer_in_graph(
                    graph, node_id, quantizer_factory, layer_quantizer,
                    for_reference)

                layer_data_type_map[layer] = LayerDataType(
                    input_quantizer_list, multiplier, accumulator,
                    weight_quantizer, w_shapes, bias_quantizer, b_shapes,
                    output_quantizer, output_shapes, operation_count)
        elif node_type in ["QConv2DBatchnorm", "QDepthwiseConv2DBatchnorm"]:
            # Data types for folded Conv/DepthwiseConv layers.
            # TODO(lishanok): Add support for folded Dense layers.
            (input_quantizer, _) = input_qe_list[0]
            if for_reference or not hasattr(layer, "get_quantizers"):
                # for_reference: force quantizers to the cfg default, or to
                # keras_quantizer if provided.
                weight_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                bias_quantizer = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_quantizer:
                    weight_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
                    bias_quantizer = quantizer_factory.make_default_quantizer(
                        mode=keras_quantizer)
            else:
                # QKeras layer
                qkeras_weight_quantizer = layer.get_quantizers()[0]
                qkeras_bias_quantizer = layer.get_quantizers()[1]
                if not quantizer_factory.is_quantizer_supported(
                        qkeras_weight_quantizer):
                    raise TagMissingError(
                        "Unsupported weight quantizer {} on this layer: {}".
                        format(qkeras_weight_quantizer, layer))

                if not quantizer_factory.is_quantizer_supported(
                        qkeras_bias_quantizer):
                    raise TagMissingError(
                        "Unsupported bias quantizer {} on this layer: {}".
                        format(qkeras_bias_quantizer, layer))

                weight_quantizer = quantizer_factory.make_quantizer(
                    qkeras_weight_quantizer)

                if qkeras_bias_quantizer:
                    bias_quantizer = quantizer_factory.make_quantizer(
                        qkeras_bias_quantizer)
                else:
                    bias_quantizer = None

            # TODO(lishanok): During inference, if weight and bias are po2,
            #  update the corresponding quantizer type with the min and max
            #  of the constant values.
            if is_inference:
                weights = qtools_util.get_weights(
                    layer, model_weights_already_quantized)
                if weight_quantizer.is_po2:
                    weight_quantizer.update_inference_values(weights[0])

                if bias_quantizer and bias_quantizer.is_po2:
                    bias_quantizer.update_inference_values(weights[1])

            multiplier_factory = quantized_operators.MultiplierFactory()
            multiplier = multiplier_factory.make_multiplier(
                weight_quantizer, input_quantizer)
            if qkeras_weight_quantizer:
                qtools_util.adjust_multiplier_for_auto_po2(
                    multiplier, qkeras_weight_quantizer)
            weights = layer.get_weights()
            kernel = weights[0]

            accumulator_factory = quantized_operators.AccumulatorFactory()
            accumulator = accumulator_factory.make_accumulator(
                kernel.shape, multiplier, use_bias=bool(bias_quantizer))

            if not bias_quantizer:
                # Set the bias type to match the accumulator type.
                bias_quantizer = copy.deepcopy(accumulator.output)
                if not accumulator.output.is_floating_point:
                    # For a fixed-point accumulator, add 1 bit to avoid
                    # possible saturation.
                    accumulator.output.bits += 1
                    accumulator.output.int_bits += 1
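                    # Illustrative example: a fixed-point accumulator of
                    # (bits=16, int_bits=6) becomes (17, 7) after this bump.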
            if for_reference or not hasattr(layer, "get_quantizers"):
                accumulator.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)
                multiplier.output = quantizer_factory.make_default_quantizer(
                    mode=cfg.default_interm_quantizer)

                if keras_accumulator:
                    accumulator.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)
                    multiplier.output = quantizer_factory.make_default_quantizer(
                        mode=keras_accumulator)

            layer_quantizer = accumulator.output
            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, layer_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, multiplier, accumulator,
                weight_quantizer, w_shapes, bias_quantizer, b_shapes,
                output_quantizer, output_shapes, operation_count)

        elif node_type:
            # Any other unsupported layer type -> pass the input quantizer
            # type through to the output in qgraph.
            (input_quantizer, _) = input_qe_list[0]

            if for_reference and keras_accumulator and not is_input_layer:
                input_quantizer = quantizer_factory.make_default_quantizer(
                    mode=keras_accumulator)

            output_quantizer = update_output_quantizer_in_graph(
                graph, node_id, quantizer_factory, input_quantizer,
                for_reference)

            layer_data_type_map[layer] = LayerDataType(
                input_quantizer_list, None, None, None, None, None, None,
                output_quantizer, output_shapes, operation_count)

    result = {
        "source_quantizer_list": source_quantizer_list,
        "output_layers": output_layers,
        "input_layers": input_layers,
        "layer_data_type_map": layer_data_type_map
    }

    return result
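
A minimal usage sketch for this example, assuming the qgraph helpers CreateGraph and GraphPropagateActivationsToEdges exist with the shapes shown; consult qkeras.qtools for the exact signatures:

from qkeras.qtools import qgraph


def build_type_map(model, source_quantizers=None):
    # Assumed helper: CreateGraph builds the traversal graph from a Keras
    # model and returns (graph, source_quantizer_list).
    (graph, source_quantizer_list) = qgraph.CreateGraph(
        model, source_quantizers)
    # Assumed helper: propagate activation quantizers onto the graph edges
    # before the per-layer mapping runs.
    qgraph.GraphPropagateActivationsToEdges(graph)

    result = generate_layer_data_type_map(
        graph, source_quantizer_list, is_inference=False)

    # The returned dict uses the fields documented in the docstring above.
    for layer, dtypes in result["layer_data_type_map"].items():
        print(layer.name, dtypes)
    return result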