Example #1
def convert_const_node(node, shape):
    """convert tf const node into relay const or var"""

    # get the value of the constant
    tensor_value = node.attr["value"].tensor
    np_array = tensor_util.MakeNdarray(tensor_value)

    if np_array.dtype == np.dtype(object):
        if shape and node.name in shape:
            var_shape = shape[node.name]
        else:
            var_shape = tensor_util.TensorShapeProtoToList(
                tensor_value.tensor_shape)
        param = None
        sym = [_expr.var(node.name, shape=var_shape, dtype="uint8")]
        return sym, param

    if len(np_array.shape) == 0:
        param = None
        sym = [tvm.relay.const(np_array, np_array.dtype)]
    else:
        param = tvm.nd.array(np_array)
        sym = [_expr.var(node.name, shape=param.shape, dtype=param.dtype)]

    return sym, param
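
All of these examples lean on the same two TensorFlow helpers: tensor_util.MakeNdarray turns a TensorProto into a NumPy array, and tensor_util.TensorShapeProtoToList turns a TensorShapeProto into a plain Python list (unknown dimensions come back as -1, which is why several converters below check for dim < 0). A minimal round-trip sketch, assuming only that TensorFlow and NumPy are installed:

import numpy as np
from tensorflow.python.framework import tensor_util

# Build a TensorProto from a NumPy array, then recover the values and shape.
proto = tensor_util.make_tensor_proto(np.arange(6, dtype=np.float32).reshape(2, 3))
values = tensor_util.MakeNdarray(proto)                          # ndarray, shape (2, 3)
shape = tensor_util.TensorShapeProtoToList(proto.tensor_shape)   # [2, 3]
print(values.dtype, shape)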
Example #2
def quantize_weight_int8_signed(input_node):
    """Symmetric signed-int8 quantization of a constant weight node.

    Returns the transposed int8 constant node, the scale, and a per-column
    sum of the quantized weights scaled by 127."""
    input_tensor = input_node.attr["value"].tensor
    tensor_value = tensor_util.MakeNdarray(input_tensor)
    tensor_shape = input_tensor.tensor_shape
    tensor_shape_list = tensor_util.TensorShapeProtoToList(tensor_shape)
    nmax = max(abs(tensor_value.max()), abs(tensor_value.min()))
    scale = nmax / 127.
    b = np.zeros(shape=(tensor_shape_list[1]), dtype=float)
    transpose_tensor_value = np.zeros(shape=(tensor_shape_list[1],
                                             tensor_shape_list[0]),
                                      dtype=float)
    tensor_value = np.round(tensor_value / scale)
    for i in range(tensor_shape_list[0]):
        for j in range(tensor_shape_list[1]):
            b[j] += tensor_value[i, j] * 127
    for i in range(tensor_shape_list[1]):
        for j in range(tensor_shape_list[0]):
            transpose_tensor_value[i][j] = tensor_value[j][i]

    new_node = create_constant_node(
        input_node.name,
        transpose_tensor_value,
        dtypes.qint8,
        shape=[tensor_shape_list[1], tensor_shape_list[0]])
    return new_node, scale, b
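
Example #2 above uses a symmetric signed-int8 scheme: the scale is the largest absolute weight divided by 127, and each weight is divided by that scale and rounded. A stand-alone sketch of just that scaling step (the helper name and the zero-tensor guard are illustrative, not taken from the snippet):

import numpy as np

def symmetric_int8_quantize(weights):
    """Quantize a float weight array to signed int8 with a single shared scale."""
    nmax = np.abs(weights).max()
    scale = nmax / 127.0 if nmax > 0 else 1.0   # guard against an all-zero tensor
    quantized = np.clip(np.round(weights / scale), -127, 127).astype(np.int8)
    return quantized, scale

q, scale = symmetric_int8_quantize(np.array([[0.5, -1.0], [0.25, 0.75]], dtype=np.float32))
# q.astype(np.float32) * scale approximately reconstructs the original weights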
Example #3
def _replace_incomplete_shape_placeholers(gdef,
                                          import_prefix_name="NM_IMPORT"):
    """
    Replace placeholders of incomplete shapes with new ones with incomplete dimensions
    being replaced by size of 1

    :param: gdef Graph definition
    :param import_prefix_name Prefix used for the resulting graph
    """
    placeholders = [o for o in gdef.node if o.op == "Placeholder"]
    input_map = {}
    for pl in placeholders:
        dtype = tf_compat.as_dtype(pl.attr["dtype"].type)
        shape = tensor_util.TensorShapeProtoToList(pl.attr["shape"].shape)
        new_shape = [1 if d == -1 else d for d in shape]
        new_pl = tf_compat.placeholder(dtype,
                                       shape=new_shape,
                                       name="new_{}".format(pl.name))
        input_map[pl.name] = new_pl

    # Get correct import_graph_def function for TF version
    import_graph_def = (tf_compat.graph_util.import_graph_def if hasattr(
        tf_compat.graph_util, "import_graph_def") else
                        tf_compat.import_graph_def)
    import_graph_def(gdef, input_map=input_map, name=import_prefix_name)
Example #4
    def _parse_param(self, key, value, name, shape):
        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError("Unable to import tensorflow which is required {}".format(e))

        if key == "value":
            np_array = tensor_util.MakeNdarray(value.tensor)

            if np_array.dtype == np.dtype(object):
                # Object types are generally tensorflow DT_STRING (DecodeJpeg op).
                # Just leave it as placeholder.
                if shape and name in shape:
                    var_shape = shape[name]
                else:
                    var_shape = tensor_util.TensorShapeProtoToList(value.tensor.tensor_shape)
                self._nodes[name] = [_expr.var(name, shape=var_shape, dtype="uint8")]
                return

            array_ndim = len(np_array.shape)
            if array_ndim == 0:
                self._nodes[name] = [tvm.relay.const(np_array, np_array.dtype)]
            else:
                self._params[name] = tvm.nd.array(np_array)
                self._nodes[name] = [
                    _expr.var(name, shape=self._params[name].shape, dtype=self._params[name].dtype)
                ]
        else:
            if key not in ("dtype", "_output_shapes", "_class"):
                raise NotImplementedError(
                    "Other attributes for a Const(param) Node {} ? .".format(key)
                )
Example #5
def quantize_weight_rounded(input_node):
    input_tensor = input_node.attr["value"].tensor
    tensor_value = tensor_util.MakeNdarray(input_tensor)
    shape = input_tensor.tensor_shape
    num_buckets = 1 << FLAGS.bitdepth
    tensor_value_rounded = quantize_array(tensor_value, num_buckets)
    tensor_shape_list = tensor_util.TensorShapeProtoToList(shape)
    return [
        create_constant_node(input_node.name,
                             tensor_value_rounded,
                             dtypes.float32,
                             shape=tensor_shape_list)
    ]
Example #6
def quantize_weight_eightbit(input_node, quantization_mode):
    """Returns replacement nodes for input_node using the Dequantize op."""
    base_name = input_node.name + "_"
    quint8_const_name = base_name + "quint8_const"
    min_name = base_name + "min"
    max_name = base_name + "max"
    float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
    min_value = np.min(float_tensor.flatten())
    max_value = np.max(float_tensor.flatten())
    # Make sure that the range includes zero.
    if min_value > 0.0:
        min_value = 0.0
    # min_value == max_value is a tricky case. It can occur for general
    # tensors, and of course for scalars. The quantized ops cannot deal
    # with this case, so we set max_value to something else.
    # It's a tricky question what is the numerically best solution to
    # deal with this degeneracy.
    # TODO(petewarden): Better use a tolerance than a hard comparison?
    if min_value == max_value:
        if abs(min_value) < 0.000001:
            max_value = min_value + 1.0
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0

    sess = session.Session()
    with sess.as_default():
        quantize_op = array_ops.quantize_v2(float_tensor,
                                            min_value,
                                            max_value,
                                            dtypes.quint8,
                                            mode=quantization_mode)
        quint8_tensor = quantize_op[0].eval()
        min_value = quantize_op[1].eval()
        max_value = quantize_op[2].eval()
    shape = tensor_util.TensorShapeProtoToList(
        input_node.attr["value"].tensor.tensor_shape)
    quint8_const_node = create_constant_node(quint8_const_name,
                                             quint8_tensor,
                                             dtypes.quint8,
                                             shape=shape)

    dtype = dtypes.as_dtype(input_node.attr["dtype"].type)
    min_node = create_constant_node(min_name, min_value, dtypes.float32)
    max_node = create_constant_node(max_name, max_value, dtypes.float32)
    dequantize_node = create_node("Dequantize", input_node.name,
                                  [quint8_const_name, min_name, max_name])
    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
    set_attr_string(dequantize_node, "mode", quantization_mode)
    return [quint8_const_node, min_node, max_node, dequantize_node]
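
Several of the eight-bit examples (here and in Examples #7, #11, and #16) apply the same range fix-up before calling quantize_v2: force the range to include zero and widen a degenerate min == max range so the quantized ops have a usable interval. A stand-alone sketch of that adjustment (the helper name is illustrative):

def adjust_quantization_range(min_value, max_value):
    """Return a (min, max) pair that includes zero and is never degenerate."""
    if min_value > 0.0:
        min_value = 0.0
    if min_value == max_value:
        if abs(min_value) < 1e-6:
            max_value = min_value + 1.0      # both effectively zero: use [0, 1]
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0      # e.g. (-3, -3) becomes (-3, -1.5)
    return min_value, max_value

# Examples: (5.0, 5.0) -> (0.0, 5.0); (0.0, 0.0) -> (0.0, 1.0); (-3.0, -3.0) -> (-3.0, -1.5)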
Example #7
def intel_cpu_quantize_weight_eightbit(input_node, quantization_mode="SCALED"):
    """Returns replacement of constant weight node.

    This function creates (i) a quantized constant node, (ii) a float min node
    (iii) a float max node, and (iv) a dequantize node."""
    base_name = input_node.name + "_"
    qint8_const_name = base_name + "qint8_const"
    min_name = base_name + "min"
    max_name = base_name + "max"
    float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
    min_value = np.min(float_tensor.flatten())
    max_value = np.max(float_tensor.flatten())
    # Same processing of min-max as in quantize_weight_eightbit function.
    if min_value > 0.0:
        min_value = 0.0
    if min_value == max_value:
        if abs(min_value) < 0.000001:
            max_value = min_value + 1.0
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0

    sess = session.Session()
    with sess.as_default():
        quantize_op = array_ops.quantize_v2(float_tensor,
                                            min_value,
                                            max_value,
                                            dtypes.qint8,
                                            mode=quantization_mode,
                                            round_mode="HALF_TO_EVEN")
        qint8_tensor = quantize_op[0].eval()
        # Updated min-max values should be passed to the next feeding node.
        min_value = quantize_op[1].eval()
        max_value = quantize_op[2].eval()
    shape = tensor_util.TensorShapeProtoToList(
        input_node.attr["value"].tensor.tensor_shape)
    qint8_const_node = create_constant_node(qint8_const_name,
                                            qint8_tensor,
                                            dtypes.qint8,
                                            shape=shape)
    min_node = create_constant_node(min_name, min_value, dtypes.float32)
    max_node = create_constant_node(max_name, max_value, dtypes.float32)

    dequantize_node = create_node("Dequantize", input_node.name,
                                  [qint8_const_name, min_name, max_name])
    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
    set_attr_string(dequantize_node, "mode", b'SCALED')
    return [qint8_const_node, min_node, max_node, dequantize_node]
Example #8
def quantize_weight_rounded(input_node):
  """Returns a replacement node for input_node containing bucketed floats."""
  input_tensor = input_node.attr["value"].tensor
  tensor_value = tensor_util.MakeNdarray(input_tensor)
  shape = input_tensor.tensor_shape
  # Currently, the parameter FLAGS.bitdepth is used to compute the
  # number of buckets as 1 << FLAGS.bitdepth, meaning the number of
  # buckets can only be a power of 2.
  # This could be fixed by introducing a new parameter, num_buckets,
  # which would allow for more flexibility in choosing the right model
  # size/accuracy tradeoff. But I didn't want to add more parameters
  # to this script than absolutely necessary.
  num_buckets = 1 << FLAGS.bitdepth
  tensor_value_rounded = quantize_array(tensor_value, num_buckets)
  tensor_shape_list = tensor_util.TensorShapeProtoToList(shape)
  return [create_constant_node(input_node.name, tensor_value_rounded,
                               tf.float32, shape=tensor_shape_list)]
Example #9
def convert_placeholder(shape, node, in_type=None):
    """convert tf placeholder into relay var.

    Example
    --------
    a tf placeholder with name "x" is converted to [Var(x, ty=TensorType([], float32))]
    """

    if shape and node.name in shape:
        input_shape = list(shape[node.name])
    else:
        input_shape = tensor_util.TensorShapeProtoToList(node.attr["shape"].shape)
        for idx, dim in enumerate(input_shape):
            if dim < 0:
                input_shape[idx] = Any()
    attr = parse_attr(node.attr)
    if in_type is not None:
        sym = [_expr.var(node.name, type_annotation=in_type)]
    else:
        sym = [_expr.var(node.name, shape=input_shape, dtype=attr["dtype"].name)]
    return input_shape, sym
Example #10
    def _Predict(self, input_data: Any, num_inputs: int,
                 model_config: ModelConfig) -> List[List[float]]:
        """Get prediction from the model server.

    Send the provided input data to the model server over gRPC and returns
    the response.

    Args:
      input_data: Input data fed into the model.
      num_inputs: Number of model inputs (should be set to 1 for non-batch).
      model_config: Configuration for the model to be called.

    Returns:
      The prediction response from TF serving.

    Raises:
      exception.CustomExceptionError: If the shape of response is incompatible
      or if the model server returns an error and it sets the status code to
      INTERNAL.
    """
        try:
            req = predict_pb2.PredictRequest()
            req.model_spec.name = model_config.name
            req.model_spec.signature_name = model_config.signature
            req.inputs[model_config.input_key].CopyFrom(
                tf.make_tensor_proto(input_data, shape=[num_inputs]))
            resp = self._InvokePredictRequest(req)
        except Exception as e:
            raise exception.CustomExceptionError(str(e),
                                                 code_pb2.Code.INTERNAL)
        floats = []
        for r in model_config.response:
            value = resp.outputs[r.key]
            shape = tensor_util.TensorShapeProtoToList(value.tensor_shape)
            if not tf.TensorShape(shape).is_compatible_with(r.shape):
                raise exception.CustomExceptionError(
                    'Model returned invalid shape {}, want {}'.format(
                        shape, r.shape), code_pb2.Code.INTERNAL)
            floats.append(value.float_val[:])
        return floats
Example #11
def quantize_weight_eightbit(input_node, quantization_mode):
    base_name = input_node.name + "_"
    quint8_const_name = base_name + "quint8_const"
    min_name = base_name + "min"
    max_name = base_name + "max"
    float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
    min_value = np.min(float_tensor.flatten())
    max_value = np.max(float_tensor.flatten())
    if min_value > 0.0:
        min_value = 0.0
    if min_value == max_value:
        if abs(min_value) < 0.000001:
            max_value = min_value + 1.0
        elif min_value > 0:
            max_value = 2 * min_value
        else:
            max_value = min_value / 2.0

    sess = session.Session()
    with sess.as_default():
        quantize_op = array_ops.quantize_v2(float_tensor,
                                            min_value,
                                            max_value,
                                            dtypes.quint8,
                                            mode=quantization_mode)
        quint8_tensor = quantize_op[0].eval()
    shape = tensor_util.TensorShapeProtoToList(
        input_node.attr["value"].tensor.tensor_shape)
    quint8_const_node = create_constant_node(quint8_const_name,
                                             quint8_tensor,
                                             dtypes.quint8,
                                             shape=shape)
    min_node = create_constant_node(min_name, min_value, dtypes.float32)
    max_node = create_constant_node(max_name, max_value, dtypes.float32)
    dequantize_node = create_node("Dequantize", input_node.name,
                                  [quint8_const_name, min_name, max_name])
    set_attr_dtype(dequantize_node, "T", dtypes.quint8)
    set_attr_string(dequantize_node, "mode", quantization_mode)
    return [quint8_const_node, min_node, max_node, dequantize_node]
Example #12
    def from_tensorflow(self, graph):
        """Construct nnvm nodes from tensorflow  graph definition - GraphDef.

        Follow the tensorflow graph definition to parse and convert it to NNVM.
        Some of the assumptions listed below.

            -> First Placeholder or Const node will be considered as graph input.
            -> Rest all Const nodes are params.
            -> Last node is assumed as graph output.
            -> _output_shapes : Attribute should be present in the tensorflow frozen graph.
            -> DecodeJpeg, ResizeBilinear: These are dummy operators.
                                           Hence user should handle preprocessing outside.
            -> CheckNumerics: No implementation as of now for this.
                              Just copies input to output.

        TODO: Change algorithm to stop treating first 'Const' in a special way.

        Parameters
        ----------
        graph : tensorflow graph definition object
            The loaded tensorflow GraphDef

        Returns
        -------
        sym : nnvm.sym.Symbol
            The returned nnvm symbol
        params : dict
            A dict of name: tvm.nd.array pairs, used as pretrained weights
        """

        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        missing_operators = self._parse_import_prerequisites(graph)

        if missing_operators:
            raise NotImplementedError( \
                "The following operators are not implemented: {}".format(missing_operators))

        # Parse the nodes to re-create TF graph using Symbol API of NNVM
        for node in graph.node:
            # Tensorflow doesn't have a separate list for params extraction.
            # Operator name 'Const' is treated as a parameter to build NNVM params dict.

            input_shapes = {}

            attr = self._parse_attr(node.attr)

            #Variable converted to Const will not have only value attr
            if 'value' in attr and node.op == 'Const':
                tensor_value = attr['value']
                self._output_shapes[node.name] = \
                    [tensor_util.TensorShapeProtoToList( \
                        tensor_value.tensor_shape)]
            elif '_output_shapes' in attr:
                self._output_shapes[node.name] = \
                    [tensor_util.TensorShapeProtoToList(shape) \
                    for shape in attr['_output_shapes']]
            else:
                raise NotImplementedError( \
                    "Please freeze the graph with add_shapes=True")

            if node.op == "Placeholder":
                self._nodes[node.name] = _sym.Variable(
                    name=node.name, shape=self._output_shapes[node.name][0])

                #input_shapes[self._nodes[node.name]] = self._output_shapes[node.name]
            elif node.op == "Const":
                # All Const nodes are Param nodes, lets parse
                self._num_param += 1
                for key, value in node.attr.items():
                    self._parse_param(key, value, node.name)
                if node.name not in self._nodes:
                    raise NotImplementedError( \
                        "Const {} couldn't be converted to Param.".format(node.name))

                attr = self._parse_attr(node.attr)

            else:
                # Pass the parsed shapes instead
                attr["_output_shapes"] = self._output_shapes[node.name]

                # Pass the node name too in attr
                attr["_node_name"] = node.name

                # ToDo: Some of the tensorflow operators internally maintain
                # execution layers and their output name will be the layer number along with
                # the graph node name. eg: Node name:- 'Model/RNN/cell_0/RnnCell', but the
                # output name will be 'Model/RNN/cell_0/RnnCell:0'. In this case,
                # the digit has to be ignored.
                if ":" in node.input[0]:
                    in_name, _ = node.input[0].split(':')
                    node.input[0] = in_name

                # Fill shapes for all inputs in a list
                try:
                    inputs = [self._nodes[i] for i in node.input]
                    for i in node.input:
                        input_shapes[self._nodes[i]] = self._output_shapes[i]
                    attr['_input_shapes'] = input_shapes
                except KeyError:
                    # TODO: Need to find clean way to handle '^CheckNumerics'
                    pass

                inputs = self._fix_extranodes(node.op, attr, inputs)

                op = self._convert_operator(node.op, inputs, attr, graph)
                # Assuming only one output.
                self._nodes[node.name] = op
                node_output = op

        # Assume the final node is the output node
        out = node_output

        #Add the RNN outputs also with 'head' nodes of the nnvm graph
        if self._num_rnn_layer:
            out_rnn = _sym.concatenate(*self._out_rnn, axis=0)
            out = [out, out_rnn]

        if isinstance(out, list):
            out = _sym.Group(out)

        return out, self._params
Example #13
def _PlaceholderShape(op):
  given_shape = tensor_util.TensorShapeProtoToList(op.get_attr("shape"))
  if given_shape:
    return [tensor_shape.TensorShape(given_shape)]
  else:
    return [tensor_shape.unknown_shape()]
Example #14
    def _get_relay_func(self, graph, layout="NHWC", shape=None, outputs=None):
        """Construct relay nodes from tensorflow graph definition - GraphDef.

        Follow the tensorflow graph definition to parse and convert it to Relay.
        Some of the assumptions listed below.

            -> All Placeholders are considered as graph input.
            -> All Const nodes are params.
            -> Last node is assumed as graph output.
            -> _output_shapes : Graph should be frozen with add_shapes=True.
                                Or user can pass input shape dictionary optionally.
            -> DecodeJpeg, ResizeBilinear: These are dummy operators.
                                           Hence user should handle preprocessing outside.
            -> CheckNumerics: No implementation as of now for this.
                              Just copies input to output.

        Parameters
        ----------
        graph : tensorflow graph definition object
            The loaded tensorflow GraphDef

        layout : target layout to be used (Optional)
            NCHW only supported now to enable NHWC models on GPU.

        shape : Dictionary of input dimensions (Optional)
            Graph level input shape dictionary.

        outputs : List of output tensor names (Optional)
            if not specified then the last node is assumed as graph output.

        Returns
        -------
        mod : tvm.IRModule
            The module that optimizations will be performed on.

        params : dict
            A dict of name: tvm.nd.array pairs, used as pretrained weights
        """
        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        missing_operators = self._parse_import_prerequisites(graph)
        control_flow_nodes = []
        ta_write_nodes = []
        ta_gather_nodes = []
        ta_construct_nodes = []
        self._in_shape = shape
        self._layout = layout
        self._graph = graph

        if missing_operators:
            freezed_ops = [
                op for op in missing_operators
                if op in _freezed_graph_pruned_op_list
            ]
            if freezed_ops:
                raise Exception("Graph is not frozen. Provide a frozen graph. "
                                "Found operators {}".format(freezed_ops))

            raise NotImplementedError(
                "The following operators are not implemented: {}".format(
                    missing_operators))

        for node in graph.node:
            node_name_prefix = node.name.rsplit("/", 1)[0]
            self._control_flow_node_map[node_name_prefix].add(node.op)
            self._tf_node_map[node.name] = node

            # Parse output_shapes attribute
            parsed_attr = self._parse_attr(node.attr)
            if "_output_shapes" in parsed_attr:
                self._output_shapes[node.name] = [
                    tensor_util.TensorShapeProtoToList(tshape)
                    for tshape in parsed_attr["_output_shapes"]
                ]
            else:
                self._output_shapes[node.name] = [None]

            # Parse placeholder and const here since input shape info is required.
            if node.op == "Placeholder" or node.op == "PlaceholderWithDefault":
                # Give priority to user argument.
                if shape and node.name in shape:
                    self._input_shapes[node.name] = list(shape[node.name])
                else:
                    self._input_shapes[
                        node.name] = tensor_util.TensorShapeProtoToList(
                            node.attr["shape"].shape)
                    for idx, dim in enumerate(self._input_shapes[node.name]):
                        if dim < 0:
                            self._input_shapes[node.name][idx] = Any()

                self._output_shapes[node.name] = [
                    self._input_shapes[node.name]
                ]
                attr = self._parse_attr(node.attr)
                self._nodes[node.name] = [
                    _expr.var(node.name,
                              shape=self._input_shapes[node.name],
                              dtype=attr["dtype"].name)
                ]

                # Ignore user's input shape for Non placeholder
            elif node.op == "Const":
                tensor_value = node.attr["value"].tensor
                self._input_shapes[
                    node.name] = tensor_util.TensorShapeProtoToList(
                        tensor_value.tensor_shape)
                self._output_shapes[node.name] = [
                    self._input_shapes[node.name]
                ]
                if shape and node.name in shape:
                    warnings.warn("Ignore the passed shape. Shape in graphdef "
                                  "will be used for operator %s." % node.name)
                for key, value in node.attr.items():
                    self._parse_param(key, value, node.name, self._in_shape)
            elif node.op in _control_flow_nodes:
                # We assume that the direct parent node of Exit is a while loop block
                if node.op == "Exit":
                    self._while_loop_name_set.add(node_name_prefix)
                control_flow_nodes.append(node)
            elif node.op.startswith("TensorArray"):
                if is_tensor_array_constuctor(node):
                    ta_construct_nodes.append(node)
                else:
                    for ta_write_name, idx in _tensor_array_write_ops.items():
                        if node.op.startswith(ta_write_name):
                            ta_write_nodes.append((node, idx))
                            break
                    if node.op.startswith("TensorArrayGather"):
                        ta_gather_nodes.append(node)

        # Use tensor array gather to infer static tensor array shape
        for gather_node in ta_gather_nodes:
            input_ta_name = gather_node.input[0]
            input_ta_node = self._tf_node_map[input_ta_name]
            if is_tensor_array_constuctor(input_ta_node):
                gather_attr = self._parse_attr(gather_node.attr)
                if "element_shape" not in gather_attr:
                    continue
                raw_elem_shape = tensor_util.TensorShapeProtoToList(
                    gather_attr["element_shape"])
                elem_shape = []
                for dim in raw_elem_shape:
                    if dim < 0:
                        elem_shape.append(Any())
                    else:
                        elem_shape.append(int(dim))
                self._tensor_array_shapes[input_ta_node.name] = elem_shape

        # Fetch node contains static tensor array shape
        for item in ta_write_nodes:
            wnode = item[0]
            ta_idx, inode_idx = item[1]

            stack = [self._tf_node_map[wnode.input[ta_idx].split(":")[0]]]
            while stack:
                cnode = stack.pop(0)
                if not cnode.op.startswith("TensorArray"):
                    for iname in cnode.input:
                        stack.append(self._tf_node_map[iname.split(":")[0]])
                elif cnode.name != wnode.name:
                    if is_tensor_array_constuctor(cnode):
                        inode = self._tf_node_map[wnode.input[inode_idx].split(
                            ":")[0]]
                        tn = wnode.input[inode_idx].split(":")
                        output_index = int(tn[1]) if len(tn) > 1 else 0
                        self._tensor_array_shape_nodes[cnode.name] = (
                            inode, wnode.op, output_index)
                    break

        # First, parse all control flow nodes.
        # Convert tf.cond to Branch and tf.while_loop to Loop.
        sorted_cf_nodes = []
        exit_pos_map = {}
        ordered_prefix = []
        # Sort control flow nodes to move all Exit nodes to the end
        # of corresponding while_loop block.
        for node in control_flow_nodes:
            loop_name = find_parent_loop_name(node.name,
                                              self._while_loop_name_set)
            if node.op == "Exit":
                if loop_name not in exit_pos_map:
                    ordered_prefix.append(loop_name)
                    exit_pos_map[loop_name] = len(sorted_cf_nodes)
                sorted_cf_nodes.append(node)
            elif loop_name in self._while_loop_name_set:
                if loop_name not in exit_pos_map:
                    sorted_cf_nodes.append(node)
                else:
                    sorted_cf_nodes.insert(exit_pos_map[loop_name], node)
                    for j in range(ordered_prefix.index(loop_name),
                                   len(ordered_prefix)):
                        exit_pos_map[ordered_prefix[j]] += 1
            else:
                sorted_cf_nodes.append(node)

        for node in sorted_cf_nodes:
            self._sorted_cf_node_names.append(node.name)

        for node in sorted_cf_nodes:
            self._backtrack_construct(node.name)

        # Second, parse other nodes to re-create TF graph using Relay operators.
        for node in graph.node:
            self._backtrack_construct(node.name)

        out = []
        if outputs is None:
            last_node = graph.node[-1]
            op = self._nodes[last_node.name.split(":")[0]]
            if last_node.op == "Exit":
                out = [op[0].tuple_value]
            else:
                out = op
        else:
            for out_name in outputs:
                if ":" in out_name:
                    out_name, out_num = out_name.split(":")
                    out_num = int(out_num)
                    out.append(self._nodes[out_name][out_num])
                else:
                    out.append(self._nodes[out_name][0])

        if isinstance(out, _expr.TupleWrapper):
            out = out.tuple_value
        else:
            out = out[0] if len(out) == 1 else _expr.Tuple(out)
        fvars = analysis.free_vars(out)
        func = _function.Function(fvars, out)
        final_params = {}
        for fv in fvars:
            if fv.name_hint in self._params:
                final_params[fv.name_hint] = self._params[fv.name_hint]
        self._params = final_params
        return func
Example #15
    def _backtrack_construct(self, node_name):
        """Convert a specific tensorflow node to relay expression.

        If any of its ancestor nodes is not converted yet, backtrack as
        far as the input node and convert all nodes on the path.

        This is required when parsing control flow nodes, since the parsing
        order may not follow the original graph def.

        Parameters
        ----------
        node_name : str
            TensorFlow node name.

        Returns
        -------
        op : relay.Expr
            Converted relay expression
        """
        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        input_op_name = node_name.split(":")[0].split("^")[-1]
        if input_op_name not in self._nodes:
            node = self._tf_node_map[input_op_name]
            attr = self._parse_attr(node.attr)

            if node.op in _control_flow_nodes:
                attr = self._parse_attr(node.attr)
                op = self._convert_control_flow_operator(
                    node, [], attr, self._control_flow_node_map)
            else:
                attr["_output_shapes"] = self._output_shapes[input_op_name]
                attr["_node_name"] = node.name
                attr["_target_layout"] = self._layout

                inputs = [
                    self._backtrack_construct(iname) for iname in node.input
                ]

                plname = find_parent_loop_name(node_name,
                                               self._while_loop_name_set)

                # For TensorArrayV3 op, we need to infer shape first
                if is_tensor_array_constuctor(node):
                    raw_elem_shape = tensor_util.TensorShapeProtoToList(
                        attr["element_shape"])
                    elem_shape = []
                    for dim in raw_elem_shape:
                        if dim < 0:
                            elem_shape.append(Any())
                        else:
                            elem_shape.append(dim)

                    if elem_shape:
                        attr["shape"] = elem_shape
                    if attr["identical_element_shapes"] or elem_shape:
                        shape_node, wnode_op, output_index = self._tensor_array_shape_nodes[
                            node.name]
                        name = shape_node.name
                        if output_index > 0:
                            name += ":" + str(output_index)
                        converted = self._backtrack_construct(name)
                        shape = _infer_shape(converted, self._mod)
                        if wnode_op.startswith("TensorArraySplit"):
                            shape = (Any(), ) + shape[1:]
                        elif wnode_op.startswith("TensorArrayScatter"):
                            shape = shape[1:]

                        if node.name in self._tensor_array_shapes:
                            preset_shape = self._tensor_array_shapes[node.name]
                            shape = _get_more_static_shape(shape, preset_shape)

                        if "shape" in attr:
                            attr["shape"] = _get_more_static_shape(
                                shape, attr["shape"])
                        else:
                            attr["shape"] = shape

                # LICM
                if plname in self._while_loop_name_set:
                    for i, iname in enumerate(node.input):
                        actual_input = self._licm_construct(plname, iname)
                        inputs[i] = actual_input

                op = self._convert_operator(node.op, node.name, inputs, attr)
            if isinstance(op, np.ndarray):
                self._params[node.name] = tvm.nd.array(op)
                op = [
                    _expr.var(
                        node.name,
                        shape=self._params[node.name].shape,
                        dtype=self._params[node.name].dtype,
                    )
                ]

            elif isinstance(op, (_expr.Expr, _expr.TupleGetItem)):
                op = [op]

            self._nodes[input_op_name] = op

        out = self._nodes[input_op_name]

        if isinstance(out, _expr.TupleWrapper):
            tn = node_name.split(":")
            tensor_slot = int(tn[1]) if len(tn) > 1 else 0
            return out[tensor_slot]
        return out[0]
Example #16
    def do_transformation(self):
        g = GraphAnalyzer()
        g.graph = self.model
        graph_info = g.parse_graph()

        for i in self.rnn_details.keys():  # pragma: no cover
            start_node_name = graph_info[i[0]].node.input[0]
            min_str = i[0] + '_eightbit_min_' + \
                start_node_name + '__print__;__min:'
            input_min_values = []
            input_max_values = []
            output_min_values = []
            output_max_values = []
            max_str = i[0] + '_eightbit_max_' + \
                start_node_name + '__print__;__max:'
            output_str = i[0] + \
                '_eightbit_requant_range__print__;__requant_min_max:'
            for j in self.calibration_data:
                if j.find(min_str) != -1:
                    input_min_values.append(
                        float(j.split('[')[-1].split(']')[0]))
                if j.find(max_str) != -1:
                    input_max_values.append(
                        float(j.split('[')[-1].split(']')[0]))

                if j.find(output_str) != -1:
                    output_min_values.append(
                        float(j.split(':')[-1][1:].split(']')[0]))
                    output_max_values.append(float(j.split('][')[-1][:-1]))
            min_input = min(input_min_values)
            max_input = max(input_max_values)
            min_output = min(output_min_values)
            max_output = max(output_max_values)
            q_max_in_node = Helper.create_constant_node(
                i[0] + '_quant_max', max_input, dtypes.float32)

            q_min_in_node = Helper.create_constant_node(
                i[0] + '_quant_min', min_input, dtypes.float32)
            q_enter_min_node = Helper.create_node(
                'Enter', q_min_in_node.name + '_enter', [q_min_in_node.name])
            Helper.set_attr_string(q_enter_min_node, 'frame_name',
                                   self.rnn_details[i].encode())
            Helper.set_attr_dtype(q_enter_min_node, 'T', dtypes.float32)
            Helper.set_attr_bool(q_enter_min_node, 'is_constant', True)
            Helper.set_attr_int(q_enter_min_node, 'parallel_iterations', 32)
            q_enter_max_node = Helper.create_node(
                'Enter', q_max_in_node.name + '_enter', [q_max_in_node.name])
            Helper.set_attr_dtype(q_enter_max_node, 'T', dtypes.float32)
            Helper.set_attr_string(q_enter_max_node, 'frame_name',
                                   self.rnn_details[i].encode())
            Helper.set_attr_bool(q_enter_max_node, 'is_constant', True)
            Helper.set_attr_int(q_enter_max_node, 'parallel_iterations', 32)

            split_node_name = graph_info[i[0]].node.input[1]
            enter_node_name = graph_info[Helper.node_name_from_input(
                split_node_name)].node.input[1]
            weight_node_name = graph_info[Helper.node_name_from_input(
                enter_node_name)].node.input[0]
            weight_node = graph_info[Helper.node_name_from_input(
                weight_node_name)].node
            if weight_node.attr['dtype'].type == dtypes.qint8:
                qint8_const_name = weight_node_name
            else:
                base_name = weight_node_name + "_"
                qint8_const_name = base_name + "qint8_const"
                min_name = base_name + "min"
                max_name = base_name + "max"

            need_to_create_const_node = bool(
                qint8_const_name not in graph_info)
            if need_to_create_const_node:
                float_tensor = tensor_util.MakeNdarray(
                    weight_node.attr["value"].tensor)

                min_value = np.min(float_tensor.flatten())
                max_value = np.max(float_tensor.flatten())
                # Same processing of min-max as in quantize_weight_eightbit
                # function.
                if min_value > 0.0:
                    min_value = 0.0
                if min_value == max_value:
                    if abs(min_value) < 0.000001:
                        max_value = min_value + 1.0
                    elif min_value > 0:
                        max_value = 2 * min_value
                    else:
                        max_value = min_value / 2.0

                sess = tf.compat.v1.Session()
                with sess.as_default():
                    quantize_op = array_ops.quantize_v2(
                        float_tensor,
                        min_value,
                        max_value,
                        dtypes.qint8,
                        mode='SCALED',
                        round_mode="HALF_TO_EVEN")
                    qint8_tensor = quantize_op[0].numpy(
                    ) if tf.executing_eagerly() else quantize_op[0].eval()
                    # Updated min-max values should be passed to the next
                    # feeding node.
                    min_value = quantize_op[1].numpy() if tf.executing_eagerly(
                    ) else quantize_op[1].eval()
                    max_value = quantize_op[2].numpy() if tf.executing_eagerly(
                    ) else quantize_op[2].eval()
                sess.close()

                shape = tensor_util.TensorShapeProtoToList(
                    weight_node.attr["value"].tensor.tensor_shape)
                qint8_const_node = Helper.create_constant_node(
                    qint8_const_name, qint8_tensor, dtypes.qint8, shape=shape)

                min_node = Helper.create_constant_node(min_name, min_value,
                                                       dtypes.float32)

                max_node = Helper.create_constant_node(max_name, max_value,
                                                       dtypes.float32)
                enter_min_node = Helper.create_node('Enter',
                                                    min_name + '_enter',
                                                    [min_name])
                Helper.set_attr_string(enter_min_node, 'frame_name',
                                       self.rnn_details[i].encode())
                Helper.set_attr_dtype(enter_min_node, 'T', dtypes.float32)
                Helper.set_attr_bool(enter_min_node, 'is_constant', True)
                Helper.set_attr_int(enter_min_node, 'parallel_iterations', 32)
                enter_max_node = Helper.create_node('Enter',
                                                    max_name + '_enter',
                                                    [max_name])
                Helper.set_attr_dtype(enter_max_node, 'T', dtypes.float32)
                Helper.set_attr_string(enter_max_node, 'frame_name',
                                       self.rnn_details[i].encode())
                Helper.set_attr_bool(enter_max_node, 'is_constant', True)
                Helper.set_attr_int(enter_max_node, 'parallel_iterations', 32)
            else:
                qint8_const_node = graph_info[qint8_const_name].node
                min_node = graph_info[min_name].node
                max_node = graph_info[max_name].node
            quant_input = [
                start_node_name, q_enter_min_node.name, q_enter_max_node.name
            ]
            quantize_node = Helper.create_node('QuantizeV2',
                                               i[0] + '_quantize', quant_input)
            Helper.set_attr_dtype(quantize_node, "T", dtypes.quint8)
            Helper.set_attr_string(quantize_node, "mode", b"MIN_FIRST")
            g.add_node(quantize_node, start_node_name, [i[0]])
            g.add_node(q_enter_max_node, None, [quantize_node.name])
            g.add_node(q_enter_min_node, None, [quantize_node.name])
            g.add_node(q_max_in_node, None, [q_enter_max_node.name])
            g.add_node(q_min_in_node, None, [q_enter_min_node.name])

            bias_node = graph_info[graph_info[i[0]].outputs[0]].node
            if graph_info[bias_node.name].outputs:
                last_node_name = [
                    graph_info[graph_info[bias_node.name].outputs[0]].node.name
                ]
            else:
                last_node_name = []
            quantized_matmul_input = [
                quantize_node.name,
                Helper.node_name_from_input(graph_info[i[0]].node.input[1]),
                bias_node.input[1]
            ]
            quantized_matmul_input.append(quantize_node.name + ':1')
            quantized_matmul_input.append(quantize_node.name + ':2')

            quantized_matmul_input.append(enter_min_node.name)
            quantized_matmul_input.append(enter_max_node.name)
            quantized_matmul_with_bias_node = Helper.create_node(
                'QuantizedMatMulWithBias', i[0] + '_quantized_mat_mul',
                quantized_matmul_input)
            Helper.set_attr_dtype(quantized_matmul_with_bias_node, 'T1',
                                  dtypes.quint8)
            Helper.set_attr_dtype(quantized_matmul_with_bias_node, 'T2',
                                  dtypes.qint8)
            Helper.set_attr_dtype(quantized_matmul_with_bias_node, 'Tbias',
                                  dtypes.float32)
            Helper.set_attr_dtype(quantized_matmul_with_bias_node, 'Toutput',
                                  dtypes.qint32)
            Helper.set_attr_bool(quantized_matmul_with_bias_node,
                                 'transpose_a', False)
            Helper.set_attr_bool(quantized_matmul_with_bias_node,
                                 'transpose_b', False)
            Helper.set_attr_string(quantized_matmul_with_bias_node,
                                   'input_quant_mode', b"MIN_FIRST")
            g.add_node(quantized_matmul_with_bias_node, quantize_node.name,
                       [bias_node.name])

            if qint8_const_node.name not in graph_info:
                g.add_node(qint8_const_node, None, [enter_node_name])
                enter_node = graph_info[enter_node_name].node
                split_node = graph_info[Helper.node_name_from_input(
                    split_node_name)].node
                Helper.set_attr_dtype(enter_node, 'T', dtypes.qint8)
                Helper.set_attr_dtype(split_node, 'T', dtypes.qint8)
                graph_info[
                    enter_node.name].node.input[0] = qint8_const_node.name
            elif qint8_const_node.name in graph_info:
                pass
            else:
                g.add_node(qint8_const_node, None,
                           [quantized_matmul_with_bias_node.name])

            if need_to_create_const_node:
                g.add_node(enter_min_node, None,
                           [quantized_matmul_with_bias_node.name])
                g.add_node(enter_max_node, None,
                           [quantized_matmul_with_bias_node.name])
                g.add_node(min_node, None, [enter_min_node.name])
                g.add_node(max_node, None, [enter_max_node.name])

            # create requantize node
            requantize_min_node = Helper.create_constant_node(
                i[0] + 'requant_w_min', min_output, dtypes.float32)
            requantize_max_node = Helper.create_constant_node(
                i[0] + 'requant_w_max', max_output, dtypes.float32)

            enter_req_min_node = Helper.create_node(
                'Enter', requantize_min_node.name + '_enter',
                [requantize_min_node.name])
            Helper.set_attr_string(enter_req_min_node, 'frame_name',
                                   self.rnn_details[i].encode())
            Helper.set_attr_dtype(enter_req_min_node, 'T', dtypes.float32)
            Helper.set_attr_bool(enter_req_min_node, 'is_constant', True)
            Helper.set_attr_int(enter_req_min_node, 'parallel_iterations', 32)

            enter_req_max_node = Helper.create_node(
                'Enter', requantize_max_node.name + '_enter',
                [requantize_max_node.name])
            Helper.set_attr_dtype(enter_req_max_node, 'T', dtypes.float32)
            Helper.set_attr_string(enter_req_max_node, 'frame_name',
                                   self.rnn_details[i].encode())
            Helper.set_attr_bool(enter_req_max_node, 'is_constant', True)
            Helper.set_attr_int(enter_req_max_node, 'parallel_iterations', 32)
            requantize_input = [
                quantized_matmul_with_bias_node.name,
                quantized_matmul_with_bias_node.name + ':1',
                quantized_matmul_with_bias_node.name + ':2',
                enter_req_min_node.name, enter_req_max_node.name
            ]
            requantize_node = Helper.create_node('Requantize',
                                                 i[0] + '_requantize',
                                                 requantize_input)
            Helper.set_attr_dtype(requantize_node, 'out_type', dtypes.qint8)
            Helper.set_attr_dtype(requantize_node, 'Tinput', dtypes.qint32)

            g.add_node(requantize_node, quantized_matmul_with_bias_node.name,
                       [bias_node.name])
            dequantize_input = [
                requantize_node.name, requantize_node.name + ':1',
                requantize_node.name + ':2'
            ]
            dequantize_node = Helper.create_node('Dequantize',
                                                 i[0] + '_dequantize',
                                                 dequantize_input)
            Helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8)
            Helper.set_attr_dtype(dequantize_node, "dtype", dtypes.float32)
            Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST")

            g.add_node(enter_req_min_node, None, [requantize_node.name])
            g.add_node(enter_req_max_node, None, [requantize_node.name])
            g.add_node(requantize_min_node, None, [enter_req_min_node.name])
            g.add_node(requantize_max_node, None, [enter_req_max_node.name])
            g.add_node(dequantize_node, requantize_node.name, last_node_name)
            if last_node_name:
                graph_info[
                    last_node_name[0]].node.input[0] = dequantize_node.name
            g.remove_node(bias_node.name)
            g.remove_node(i[0])

            # g.remove_node(weight_node_name)

        return g.dump_graph()
Example #17
    def _backtrack_construct(self, graph, node_name):
        """Convert a specific tensorflow node to relay expression.

        If any of its ancestor nodes is not converted yet, backtrack as
        far as the input node and convert all nodes on the path. Recursion is used here.

        This is required when parsing control flow nodes, since the parsing
        order may not follow the original graph def.

        To discover input nodes, the current tf node's inputs are iterated:

        tensorflow/core/framework/node_def.proto
            message NodeDef {
                repeated string input = 3;
            }

        A node has many inputs (other nodes). Each input has the following format:
            data input is "node:src_output", where node is the string name.
            control input is "^node".

        Parameters
        ----------
        graph : <class 'tensorflow.core.framework.graph_pb2.GraphDef'>
            TF2 frozen graph def

        node_name : str
            node name

        Returns
        -------
        op : relay.Expr
            Converted relay expression.

        Examples
        --------
        tf expression "x+1" is converted to relay expression:
            CallNode(Op(add), [Var(x, ty=TensorType([], float32)), Constant(1.0)], (nullptr), [])

        """

        input_op_name = node_name.split(":")[0].split("^")[-1]
        if input_op_name not in self._nodes:
            node = self._tf_node_map[input_op_name]
            attr = parse_attr(node.attr)
            if "_output_shapes" in attr:
                self._output_shapes[node.name] = [
                    tensor_util.TensorShapeProtoToList(tshape) for tshape in attr["_output_shapes"]
                ]
            else:
                self._output_shapes[node.name] = [None]

            attr["_output_shapes"] = self._output_shapes[input_op_name]
            attr["_node_name"] = node.name
            attr["_target_layout"] = self._layout
            inputs = [self._backtrack_construct(graph, iname) for iname in node.input]
            op = self._convert_operator(graph, node.op, node.name, inputs, attr)

            if isinstance(op, np.ndarray):
                self._params[node.name] = tvm.nd.array(op)
                op = [
                    _expr.var(
                        node.name,
                        shape=self._params[node.name].shape,
                        dtype=self._params[node.name].dtype,
                    )
                ]
            elif isinstance(op, (_expr.Expr, _expr.TupleGetItem)):
                op = [op]
            self._nodes[input_op_name] = op

        out = self._nodes[input_op_name]
        if isinstance(out, _expr.TupleWrapper):
            tn = node_name.split(":")
            tensor_slot = int(tn[1]) if len(tn) > 1 else 0
            return out[tensor_slot]

        return out[0]
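
The docstring above spells out the NodeDef input-name convention that the node_name.split(":")[0].split("^")[-1] line relies on: a data input looks like "node:src_output" and a control input looks like "^node". A small sketch of that parsing on its own (the helper name is illustrative):

def parse_tf_input_name(input_name):
    """Split a NodeDef input string into (op_name, output_slot)."""
    op_name = input_name.split(":")[0].split("^")[-1]   # drop control prefix and output slot
    parts = input_name.split(":")
    output_slot = int(parts[1]) if len(parts) > 1 else 0
    return op_name, output_slot

assert parse_tf_input_name("Model/RNN/cell_0/RnnCell:1") == ("Model/RNN/cell_0/RnnCell", 1)
assert parse_tf_input_name("^CheckNumerics") == ("CheckNumerics", 0)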
Example #18
    def from_tensorflow(self, graph):
        """Construct nnvm nodes from tensor flow  graph definition - GraphDef.

        Follow the tensorflow graph definition to parse and convert it to NNVM.
        Some of the assumptions listed below.

            -> First Const node will be considered as graph input.
            -> Rest all Const nodes are params.
            -> Last node is assumed as graph output.
            -> _output_shapes : Attribute should be present in the tensorflow frozen graph.
            -> DecodeJpeg, ResizeBilinear: These are dummy operators.
                                           Hence user should handle preprocessing outside.
            -> CheckNumerics: No implementation as of now for this.
                              Just copies input to output.


        Parameters
        ----------
        graph : tensorflow graph definition object
            The loaded tensorflow GraphDef

        Returns
        -------
        sym : nnvm.sym.Symbol
            The returned nnvm symbol
        params : dict
            A dict of name: tvm.nd.array pairs, used as pretrained weights
        """
        # Parse through all nodes and start extracting
        # params aka Const nodes
        # input nodes  : First const node
        # normal nodes : other normal nodes

        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        for node in graph.node:
            # TensorFlow doesn't have a separate list for params extraction.
            # Operator name 'Const' is treated as a parameter to build NNVM params dict.
            attr = self._parse_attr(node.attr)
            input_shapes = {}
            if node.op == "Placeholder":
                # Assuming only one input graph with type 'Placeholder'
                self._input_node = node.name
                self._num_input += 1
                self._nodes[node.name] = _sym.Variable(name=node.name)
                self._output_shapes[node.name] = \
                     [tensor_util.TensorShapeProtoToList(shape) for shape in attr['_output_shapes']]
                input_shapes[self._nodes[node.name]] = self._output_shapes[
                    node.name]
                attr['_input_shapes'] = input_shapes
            elif node.op == "Const":
                # Assuming first Const node as Graph Input node
                if self._input_node == '':
                    self._input_node = node.name
                    self._num_input += 1
                    self._nodes[node.name] = _sym.Variable(name=node.name)
                else:
                    # Rest all nodes are Param nodes, lets parse
                    self._num_param += 1
                    for key, value in node.attr.items():
                        self._parse_param(key, value, node.name)
                    if node.name not in self._nodes:
                        raise NotImplementedError( \
                            "Const {} couldn't be converted to Param.".format(node.name))
            else:
                self._output_shapes[node.name] = \
                     [tensor_util.TensorShapeProtoToList(shape) for shape in attr['_output_shapes']]

                inputs = []
                try:
                    """TODO: Some of the tensorflow operators internally maintain
                    layers and their output name will be the layer number along with
                    the graph node name, e.g. node name 'Model/RNN/cell_0/RnnCell'
                    but output name 'Model/RNN/cell_0/RnnCell:0'.
                    In this case, the digit has to be ignored.
                    """
                    if ":" in node.input[0]:
                        in_name, _ = node.input[0].split(':')
                        node.input[0] = in_name

                    inputs = [self._nodes[i] for i in node.input]
                    for i in node.input:
                        if i not in self._params:
                            input_shapes[
                                self._nodes[i]] = self._output_shapes[i]
                    attr['_input_shapes'] = input_shapes
                except KeyError:
                    # TODO: Need to find a clean way to handle '^CheckNumerics'
                    print("Exception while building inputs list:", node.input,
                          " ignoring...")

                inputs = self._fix_extranodes(node.op, attr, inputs)

                op = self._convert_operator(node.op, inputs, attr, graph)
                # Assuming only one output.
                self._nodes[node.name] = op
                node_output = op
        # Assume the final node is the output node
        out = node_output
        # Set the RNN states as another output if they exist
        if self._num_rnn_layer > 0:
            out_states = _sym.concatenate(*self._out_states, axis=0)
            out = [out, out_states]

        if isinstance(out, list):
            out = _sym.Group(out)
        return out, self._params
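A minimal driver sketch for the frontend above (not part of the original example): it assumes the method is exposed through nnvm.frontend.from_tensorflow, as in the old nnvm package, and "frozen_model.pb" is a hypothetical file path.

from tensorflow.core.framework import graph_pb2
import nnvm.frontend

# Load a frozen GraphDef from disk ("frozen_model.pb" is a hypothetical path).
graph_def = graph_pb2.GraphDef()
with open("frozen_model.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Convert the GraphDef into an NNVM symbol plus a params dict of tvm.nd.array weights.
sym, params = nnvm.frontend.from_tensorflow(graph_def)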
Exemple #19
0
def _TemporaryVariableShape(op):
    """Shape function for the TemporaryVariable op."""
    shape = tensor_util.TensorShapeProtoToList(op.get_attr("shape"))
    return [tensor_shape.TensorShape(shape)]
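Nearly every example in this collection leans on tensor_util.TensorShapeProtoToList; a minimal sketch of what it returns, assuming only that TensorFlow is installed. Unknown dimensions come back as -1, which is why several of the converters above replace -1 with a concrete size.

from tensorflow.python.framework import tensor_shape, tensor_util

# Build a TensorShapeProto with one unknown dimension and convert it to a plain list.
proto = tensor_shape.TensorShape([None, 224, 224, 3]).as_proto()
print(tensor_util.TensorShapeProtoToList(proto))  # [-1, 224, 224, 3]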
Exemple #20
0
    def _intel_cpu_quantize_weight_eightbit(self,
                                            parent,
                                            input_node,
                                            per_channel,
                                            quantization_mode=b"SCALED"):
        base_name = input_node.name + "_"
        qint8_const_name = base_name + "qint8_const"
        min_name = base_name + "min"
        max_name = base_name + "max"
        float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
        epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory
        range_coefficent = 127 / (2**self.weight_bit - 1)
        if parent in ("Conv2D", "MatMul"):
            if per_channel:
                ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
                ranges *= range_coefficent
                min_value = -ranges
                max_value = ranges
                # nudging min-max values outside epsilon radius around zero
                ranges[ranges < epsilon] = epsilon
                min_value[np.abs(min_value) < epsilon] = -epsilon
                max_value[np.abs(max_value) < epsilon] = epsilon
                qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
            else:
                min_value = np.min(float_tensor.flatten())
                max_value = np.max(float_tensor.flatten())
                min_value *= range_coefficent
                max_value *= range_coefficent
                # Same processing of min-max as in quantize_weight_eightbit
                # function.
                if min_value > 0.0:
                    min_value = 0.0
                if min_value == max_value:
                    if abs(min_value) < 0.000001:
                        max_value = min_value + 1.0
                    elif min_value > 0:
                        max_value = 2 * min_value
                    else:
                        max_value = min_value / 2.0

                sess = tf.compat.v1.Session()
                with sess.as_default():
                    quantize_op = array_ops.quantize_v2(
                        float_tensor,
                        min_value,
                        max_value,
                        dtypes.qint8,
                        mode=quantization_mode,
                        round_mode="HALF_TO_EVEN")
                    qint8_tensor = quantize_op[0].numpy(
                    ) if tf.executing_eagerly() else quantize_op[0].eval()
                    # Updated min-max values should be passed to the next
                    # feeding node.
                    min_value = quantize_op[1].numpy() if tf.executing_eagerly(
                    ) else quantize_op[1].eval()
                    max_value = quantize_op[2].numpy() if tf.executing_eagerly(
                    ) else quantize_op[2].eval()
                sess.close()
        elif parent == "DepthwiseConv2dNative":
            # get the max values based on dim 0 and 1 for depthwise conv
            # since the output channel will be dim 2 * dim 3
            ranges = np.abs(float_tensor).max(axis=(0, 1))
            ranges = ranges.flatten()
            min_value = -ranges
            max_value = ranges
            # nudging min-max values outside epsilon radius around zero
            ranges[ranges < epsilon] = epsilon
            min_value[np.abs(min_value) < epsilon] = -epsilon
            max_value[np.abs(max_value) < epsilon] = epsilon
            # Since the output channel collapses into one dim (dim 2 * dim 3),
            # qint8_tensor needs to be reshaped to 3 dims before dividing by ranges,
            # where the 3rd dim must match the length of ranges
            a, b, c, d = float_tensor.shape
            qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                            ranges).astype(np.int8)
            # get the shape back to 4 dim
            qint8_tensor = qint8_tensor.reshape(a, b, c, d)
        shape = tensor_util.TensorShapeProtoToList(
            input_node.attr["value"].tensor.tensor_shape)
        qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                       qint8_tensor,
                                                       dtypes.qint8,
                                                       shape=shape)

        min_node = helper.create_constant_node(min_name,
                                               min_value,
                                               dtypes.float32,
                                               device=self.device)

        max_node = helper.create_constant_node(max_name,
                                               max_value,
                                               dtypes.float32,
                                               device=self.device)

        self.add_output_graph_node(qint8_const_node)
        self.add_output_graph_node(min_node)
        self.add_output_graph_node(max_node)

        return qint8_const_node.name, min_node.name, max_node.name
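The per-channel branch above boils down to scaling each output channel by 127 over its maximum absolute weight; a numpy-only sketch of that arithmetic, independent of the helper classes and with the range coefficient assumed to be 1:

import numpy as np

# Toy 1x1x2x3 conv weight: 3 output channels in the last dimension.
w = np.array([[[[0.5, -1.0, 0.002],
                [0.25, 2.0, -0.001]]]], dtype=np.float32)

ranges = np.abs(w).max(axis=(0, 1, 2))        # per-output-channel max |w|
ranges[ranges < 1e-4] = 1e-4                  # nudge near-zero ranges, as in the code above
min_value, max_value = -ranges, ranges        # symmetric per-channel min/max passed downstream
qint8 = (w * 127.0 / ranges).astype(np.int8)  # each channel mapped into [-127, 127]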
Exemple #21
0
    def from_tensorflow(self, graph):
        """Construct nnvm nodes from tensorflow  graph definition - GraphDef.

        Follow the tensorflow graph definition to parse and convert it to NNVM.
        Some of the assumptions listed below.

            -> First Const or Placeholder node will be considered as graph input.
            -> Rest all Const nodes are params.
            -> Last node is assumed as graph output.
            -> _output_shapes : Attribute should present in the tenserflow forzen graph.
            -> DecodeJpeg, ResizeBilinear: These are dummy operators.
                                           Hence user should handle preprocessing outside.
            -> CheckNumerics: No implementation as of now for this.
                              Just copies input to output.


        Parameters
        ----------
        graph : tensorflow graph definition object
            The loaded tensorflow GraphDef

        Returns
        -------
        sym : nnvm.sym.Symbol
            The returned nnvm symbol
        params : dict
            A dict of name: tvm.nd.array pairs, used as pretrained weights
        """
        # Parse through all nodes and start extracting
        # params aka Const nodes
        # input nodes  : First const node
        # normal nodes : other normal nodes

        try:
            from tensorflow.python.framework import tensor_util
        except ImportError as e:
            raise ImportError(
                "Unable to import tensorflow which is required {}".format(e))

        for node in graph.node:
            # TensorFlow doesn't have a separate list for params extraction.
            # Operator name 'Const' is treated as a parameter to build NNVM params dict.
            if node.op == "Placeholder":
                # Assuming only one graph input node with type 'Placeholder'
                self._input_node = node.name
                self._num_input += 1
                self._nodes[node.name] = _sym.Variable(name=node.name)

                try:
                    self._output_shapes[node.name] = \
                         [tensor_util.TensorShapeProtoToList(shape) \
                         for shape in self._parse_attr(node.attr)['_output_shapes']]
                except KeyError:
                    raise NotImplementedError( \
                        "Please freeze the graph with add_shapes=True")
            elif node.op == "Const":
                # Assuming first Const node as Graph Input node
                if self._input_node == '':
                    self._input_node = node.name
                    self._num_input += 1
                    self._nodes[node.name] = _sym.Variable(name=node.name)
                else:
                    # All remaining Const nodes are param nodes, let's parse them
                    self._num_param += 1
                    for key, value in node.attr.items():
                        self._parse_param(key, value, node.name)
                    if node.name not in self._nodes:
                        raise NotImplementedError( \
                            "Const {} couldn't be converted to Param.".format(node.name))

                try:
                    self._output_shapes[node.name] = \
                         [tensor_util.TensorShapeProtoToList(shape) \
                         for shape in self._parse_attr(node.attr)['_output_shapes']]
                except KeyError:
                    raise NotImplementedError( \
                        "Please freeze the graph with add_shapes=True")
            else:
                attr = self._parse_attr(node.attr)
                try:
                    self._output_shapes[node.name] = \
                         [tensor_util.TensorShapeProtoToList(shape) \
                          for shape in attr['_output_shapes']]
                except KeyError:
                    raise NotImplementedError( \
                        "Please freeze the graph with add_shapes=True")

                # Pass the parsed shapes instead
                attr["_output_shapes"] = self._output_shapes[node.name]

                # Pass the node name too in attr
                attr["_node_name"] = node.name

                inputs = []
                try:
                    inputs = [self._nodes[i] for i in node.input]
                    input_shapes = {}
                    for i in node.input:
                        if i not in self._params:
                            input_shapes[self._nodes[i]] = self._output_shapes[i]
                    attr['_input_shapes'] = input_shapes
                except KeyError:
                    # TODO: Need to find a clean way to handle '^CheckNumerics'
                    print("Exception while building inputs list:", node.input, " ignoring...")

                inputs = self._fix_extranodes(node.op, attr, inputs)

                op = self._convert_operator(node.op, inputs, attr)
                # Assuming only one output.
                self._nodes[node.name] = op
                node_output = op
        # Assume the final node is the output node
        out = node_output
        return out, self._params
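Both from_tensorflow variants refuse graphs without the _output_shapes attribute; a hedged TF 1.x-style sketch of producing such a GraphDef follows. The tiny placeholder/variable/softmax model and the "softmax" output node name are invented here purely for illustration.

import tensorflow as tf
from tensorflow.python.framework import graph_util

tf.compat.v1.disable_eager_execution()
with tf.compat.v1.Session() as sess:
    x = tf.compat.v1.placeholder(tf.float32, shape=(1, 4), name="input")
    w = tf.Variable(tf.ones((4, 2)), name="weight")
    y = tf.nn.softmax(tf.matmul(x, w), name="softmax")
    sess.run(tf.compat.v1.global_variables_initializer())
    frozen = graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), ["softmax"])

# Re-import the frozen graph and export it again with add_shapes=True so that
# every node carries the _output_shapes attribute the frontend checks for.
with tf.Graph().as_default() as g:
    tf.compat.v1.import_graph_def(frozen, name="")
    graph_def_with_shapes = g.as_graph_def(add_shapes=True)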