def version_13(cls, node, **kwargs):
    """ Dequantize ``x`` as ``(x - x_zero_point) * x_scale`` in float32.

    The tensors are looked up in ``kwargs["tensor_dict"]`` by the names in
    ``node.inputs``: the quantized input, its scale and, optionally, its
    zero point. When the scale is 1-D, scale and zero point are reshaped
    so that they broadcast along ``axis`` of the input.

    Returns a one-element list holding the dequantized tensor.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    # Remember the dtype before casting: after the cast ``x.dtype`` is always
    # float32, so the int32 special case below could never be detected.
    x_dtype = x.dtype
    x = tf.cast(x, tf.float32)
    x_scale = tensor_dict[node.inputs[1]]
    axis = node.attrs.get("axis", 1)

    x_shape = tf_shape(x)
    x_rank = len(x_shape)
    # A negative axis counts from the last dimension; normalise it so the
    # broadcast shape below is built around the intended dimension.
    if axis < 0:
        axis += x_rank
    per_axis = len(tf_shape(x_scale)) == 1

    # Reshape process is needed for per-axis dequantization
    # when scale is a 1-D tensor
    if per_axis:
        shape_broadcast = ([1] * axis + [x_shape[axis]] +
                           [1] * (x_rank - axis - 1))
        x_scale = tf.reshape(x_scale, shape_broadcast)

    # int32 input carries no zero point, so the subtraction is skipped.
    if len(node.inputs) == 3 and x_dtype != tf.int32:
        x_zero_point = tf.cast(tensor_dict[node.inputs[2]], tf.float32)
        if per_axis:
            x_zero_point = tf.reshape(x_zero_point, shape_broadcast)
        x = tf.subtract(x, x_zero_point)

    y = tf.multiply(x, x_scale)
    return [y]
def version_13(cls, node, **kwargs):
    """ Quantize ``x`` as ``saturate(round(x / y_scale) + y_zero_point)``.

    The tensors are looked up in ``kwargs["tensor_dict"]`` by the names in
    ``node.inputs``: the input, the scale and, optionally, the zero point.
    The output dtype is the dtype of the zero point, or uint8 when no zero
    point is supplied. When the scale is 1-D, scale and zero point are
    reshaped so that they broadcast along ``axis`` of the input.

    Returns a one-element list holding the quantized tensor.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    y_scale = tensor_dict[node.inputs[1]]
    axis = node.attrs.get("axis", 1)

    x = tf.cast(x, tf.float32)
    x_shape = tf_shape(x)
    x_rank = len(x_shape)
    # A negative axis counts from the last dimension; normalise it so the
    # broadcast shape below is built around the intended dimension.
    if axis < 0:
        axis += x_rank
    per_axis = len(tf_shape(y_scale)) == 1

    # Reshape process is needed for per-axis quantization
    # when scale is a 1-D tensor
    if per_axis:
        shape_broadcast = ([1] * axis + [x_shape[axis]] +
                           [1] * (x_rank - axis - 1))
        y_scale = tf.reshape(y_scale, shape_broadcast)

    y = tf.round(tf.divide(x, y_scale))

    if len(node.inputs) == 3:
        y_zero_point = tensor_dict[node.inputs[2]]
        y_dtype = y_zero_point.dtype
        y_zero_point = tf.cast(y_zero_point, tf.float32)
        if per_axis:
            y_zero_point = tf.reshape(y_zero_point, shape_broadcast)
        y = tf.add(y, y_zero_point)
    else:
        # y_zero_point default dtype = uint8
        y_dtype = tf.uint8

    y = tf.saturate_cast(y, y_dtype)
    return [y]
def create_nodes(boxes, scores, max_output_boxes_per_class, iou_threshold,
                 score_threshold, result):
    """ Run non-max suppression for every (batch, class) pair.

    For each batch entry of ``boxes`` and each class row of ``scores`` the
    boxes kept by ``tf.image.non_max_suppression`` are collected as rows of
    ``[batch_index, class_index, box_index]`` (int64) and concatenated along
    axis 0.

    ``result`` is only used as the running accumulator; whatever is passed
    in is replaced by the rows of the first (batch 0, class 0) iteration.

    NOTE(review): the loops iterate over ``tf.range`` and the conditional
    tests tensors, so this presumably relies on being traced by
    ``tf.function``/AutoGraph - confirm at the call site.
    """
    # get number of batches in boxes
    num_batches = tf_shape(boxes)[0]
    for batch_i in tf.range(num_batches):
        # get boxes in batch_i only
        tf_boxes = tf.squeeze(tf.gather(boxes, [batch_i]), axis=0)
        # get scores of all classes in batch_i only
        batch_i_scores = tf.squeeze(tf.gather(scores, [batch_i]), axis=0)
        # get number of classes in batch_i only
        num_classes = tf_shape(batch_i_scores)[0]
        for class_j in tf.range(num_classes):
            # get scores in class_j for batch_i only
            tf_scores = tf.squeeze(tf.gather(batch_i_scores, [class_j]), axis=0)
            # get the selected boxes indices
            selected_indices = tf.image.non_max_suppression(
                tf_boxes, tf_scores, max_output_boxes_per_class, iou_threshold,
                score_threshold)
            # add batch and class information into the indices:
            # turn the indices into a column, then left-pad one column with
            # the class index and one more with the batch index
            output = tf.transpose([tf.cast(selected_indices, dtype=tf.int64)])
            paddings = tf.constant([[0, 0], [1, 0]])
            output = tf.pad(output,
                            paddings,
                            constant_values=tf.cast(class_j, dtype=tf.int64))
            output = tf.pad(output,
                            paddings,
                            constant_values=tf.cast(batch_i, dtype=tf.int64))
            # tf.function will auto convert "result" from variable to placeholder
            # therefore don't need to use assign here
            result = output if tf.equal(batch_i, 0) and tf.equal(
                class_j, 0) else tf.concat([result, output], 0)

    return result
def chk_idx_out_of_bounds(cls, data, indices, batch_dims=0):
    """ Check indices out of bounds for ScatterND and GatherND

    In the Tensorflow GPU version an out of bound index yields a 0 in the
    corresponding output value for GatherND, and is ignored for
    ScatterND/TensorScatterNDUpdate. The ONNX spec states that any out of
    bound index is an error, so the converter runs this function to verify
    that all indices are in bounds before handing them to Tensorflow. When
    the returned value is False the caller is expected to raise an
    InvalidArgumentError exception.

    Every component ``i`` of the index tuples is checked against
    ``[-limit, limit)`` where ``limit`` is dimension ``i + batch_dims`` of
    ``data``; the scan stops at the first component that fails.
    """
    shape_of_data = tf_shape(data)
    shape_of_indices = tf_shape(indices)

    if batch_dims > 0:
        # collapse the leading batch dimensions into a single one
        flat_batch = shape_of_indices[0]
        for dim in range(1, batch_dims):
            flat_batch = tf.multiply(flat_batch, shape_of_indices[dim])
        indices = tf.reshape(indices, [flat_batch, shape_of_indices[-1]])

    def _component_in_bounds(pos, in_bounds):
        # all index values addressing the same data dimension
        component = tf.transpose(indices)[pos]
        limit = tf.cast(shape_of_data, indices.dtype)[pos + batch_dims]
        not_too_small = tf.greater_equal(component, tf.negative(limit))
        not_too_big = tf.less(component, limit)
        in_bounds = tf.reduce_all(tf.logical_and(not_too_small, not_too_big))
        return pos + 1, in_bounds

    def _keep_scanning(pos, in_bounds):
        return tf.logical_and(tf.less(pos, shape_of_indices[-1]), in_bounds)

    _, result = tf.while_loop(_keep_scanning, _component_in_bounds,
                              [tf.zeros([], tf.int64), True])
    return result
def version_11(cls, node, **kwargs):
    """ Split the input along ``axis`` and pack the pieces into a sequence.

    The input tensor and the optional ``split`` tensor are looked up in
    ``kwargs["tensor_dict"]``. A 1-D ``split`` gives the piece sizes; a
    scalar ``split`` is rejected. Without ``split`` the input is cut into
    pieces of size 1 and, when ``keepdims`` is 0, the split dimension is
    removed from every piece.

    Returns a one-element list holding the sequence as a ragged tensor.

    Raises RuntimeError when ``split`` is not 1-D.
    """
    # split the input first
    tensor_dict = kwargs["tensor_dict"]
    original_input = tensor_dict[node.inputs[0]]
    dtype = original_input.dtype
    axis = node.attrs.get("axis", 0)
    keepdims = node.attrs.get("keepdims", 1)
    input_shape = tf_shape(original_input)

    if len(node.inputs) > 1:
        split = tensor_dict[node.inputs[1]]
        split_shape = tf_shape(split)
        # check if the split is 1-d or scalar
        if split_shape.shape[0] != 1:
            # A scalar split would need the sizes [n, n, ..., m] with
            # m = size mod n. tf.split does not take an unknown shape
            # tensor for num_or_size_splits, and these sizes are only
            # known from the ONNX inputs at run time, so this option is
            # currently not supported.
            raise RuntimeError(
                "Split to sequence with scalar split is not supported due to API limitations."
            )
        split_inputs = tf.split(original_input, split, axis=axis)
    else:
        # split is not provided, use default 1
        split_sizes = tf.tile([1], tf.reshape(input_shape[axis], [1]))
        split_inputs = tf.split(original_input, split_sizes, axis=axis)
        if keepdims == 0:
            # Drop only the split dimension; a bare tf.squeeze would also
            # remove any other dimension that happens to have size 1.
            split_inputs = [
                tf.squeeze(piece, axis=axis) for piece in split_inputs
            ]

    # create an empty sequence next
    input_sequence = tf.ragged.constant([], dtype=dtype)
    # stays the empty sequence when there is nothing to insert
    output_seq = input_sequence

    # insert tensors at the end of sequence
    for piece in split_inputs:
        input_tensor = tf.expand_dims(piece, 0)
        if input_sequence.shape[0] == 0:
            output_seq = tf.RaggedTensor.from_tensor(input_tensor)
        else:
            output_seq = tf.concat([input_sequence, input_tensor], axis=0)
        input_sequence = output_seq

    return [output_seq]
def process_neg_idx(cls, data, indices):
    """ Convert all the negative indices to positive

    GatherND and ScatterND/TensorScatterNDUpdate in Tensorflow do not
    support negative indices, so this function maps every index into the
    non-negative range before it is handed to Tensorflow. Each index
    component is shifted by the size of the data dimension it addresses
    and reduced modulo that size.
    """
    index_depth = tf_shape(indices)[-1]
    # sizes of the leading data dimensions addressed by one index tuple
    dim_sizes = tf.cast(tf_shape(data)[:index_depth], indices.dtype)
    shifted = tf.add(indices, dim_sizes)
    return tf_floormod(shifted, dim_sizes)
def version_10(cls, node, **kwargs):
    """ Resize the two trailing dimensions of ``x`` by ``scales[2:]``.

    ``x`` and ``scales`` are looked up in ``kwargs["tensor_dict"]``. The
    ``mode`` attribute selects bilinear ("linear") or nearest-neighbour
    resizing (anything else). Input dtypes listed in ``cls.x_cast_map`` are
    cast before resizing and the result is cast back to the input dtype.

    Returns a one-element list holding the resized tensor.
    """
    # x, roi and scales are all in NCHW format
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    x_shape = tf_shape(x)
    x_dtype = x.dtype
    scales = tensor_dict[node.inputs[1]]

    # get the new size from scales
    h_w_scale = scales[2:]
    h_w_shape = x_shape[2:]
    new_h_w_shape = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
                            tf.int32)

    mode = node.attrs.get("mode", "nearest")
    if mode.lower() == "linear":
        mode = tf.image.ResizeMethod.BILINEAR
    else:
        mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR

    # process tf.image.resize unsupported datatype for x
    if x_dtype in cls.x_cast_map:
        x = tf.cast(x, cls.x_cast_map[x_dtype])

    # The input image is in NCHW format. But tf.image.resize only
    # support channel last data format. Therefore need to transpose
    # to NHWC format first then process resize and then transpose
    # back to NCHW format.
    x_t = tf.transpose(x, perm=[0, 2, 3, 1])
    y = tf.image.resize(x_t, size=new_h_w_shape, method=mode)
    output = tf.transpose(y, perm=[0, 3, 1, 2])

    # cast output back to the original x.dtype; dtypes are compared by
    # value rather than by object identity
    if x_dtype != tf.float32:
        output = tf.cast(output, x_dtype)

    return [output]
def _common(cls, node, **kwargs):
    """ Split the input tensor into ``len(node.outputs)`` pieces.

    The piece sizes come from the ``split`` attribute, else from the second
    input, else the dimension at ``axis`` is divided evenly between the
    outputs. The sizes are handed on as ``num_or_size_splits``.

    Returns the list of tensors produced by
    ``cls.make_tensor_from_onnx_node``.

    Raises ValueError when no sizes are given, the input shape is fully
    defined and the dimension cannot be divided evenly.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    x_shape = tf_shape(x)
    attrs = copy.deepcopy(node.attrs)
    axis = attrs.get("axis", 0)
    axis = axis if axis >= 0 else len(x.get_shape()) + axis

    if "split" in node.attrs:
        split = attrs["split"]
    elif len(node.inputs) == 2:  # split sizes supplied as an input tensor
        split = tensor_dict[node.inputs[1]]
    else:
        per_part = x_shape[axis] / len(node.outputs)
        if x.get_shape().is_fully_defined():
            if int(per_part) != per_part:
                raise ValueError("Split can not be evenly divided.")
            split = [int(per_part)] * len(node.outputs)
        else:
            split = [tf.cast(per_part, tf.int32)] * len(node.outputs)

    # Pass the sizes themselves: passing only the number of outputs would
    # silently ignore explicitly requested, possibly uneven, sizes.
    attrs["num_or_size_splits"] = split
    return list(
        cls.make_tensor_from_onnx_node(node, inputs=[x], attrs=attrs, **kwargs))
def version_9(cls, node, **kwargs):
    """ Upsample the two trailing dimensions of ``x`` by ``scales[2:]``.

    Asserts that the first two entries of ``scales`` are 1, derives the
    target size and resize method, stores them in the attributes as
    ``size`` and ``method`` and delegates to
    ``cls.make_tensor_from_onnx_node`` on a copy of the node whose scales
    input has been removed.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    x_shape = tf_shape(x)
    attrs = copy.deepcopy(node.attrs)
    scales = tensor_dict[node.inputs[1]]

    n_scale_is_one = tf.equal(scales[0], 1)
    c_scale_is_one = tf.equal(scales[1], 1)
    scales_guard = tf.Assert(tf.logical_and(n_scale_is_one, c_scale_is_one),
                             [scales])

    with tf.control_dependencies([scales_guard]):
        spatial_scale = scales[2:]
        spatial_dims = tf.cast(x_shape[2:], scales.dtype)
        target_size = tf.cast(spatial_scale * spatial_dims, tf.int32)

        requested = attrs.get("mode", "nearest").lower()
        if requested in ("bilinear", "linear"):
            method = tf.image.ResizeMethod.BILINEAR
        else:
            method = tf.image.ResizeMethod.NEAREST_NEIGHBOR

        attrs["size"] = target_size
        attrs["method"] = method

        # Remove scale.
        upsample_node = copy.deepcopy(node)
        del upsample_node.inputs[1]
        upsampled = cls.make_tensor_from_onnx_node(upsample_node,
                                                   attrs=attrs,
                                                   c_last_only=True,
                                                   **kwargs)
        return [upsampled]
def __init__(self,
             input,
             kernel_shape,
             strides,
             dilations,
             padding="VALID",
             ceil_mode=False,
             count_include_pad=False,
             pooling_type="MAX"):
    """ Store the pooling parameters and derive shape information.

    ``input`` is converted to a tensor and the converted tensor is used
    for every later lookup. The input rank is taken to be
    ``len(kernel_shape) + 2``. ``padding`` is either a string or a list of
    explicit pads. ``pooling_type`` is upper-cased; for types starting with
    "MAX" padding uses the smallest value of the input dtype, otherwise 0.
    """
    self.input = tf.convert_to_tensor(input)

    self.kernel_shape = kernel_shape
    self.strides = strides
    self.dilations = dilations
    self.padding = padding
    self.is_explicit_padding = isinstance(padding, list)
    self.ceil_mode = ceil_mode
    self.count_include_pad = count_include_pad
    self.pooling_type = pooling_type.upper()

    self.is_known_shape = self.input.shape.is_fully_defined()
    self.spatial_size = len(kernel_shape)
    self.input_rank = self.spatial_size + 2

    # if the rank is not defined, set it to the calculated input_rank
    # rank should be known for ops like tf.gather_nd
    if not self.input.shape.rank:
        self.input.set_shape([None] * self.input_rank)
    self.orig_input_shape = tf_shape(self.input)
    self.input_shape = self.orig_input_shape

    # Test the normalised (upper-cased) type so that e.g. "max" gets the
    # same padding constant as "MAX".
    if self.pooling_type.startswith("MAX"):
        self.padding_constant = self.input.dtype.min
    else:
        self.padding_constant = 0
def _pad_input(self):
    """ Pad the input according to the parameters

    Updates ``self.pads`` and, when padding is applied, ``self.input`` and
    ``self.input_shape``.

    Returns the tuple ``(input, pads)`` on every path.
    """
    all_zero_pads = (isinstance(self.padding, list) and
                     self.padding == [0] * self.spatial_size * 2)
    # check if we need to do any padding at all
    if not self.ceil_mode and (all_zero_pads or self.padding == "VALID"):
        self.pads = np.array([0] * self.spatial_size * 2)
        return (self.input, self.pads)

    # the spatial dimensions are read from positions 1..spatial_size
    in_spatial_shape = self.input_shape[1:self.spatial_size + 1]
    pads = self._calc_pads(in_spatial_shape)

    if self.is_known_shape and np.count_nonzero(pads) == 0:
        self.pads = pads
        return (self.input, pads)

    # no padding on the first and last dimension, (begin, end) pairs for
    # every spatial dimension in between
    tf_paddings = [[0, 0]]
    for i in range(self.spatial_size):
        tf_paddings += [[pads[i * 2], pads[i * 2 + 1]]]
    tf_paddings += [[0, 0]]

    self.input = tf.pad(self.input,
                        tf_paddings,
                        mode='CONSTANT',
                        constant_values=self.padding_constant)
    # update input shape and pads values
    self.input_shape = tf_shape(self.input)
    self.pads = pads
    # return the same (input, pads) pair as the early exits above
    return (self.input, self.pads)
def version_10(cls, node, **kwargs):
    """ Resize the two trailing dimensions of ``x`` by ``scales[2:]``.

    ``x`` and ``scales`` are looked up in ``kwargs["tensor_dict"]``; the
    ``mode`` attribute selects bilinear ("linear") or nearest-neighbour
    resizing. Returns a one-element list holding the resized tensor.
    """
    # x, roi and scales are all in NCHW format
    tensors = kwargs["tensor_dict"]
    x = tensors[node.inputs[0]]
    scales = tensors[node.inputs[1]]

    spatial_scale = scales[2:]
    spatial_dims = tf.cast(tf_shape(x)[2:], scales.dtype)
    target_size = tf.cast(spatial_scale * spatial_dims, tf.int32)

    if node.attrs.get("mode", "nearest").lower() == "linear":
        method = tf.image.ResizeMethod.BILINEAR
    else:
        method = tf.image.ResizeMethod.NEAREST_NEIGHBOR

    # tf.image.resize only supports the channel last layout, so go
    # NCHW -> NHWC, resize, and come back NHWC -> NCHW.
    channels_last = tf.transpose(x, perm=[0, 2, 3, 1])
    resized = tf.image.resize(channels_last, size=target_size, method=method)
    return [tf.transpose(resized, perm=[0, 3, 1, 2])]
def version_14(cls, node, **kwargs):
    """ Keep the upper or lower triangular part of the last two dimensions.

    ``x`` is looked up in ``kwargs["tensor_dict"]``. The optional second
    input ``k`` is the diagonal offset (0 when absent). The ``upper``
    attribute (default 1) selects the upper triangle, any other value the
    lower one. Returns a one-element list holding the result.

    NOTE(review): the Python ``if`` statements below branch on the value of
    ``k``, which is a tensor when supplied as an input; presumably this
    runs eagerly or under AutoGraph - confirm before restructuring.
    """
    x = kwargs["tensor_dict"][node.inputs[0]]
    if len(node.inputs) >= 2:
        k = kwargs["tensor_dict"][node.inputs[1]]
        # clamp k to the range [-x_shape[-2], x_shape[-1]]
        x_shape = tf_shape(x, dtype=k.dtype)
        if k > x_shape[-1]:
            k = x_shape[-1]
        elif k < 0 - x_shape[-2]:
            k = 0 - x_shape[-2]
    else:
        k = tf.constant(0, dtype=tf.int64)
    # a band limit of -1 tells tf.linalg.band_part to keep that whole side
    keep_triangle = tf.constant(-1, dtype=k.dtype)
    upper = node.attrs.get("upper", 1)

    if upper == 1:
        if k > 0:
            # remove everything on or below super-diagonal k - 1
            return [tf.subtract(x, tf.linalg.band_part(x, keep_triangle, k - 1))]
        else:
            # keep -k sub-diagonals plus the whole upper triangle
            return [tf.linalg.band_part(x, -k, keep_triangle)]
    else:
        if k >= 0:
            # keep the whole lower triangle plus k super-diagonals
            return [tf.linalg.band_part(x, keep_triangle, k)]
        else:
            # remove the upper triangle and the first -1 - k sub-diagonals
            return [tf.subtract(x, tf.linalg.band_part(x, -1 - k, keep_triangle))]
def max_unpool(cls, node, input_dict):
    """ MaxUnpooling operation

    Unpools ``x`` with the indices ``ind`` into the default output shape
    derived from the kernel shape and strides, then pads the result when
    pads are available (from the ``pads`` attribute or computed from the
    optional output shape input). Returns a one-element list.
    """
    x = input_dict[node.inputs[0]]
    ind = input_dict[node.inputs[1]]

    output_shape = None
    if len(node.inputs) > 2:
        output_shape = input_dict.get(node.inputs[2], None)

    kernel_shape = node.attrs["kernel_shape"]
    # if strides are not provided default is 1 along each spatial axis
    strides = node.attrs.get("strides", [1] * len(kernel_shape))
    pads = node.attrs.get("pads", None)

    input_shape = tf_shape(x)
    spatial_shape = cls._get_default_shape(input_shape, kernel_shape, strides)
    # the first two input dimensions are carried over unchanged
    full_shape = [input_shape[0], input_shape[1]] + spatial_shape

    unpooled = cls._unpool(x, ind, full_shape)

    # an explicit output shape takes precedence over the pads attribute
    if output_shape is not None:
        pads = cls._get_pads_from_output_shape(unpooled, output_shape)
    if pads is not None:
        unpooled = cls._pad_output(unpooled, pads, 0)

    return [unpooled]
def process_neg_idx_along_axis(cls, data, axis, indices):
    """ Convert all the negative indices to positive

    ScatterND/TensorScatterNDUpdate in Tensorflow does not support negative
    indices, so this function maps every index into the non-negative range
    before it is handed to Tensorflow. Each index is shifted by the size of
    ``data`` along ``axis`` and reduced modulo that size.
    """
    axis_size = tf.cast(tf_shape(data)[axis], indices.dtype)
    shifted = tf.add(indices, axis_size)
    return tf.math.floormod(shifted, axis_size)
def process_neg_idx(cls, data, indices, batch_dims=0):
    """ Convert all the negative indices to positive

    GatherND and ScatterND/TensorScatterNDUpdate in Tensorflow do not
    support negative indices, so this function maps every index into the
    non-negative range before it is handed to Tensorflow. Each index
    component is shifted by the size of the data dimension it addresses
    (skipping the first ``batch_dims`` dimensions) and reduced modulo that
    size.
    """
    data_shape = tf_shape(data)

    # Length of one index tuple: use the static value whenever it is known
    # and fall back to the run time shape otherwise. The decision is based
    # on ``indices`` itself, not on whether ``data`` is fully defined.
    index_depth = None
    static_indices_shape = indices.get_shape()
    if static_indices_shape.rank is not None:
        index_depth = static_indices_shape.as_list()[-1]
    if index_depth is None:
        index_depth = tf_shape(indices)[-1]

    # Cast before slicing so the slice is taken on a tensor, which accepts
    # both Python integers and tensors as bounds.
    max_i = tf.cast(data_shape,
                    indices.dtype)[batch_dims:index_depth + batch_dims]
    return tf.math.floormod(tf.add(indices, max_i), max_i)
def version_11(cls, node, **kwargs):
    """ Scatter ``updates`` into ``data`` at ``indices`` along ``axis``.

    ``data``, ``indices`` and ``updates`` are looked up in
    ``kwargs["tensor_dict"]``. The indices are asserted to be in bounds,
    negative indices are wrapped, and the per-axis indices are expanded to
    full coordinates for ``tf.tensor_scatter_nd_update``.

    Returns a one-element list holding the updated tensor.
    """
    axis = node.attrs.get("axis", 0)
    data = kwargs["tensor_dict"][node.inputs[0]]
    indices = kwargs["tensor_dict"][node.inputs[1]]
    updates = kwargs["tensor_dict"][node.inputs[2]]

    # process negative axis; it must stay a Python integer because it is
    # used below to index a Python list
    if axis < 0:
        axis += len(data.get_shape())

    # check are there any indices are out of bounds
    result = cls.chk_idx_out_of_bounds_along_axis(data, axis, indices)
    msg = 'ScatterElements indices are out of bounds, please double check the indices and retry.'
    with tf.control_dependencies(
        [tf.compat.v1.assert_equal(result, True, message=msg)]):
        # process negative indices
        indices = cls.process_neg_idx_along_axis(data, axis, indices)

        # Calculate shape of the tensorflow version of indices tensor.
        sparsified_dense_idx_shape = tf_shape(updates)

        # Move on to convert ONNX indices to tensorflow indices in 2 steps:
        #
        # Step 1:
        #   What would the index tensors look like if updates are all
        #   dense? In other words, produce a coordinate tensor for updates:
        #
        #   coordinate[i, j, k ...] = [i, j, k ...]
        #   where the shape of "coordinate" tensor is same as that of updates.
        #
        # Step 2:
        #   But the coordinate tensor needs some correction because coord
        #   vector at position axis is wrong (since we assumed update is dense,
        #   but it is not at the axis specified).
        #   So we update coordinate vector tensor elements at position=axis
        #   with the sparse coordinate indices.
        idx_tensors_per_axis = tf.meshgrid(*[
            tf.range(dim, dtype=tf.dtypes.int64)
            for dim in sparsified_dense_idx_shape
        ],
                                           indexing='ij')
        # the grid coordinates are int64, so the indices must be int64 too
        # before they can be concatenated with them
        idx_tensors_per_axis[axis] = tf.cast(indices, tf.int64)
        dim_expanded_idx_tensors_per_axis = [
            tf.expand_dims(idx, axis=-1) for idx in idx_tensors_per_axis
        ]
        coordinate = tf.concat(dim_expanded_idx_tensors_per_axis, axis=-1)

        # Now the coordinate tensor is in the shape
        # [updates.shape, updates.rank]
        # we need it to flattened into the shape:
        # [product(updates.shape), updates.rank]
        indices = tf.reshape(coordinate, [-1, tf.rank(data)])
        updates = tf.reshape(updates, [-1])

        return [tf.tensor_scatter_nd_update(data, indices, updates)]
def max_unpool(cls, node, input_dict):
    """ MaxUnpooling operation

    Unpools ``x`` with the indices ``ind`` in the NHWC layout (transposing
    from and back to the storage layout when the two differ), then pads the
    result when pads are available (from the ``pads`` attribute or computed
    from the optional output shape input). Returns a one-element list.
    """
    x = input_dict[node.inputs[0]]
    ind = input_dict[node.inputs[1]]

    output_shape = None
    if len(node.inputs) > 2:
        output_shape = input_dict.get(node.inputs[2], None)

    kernel_shape = node.attrs["kernel_shape"]
    spatial_size = len(kernel_shape)
    storage_format, _ = get_data_format(spatial_size + 2)

    # if strides are not provided default is 1 along each spatial axis
    strides = node.attrs.get("strides", [1] * spatial_size)
    pads = node.attrs.get("pads", None)

    input_shape = tf_shape(x)
    spatial_shape = cls._get_default_shape(input_shape, kernel_shape, strides)

    need_trans = storage_format != "NHWC"
    if need_trans:
        to_nhwc = get_perm_from_formats(storage_format, "NHWC")
        x = tf.transpose(x, perm=to_nhwc)
        ind = tf.transpose(ind,
                           perm=get_perm_from_formats(storage_format, "NHWC"))

    # default_shape to NHWC storage format
    nhwc_shape = [input_shape[0]] + spatial_shape + [input_shape[1]]

    unpooled = cls._unpool(x, ind, nhwc_shape)

    if need_trans:
        from_nhwc = get_perm_from_formats("NHWC", storage_format)
        unpooled = tf.transpose(unpooled, perm=from_nhwc)

    # an explicit output shape takes precedence over the pads attribute
    if output_shape is not None:
        pads = cls._get_pads_from_output_shape(unpooled, output_shape)
    if pads is not None:
        unpooled = cls._pad_output(unpooled, pads, 0)

    return [unpooled]
def _common(cls, node, **kwargs):
    """ Prepare the OneHot inputs and delegate to the Tensorflow op.

    Reads indices, depth and the ``[off_value, on_value]`` pair from
    ``kwargs["tensor_dict"]``, casts indices/depth to supported dtypes,
    squeezes a 1-D depth, wraps negative indices and passes the result on
    through ``cls.make_tensor_from_onnx_node``. Returns a one-element list.
    """
    attrs = copy.deepcopy(node.attrs)
    tensor_dict = kwargs["tensor_dict"]
    indices = tensor_dict[node.inputs[0]]
    depth = tensor_dict[node.inputs[1]]
    values = tensor_dict[node.inputs[2]]

    axis = attrs.get("axis", -1)
    # process negative axis
    if axis < 0:
        axis = len(tf_shape(indices)) + axis + 1

    # cast indices to tf.int64 and depth to tf.int32 if dtype is not
    # supported natively by Tensorflow. It is fairly safe since indices
    # and depth are integers
    if indices.dtype not in [tf.uint8, tf.int32, tf.int64]:
        indices = tf.cast(indices, tf.int64)
    if depth.dtype not in [tf.int32]:
        depth = tf.cast(depth, tf.int32)

    # depth can be either a scalar or a 1D tensor of size 1 according
    # to ONNX schema, although operators doc states only scalar.
    # So we support both now.
    if len(tf_shape(depth)) == 1:
        depth = tf.squeeze(depth)

    # process negative indices
    indices = cls.process_neg_indices(depth, indices)

    off_value = values[0]
    on_value = values[1]
    attrs["dtype"] = on_value.dtype
    attrs["axis"] = axis

    one_hot = cls.make_tensor_from_onnx_node(
        node,
        inputs=[indices, depth, on_value, off_value],
        attrs=attrs,
        **kwargs)
    return [one_hot]
def _common(cls, node, **kwargs):
    """ Prepare the OneHot inputs and delegate to the Tensorflow op.

    Reads indices, depth and the ``[off_value, on_value]`` pair from
    ``kwargs["tensor_dict"]``, casts indices/depth according to
    ``cls.indices_cast_map`` / ``cls.depth_cast_map``, squeezes a 1-D
    depth, wraps negative indices and passes the result on through
    ``cls.make_tensor_from_onnx_node``. Returns a one-element list.
    """
    attrs = copy.deepcopy(node.attrs)
    tensor_dict = kwargs["tensor_dict"]
    indices = tensor_dict[node.inputs[0]]
    depth = tensor_dict[node.inputs[1]]
    values = tensor_dict[node.inputs[2]]

    axis = attrs.get("axis", -1)
    # process negative axis
    if axis < 0:
        axis = len(tf_shape(indices)) + axis + 1

    # process tf.one_hot unsupported datatype for indices
    if indices.dtype in cls.indices_cast_map:
        indices = tf.cast(indices, cls.indices_cast_map[indices.dtype])

    # process tf.one_hot unsupported datatype for depth
    if depth.dtype in cls.depth_cast_map:
        depth = tf.cast(depth, cls.depth_cast_map[depth.dtype])

    # depth can be either a scalar or a 1D tensor of size 1 according
    # to ONNX schema, although operators doc states only scalar.
    # So we support both now.
    if len(tf_shape(depth)) == 1:
        depth = tf.squeeze(depth)

    # process negative indices
    indices = cls.process_neg_indices(depth, indices)

    off_value = values[0]
    on_value = values[1]
    attrs["dtype"] = on_value.dtype
    attrs["axis"] = axis

    one_hot = cls.make_tensor_from_onnx_node(
        node,
        inputs=[indices, depth, on_value, off_value],
        attrs=attrs,
        **kwargs)
    return [one_hot]
def version_10(cls, node, **kwargs):
    """ Pre-process the input according to the detect_* attributes.

    When ``detect_negative`` is 0 the input is clamped from below at zero,
    when ``detect_positive`` is 0 it is clamped from above at zero; the
    result is passed on through ``cls.make_tensor_from_onnx_node``.
    Returns a one-element list.
    """
    inp = kwargs["tensor_dict"][node.inputs[0]]
    zeros = tf.zeros(tf_shape(inp), inp.dtype)
    detect_negative = node.attrs.get("detect_negative", 1)
    detect_positive = node.attrs.get("detect_positive", 1)

    if detect_negative == 0:
        # detecting only positive infinity, zero out elements < 0
        inp = tf.maximum(zeros, inp)
    if detect_positive == 0:
        # detecting only negative infinity, zero out elements > 0
        inp = tf.minimum(zeros, inp)

    return [cls.make_tensor_from_onnx_node(node, inputs=[inp], **kwargs)]
def chk_idx_out_of_bounds_along_axis(cls, data, axis, indices):
    """ Check indices out of bounds for ScatterElement

    In the Tensorflow GPU version an out of bound index is ignored for
    ScatterND/TensorScatterNDUpdate. The ONNX spec states that any out of
    bound index is an error, so the converter runs this function to verify
    that all indices are in bounds along the axis before handing them to
    Tensorflow. When an out of bound index is detected the caller is
    expected to raise an InvalidArgumentError exception.

    Returns a boolean tensor shaped like ``indices`` that is True where the
    index lies in ``[-limit, limit)``, ``limit`` being the size of ``data``
    along ``axis``.
    """
    limit = tf.cast(tf_shape(data), indices.dtype)[axis]
    not_too_small = tf.greater_equal(indices, tf.negative(limit))
    not_too_big = tf.less(indices, limit)
    return tf.logical_and(not_too_small, not_too_big)
def _common(cls, node, **kwargs):
    """ Compute the arg-max, honouring select_last_index and keepdims.

    With ``select_last_index`` set, the input is reversed along ``axis``
    before the op is applied and the resulting indices are mapped back to
    the original orientation. With ``keepdims`` equal to 1 the reduced
    axis is re-inserted. Returns a one-element list.
    """
    axis = node.attrs.get("axis", 0)
    keepdims = node.attrs.get("keepdims", 1)
    select_last_index = node.attrs.get("select_last_index", 0)

    if select_last_index != 0:
        # reverse the input and apply argmax on that to get last occurrence of max
        flipped = tf.reverse(kwargs["tensor_dict"][node.inputs[0]], axis=[axis])
        arg_max = cls.make_tensor_from_onnx_node(node, inputs=[flipped], **kwargs)
        # adjust indices to account for the reverse
        arg_max = tf_shape(flipped)[axis] - arg_max - 1
    else:
        arg_max = cls.make_tensor_from_onnx_node(node, **kwargs)

    if keepdims != 1:
        return [arg_max]
    return [tf.expand_dims(arg_max, axis=axis)]
def convert_NHWC_indices_to_NCHW_indices(argmax):
    # Notation:
    #   i - index in NCHW
    #   I - index in NHWC
    #   C - number of channels
    #   b - batch = I // CHW
    #   c - channel = I % C
    #   H - height
    #   W - width
    # Relation between the two flat indices:
    #   I = i - c(HW - 1) + (C - 1)(i - bCHW - cHW)
    #   i = (I + c(HW - 1) + (C - 1)(bCHW + cHW))/C

    # x_shape will always be in NCHW format here,
    # because maxpool_with_argmax only support 2d input
    x_shape = tf_shape(x)
    batch = x_shape[0]
    channels = x_shape[1]
    height = x_shape[2]
    width = x_shape[3]
    plane = tf.math.multiply(height, width)
    volume = tf.math.multiply(channels, plane)

    argmax_b = tf.math.floordiv(argmax, volume)
    argmax_c = tf.math.floormod(argmax, channels)

    # I + c(HW - 1)
    channel_term = tf.math.multiply(argmax_c, tf.math.subtract(plane, 1))
    new_ind = tf.math.add(argmax, channel_term)

    # ... + (C - 1)(bCHW + cHW)
    batch_part = tf.math.multiply(argmax_b, volume)
    channel_part = tf.math.multiply(argmax_c, plane)
    offset_term = tf.math.multiply(tf.math.subtract(channels, 1),
                                   tf.math.add(batch_part, channel_part))
    new_ind = tf.math.add(new_ind, offset_term)

    # ... / C
    new_ind = tf.math.floordiv(new_ind, channels)

    # add batch dimension into the argmax index
    batch_offsets = tf.math.multiply(tf.range(batch, dtype=new_ind.dtype),
                                     volume)
    # append trailing unit dimensions so the offsets broadcast per batch
    for _ in range(new_ind.shape.rank - 1):
        batch_offsets = tf.expand_dims(batch_offsets, -1)
    return tf.math.add(new_ind, batch_offsets)
def version_10(cls, node, **kwargs):
    """ Resize ``x`` by ``scales``, accepting NCHW or NHWC input.

    The layout is inferred from ``scales``: a unit scale at position 1
    means NCHW, otherwise a unit scale at position 3 means NHWC. An
    assertion checks that the batch scale and one of these two is 1. The
    ``mode`` attribute selects bilinear ("linear") or nearest-neighbour
    resizing. Returns a one-element list holding the resized tensor.
    """
    tensors = kwargs["tensor_dict"]
    x = tensors[node.inputs[0]]
    x_shape = tf_shape(x)
    scales = tensors[node.inputs[1]]

    batch_unscaled = tf.equal(scales[0], 1)
    channel_unscaled = tf.logical_or(tf.equal(scales[1], 1),
                                     tf.equal(scales[3], 1))
    scales_guard = tf.Assert(tf.logical_and(batch_unscaled, channel_unscaled),
                             [scales])

    with tf.control_dependencies([scales_guard]):
        is_nchw = tf.equal(scales[1], 1)
        spatial_scale = tf.where(is_nchw, scales[2:], scales[1:3])
        spatial_dims = tf.where(is_nchw, x_shape[2:], x_shape[1:3])
        target_size = tf.cast(
            spatial_scale * tf.cast(spatial_dims, scales.dtype), tf.int32)

        if node.attrs.get("mode", "nearest").lower() == "linear":
            method = tf.image.ResizeMethod.BILINEAR
        else:
            method = tf.image.ResizeMethod.NEAREST_NEIGHBOR

        def _resize_channels_first(tensor):
            # NCHW -> NHWC, resize, NHWC -> NCHW
            channels_last = tf.transpose(tensor, perm=[0, 2, 3, 1])
            resized = tf.image.resize(channels_last,
                                      size=target_size,
                                      method=method)
            return tf.transpose(resized, perm=[0, 3, 1, 2])

        def _resize_channels_last(tensor):
            return tf.image.resize(tensor, size=target_size, method=method)

        output = tf.cond(is_nchw, lambda: _resize_channels_first(x),
                         lambda: _resize_channels_last(x))

        return [output]
def _common(cls, node, **kwargs):
    """ Return the shape of the input, optionally sliced by start/end.

    Before version 15 the op result is returned as is. From version 15 on
    the ``start`` and ``end`` attributes select a slice of the shape
    vector: negative values count from the back and both bounds are
    clamped to ``[0, rank]``; an empty range yields an empty result.
    Returns a one-element list.
    """
    if cls.SINCE_VERSION < 15:
        return [cls.make_tensor_from_onnx_node(node, **kwargs)]

    x = kwargs["tensor_dict"][node.inputs[0]]
    x_rank = len(tf_shape(x))

    def _resolve(bound):
        # wrap a negative bound, then clamp it into [0, rank]
        if bound < 0:
            bound += x_rank
        return min(max(bound, 0), x_rank)

    start = _resolve(node.attrs.get("start", 0))
    end = _resolve(node.attrs.get("end", x_rank))

    result = cls.make_tensor_from_onnx_node(node, **kwargs)

    # a start beyond end selects nothing rather than a negative size
    return [tf.slice(result, [start], [max(end - start, 0)])]
def version_13(cls, node, **kwargs):
    """ Reduce ``x`` over the axes given by the optional second input.

    Without axes (input missing or empty) every dimension is reduced,
    unless ``noop_with_empty_axes`` is set, in which case the input is
    returned unchanged. All supplied axes are passed on as ``axis``.
    Returns a one-element list.
    """
    x = kwargs["tensor_dict"][node.inputs[0]]
    attrs = copy.deepcopy(node.attrs)
    noop_with_empty_axes = attrs.pop("noop_with_empty_axes", 0)

    axis = None
    if len(node.inputs) > 1:
        axes = kwargs["tensor_dict"][node.inputs[1]]
        axes_shape = tf_shape(axes)
        # Use every supplied axis. ``len(axes_shape)`` is the rank of the
        # axes tensor, not the number of axes, so it cannot be used to tell
        # a single axis from several.
        if len(axes_shape) == 0 or axes_shape[0] != 0:
            axis = axes

    # return the input tensor when axis is None and noop_with_empty_axes is True
    if axis is None and noop_with_empty_axes:
        return [x]

    attrs["axis"] = axis
    # https://github.com/onnx/onnx/issues/585
    attrs["keepdims"] = attrs.pop("keepdims", 1) == 1
    return [
        cls.make_tensor_from_onnx_node(node, inputs=[x], attrs=attrs, **kwargs)
    ]
def conv(cls, node, input_dict, transpose=False):
    """ Convolution method for both conv and transposed conv

    For transposed conv,
      Attr pads is not used for input, but declares how much output is padded.
      Here, output means output from transposed conv which already pad
      output_padding if set.
      So the pseudo explanation for output should be:
        output = conv_transpose_output + output_padding - pads
      And conv_transpose_output shape should be:
        conv_transpose_output_shape[i] =
            strides[i] * (input_shape[i] - 1) + kernel_shape[i]

    Inputs are looked up in ``input_dict`` by the names in ``node.inputs``:
    the data tensor, the weights and, optionally, a bias. Attributes read
    from ``node.attrs``: kernel_shape, dilations, strides, pads, auto_pad,
    group, output_shape and output_padding.

    Returns a one-element list holding the result in the storage format.

    Raises ValueError for an unknown auto_pad value or compute format,
    RuntimeError for a dilated transposed convolution and
    NotImplementedError for transposed convolutions with other than 1, 2 or
    3 spatial dimensions.
    """
    x = input_dict[node.inputs[0]]
    x_rank = len(x.get_shape())
    x_shape = tf_shape(x, tf.int32)
    spatial_size = x_rank - 2

    storage_format, compute_format = get_data_format(x_rank)
    compute_c_idx = compute_format.find("C")
    # the compute format without its batch and channel letters
    spatial_format = "".join([d for d in compute_format if d not in ["N", "C"]])

    in_weights = input_dict[node.inputs[1]]
    weights_rank = len(in_weights.get_shape())
    # NOTE: both branches build the same permutation; only the meaning of
    # the two leading weight dimensions differs.
    if transpose:
        # Translate weights from (C x M x KH x KW) to (KH x KW X M X C)
        perm = list(range(2, weights_rank)) + [1, 0]
    else:
        # Translate weights from (M x C x KH x KW) to (KH x KW X C X M)
        perm = list(range(2, weights_rank)) + [1, 0]

    if "kernel_shape" in node.attrs.keys():
        kernel_shape = node.attrs["kernel_shape"]
        # the attribute can only be cross-checked against a static shape
        if in_weights.get_shape().is_fully_defined():
            assert in_weights.get_shape().as_list()[2:] == kernel_shape, (
                "kernel_shape "
                "attr of convolution does not match the actual weight "
                "passed to this operation, attr {}, actual {}").format(
                    kernel_shape,
                    in_weights.get_shape().as_list())
    else:
        kernel_shape = tf_shape(in_weights, tf.int32)[2:]

    weights = tf.transpose(in_weights, perm)
    dilations = node.attrs.get("dilations", [1] * spatial_size)
    strides = node.attrs.get("strides", [1] * spatial_size)

    pads = node.attrs.get("pads", [0, 0] * spatial_size)

    # Check auto_pad nonexistent or NOTSET first
    if "auto_pad" not in node.attrs or node.attrs["auto_pad"] == "NOTSET":
        if not transpose:
            # explicit pads are applied to the input up front, so the
            # convolution itself runs unpadded
            if pads != [0, 0] * spatial_size:
                x = PadMixin.get_padding_as_op(x, pads)
            pad_mode = "VALID"
        else:
            pad_mode = "NOTSET"
    # Then we use auto_pad to setup pad_mode
    elif node.attrs["auto_pad"] == "SAME_UPPER":
        pad_mode = "SAME"
    elif node.attrs["auto_pad"] == "VALID":
        pad_mode = "VALID"
    elif node.attrs["auto_pad"] == "SAME_LOWER":
        pad_mode = PAD_TF_INCOMPATIBLE
    else:
        raise ValueError("Invalid auto_pad attribute: {}".format(
            node.attrs["auto_pad"]))

    # Currently auto_pad = SAME_LOWER is not supported
    if pad_mode is PAD_TF_INCOMPATIBLE:
        if transpose:
            exception.OP_UNSUPPORTED_EXCEPT(
                "ConvTranspose with auto_pad `SAME_LOWER`", "Tensorflow")
        else:
            exception.OP_UNSUPPORTED_EXCEPT("Conv with auto_pad `SAME_LOWER`",
                                            "Tensorflow")

    group = node.attrs.get("group", 1)
    weight_shape = weights.get_shape().as_list()
    # Is this convolution depthwise we can support?
    depthwise = (x_rank == 4 and len(weight_shape) == 4 and group != 1 and
                 not transpose and not (None in weight_shape))
    # the group count can only be compared with the channel count when the
    # input shape is available as a numpy array
    if depthwise and isinstance(x_shape, np.ndarray):
        depthwise = bool(group == x_shape[1])

    if depthwise is True:
        # Depthwise convolution.
        # The convolution kernel layout in tf.depthwise_conv is:
        # [filter_height, filter_width, in_channels, channel_multiplier]
        # Weight is now (KH x KW X C/g X M), or more precisely,
        # (KH x KW X C/g X (g * M/g)),
        # we reshape it to (KH x KW x C x M/g)
        # NOTE: Assuming weight has fixed shape.

        depthwise_filter_shape = weight_shape[0:2] + [
            -1, weight_shape[3] // group
        ]
        weights = tf.reshape(weights, depthwise_filter_shape)

        if not sys_config.device == 'CUDA':
            # transpose input to NHWC layout
            x = tf.transpose(x,
                             perm=get_perm_from_formats(storage_format,
                                                        compute_format))
        weight_groups = [weights]
        xs = [x]
    else:
        # one weight slice (and one input slice) per group
        weight_groups = tf.split(weights, num_or_size_splits=group, axis=-1)
        if sys_config.device == 'CUDA':
            # input is split along axis 1 without transposing
            if group == 1:
                xs = [x]
            else:
                xs = tf.split(x, num_or_size_splits=group, axis=1)
        else:
            # input is transposed to the compute format and split along
            # the last axis
            x = tf.transpose(x,
                             perm=get_perm_from_formats(storage_format,
                                                        compute_format))
            if group == 1:
                xs = [x]
            else:
                xs = tf.split(x, num_or_size_splits=group, axis=-1)

    if transpose:
        if dilations != [1] * spatial_size:
            raise RuntimeError("Cannot set non-1 dilation for conv transpose.")
        convolved = []
        # this is a workaround for tensorflow AutoGraph not detecting
        # correctly x. This is fixed in tf>=2.2.0
        x = None
        for (x, weight) in zip(xs, weight_groups):
            x_spatial_shape = [
                x_shape[storage_format.find(d)] for d in spatial_format
            ]
            weights_shape = tf_shape(weights, tf.int32)
            output_shape = node.attrs.get("output_shape", None)
            # start with the batch dimension; spatial dimensions and the
            # channel dimension are added below
            conv_output_shape = [x_shape[storage_format.find("N")]]

            # calculate output shape
            if pad_mode == "NOTSET":
                if output_shape is None:
                    conv_output_shape += [
                        strides[i] * x_spatial_shape[i] - strides[i] +
                        (kernel_shape[i] - 1) * dilations[i] + 1
                        for i in list(range(spatial_size))
                    ]
                else:
                    # the requested shape plus the pads that are sliced
                    # off again further down
                    # NOTE(review): only the last two entries of
                    # output_shape are read - confirm for 1d/3d inputs
                    conv_output_shape += [
                        s + pads[i] + pads[spatial_size + i]
                        for i, s in enumerate(output_shape[-2:])
                    ]
                conv_output_shape.insert(compute_c_idx, weights_shape[-2])

                # make strides to match input rank
                strides_full = [1] + strides
                strides_full.insert(compute_c_idx, 1)

                # get corresponding function in tf
                if spatial_size == 1:
                    conv_func = tf.nn.conv1d_transpose
                    # the 1d variant is given the single stride value
                    strides_full = strides[0]
                elif spatial_size == 2:
                    conv_func = tf.nn.conv2d_transpose
                elif spatial_size == 3:
                    conv_func = tf.nn.conv3d_transpose
                else:
                    raise NotImplementedError(
                        "Transposed convolution for {}d is not implemented in Tensorflow"
                        .format(spatial_size))

                # use raw input x to do transposed conv
                conv_rs = conv_func(x,
                                    weight,
                                    conv_output_shape,
                                    strides_full,
                                    padding="VALID",
                                    data_format=compute_format)

                # pad output first by output_padding attr
                if "output_padding" in node.attrs and output_shape is None:
                    output_padding = [[0, 0]
                                     ] + [[0, p] for p in node.attrs["output_padding"]]
                    output_padding.insert(compute_c_idx, [0, 0])
                    conv_rs = tf.pad(conv_rs, output_padding)

                # remove pads set in pads attr
                conv_rs_shape = tf_shape(conv_rs, tf.int32)
                conv_rs_shape_list = [
                    conv_rs_shape[i] for i in range(conv_rs.shape.rank)
                ]
                begin = [0] + pads[:spatial_size]
                begin.insert(compute_c_idx, 0)
                # batch and channel sizes are kept, every spatial size
                # loses its begin and end pad
                size = [
                    s if d in ["N", "C"] else s - pads[spatial_format.find(d)] -
                    pads[spatial_format.find(d) + spatial_size]
                    for d, s in zip(compute_format, conv_rs_shape_list)
                ]
                conv_rs = tf.slice(conv_rs, begin=begin, size=size)

                convolved.append(conv_rs)
            else:
                # No need to check pads if auto_pad is specifically provided.
                # The assumption is that once auto_pad is provided as either VALID
                # or SAME_UPPER (SAME_LOWER is currently not supported in TF) the
                # output_shape will always be inferred. That is, the output_shape
                # and output_padding will not be used in this case.
                if pad_mode == "VALID":
                    conv_output_shape += [
                        strides[i] * (x_spatial_shape[i] - 1) + weights_shape[i]
                        for i in list(range(spatial_size))
                    ]
                else:
                    conv_output_shape += [
                        strides[i] * x_spatial_shape[i]
                        for i in list(range(spatial_size))
                    ]
                conv_output_shape.insert(compute_c_idx, weights_shape[-2])

                # make strides to match input rank
                strides_full = [1] + strides
                strides_full.insert(compute_c_idx, 1)

                # get corresponding function in tf
                if spatial_size == 1:
                    conv_func = tf.nn.conv1d_transpose
                    # the 1d variant is given the single stride value
                    strides_full = strides[0]
                elif spatial_size == 2:
                    conv_func = tf.nn.conv2d_transpose
                elif spatial_size == 3:
                    conv_func = tf.nn.conv3d_transpose
                else:
                    raise NotImplementedError(
                        "Transposed convolution for {}d is not implemented in Tensorflow"
                        .format(spatial_size))

                # use raw input x to do transposed conv
                conv_rs = conv_func(x,
                                    weight,
                                    conv_output_shape,
                                    strides_full,
                                    padding=pad_mode,
                                    data_format=compute_format)
                convolved.append(conv_rs)

    else:  # not transpose:
        if depthwise is True:
            # tf.nn.depthwise_conv2d takes one stride per input dimension
            if compute_format == "NHWC":
                strides = [1] + strides + [1]
            elif compute_format == 'NCHW':
                strides = [1, 1] + strides
            else:
                raise ValueError("Invalid compute_format: {}".format(compute_format))

            convolved = [
                tf.nn.depthwise_conv2d(x,
                                       weight,
                                       padding=pad_mode,
                                       strides=strides,
                                       dilations=dilations,
                                       data_format=compute_format)
                for (x, weight) in zip(xs, weight_groups)
            ]
        else:
            convolved = [
                tf.nn.convolution(x,
                                  weight,
                                  padding=pad_mode,
                                  strides=strides,
                                  dilations=dilations,
                                  data_format=compute_format)
                for (x, weight) in zip(xs, weight_groups)
            ]

    # Join the per-group results along the channel axis, add the bias when
    # one is supplied and, on the non-CUDA path, transpose back from the
    # compute format to the storage format.
    if len(node.inputs) == 2:
        if sys_config.device == 'CUDA':
            output = tf.concat(convolved, axis=1)
        else:
            output = tf.concat(convolved, axis=-1)
            output = tf.transpose(output,
                                  perm=get_perm_from_formats(
                                      compute_format, storage_format))
    else:
        bias = input_dict[node.inputs[2]]
        bias = cls.explicit_broadcast([x, bias], compute_c_idx)

        if sys_config.device == 'CUDA':
            output = tf.concat(convolved, axis=1)
            output = tf.add(output, bias)
        else:
            output = tf.concat(convolved, axis=-1)
            output = tf.add(output, bias)
            output = tf.transpose(output,
                                  perm=get_perm_from_formats(
                                      compute_format, storage_format))

    return [output]
def version_11(cls, node, **kwargs):
    """Resize the spatial dimensions of an NCHW tensor.

    Inputs are looked up in ``kwargs["tensor_dict"]`` by the names listed in
    ``node.inputs``: ``x``, ``roi``, ``scales`` and, when a fourth input is
    present, ``sizes``. All of them are laid out in NCHW order.

    Attributes read from ``node.attrs``:
        coordinate_transformation_mode: defaults to ``"half_pixel"``.
            ``"tf_crop_and_resize"``, ``"align_corners"`` and
            ``"asymmetric"`` get dedicated handling; every other value goes
            through ``tf.image.resize``.
        extrapolation_value: defaults to ``0.0``; only forwarded to
            ``tf.image.crop_and_resize``.
        mode: ``"linear"`` or ``"cubic"`` (case-insensitive); any other
            value selects nearest neighbour. Defaults to ``"nearest"``.

    Returns:
        A one-element list holding the resized tensor in NCHW format.
    """
    tensor_dict = kwargs["tensor_dict"]
    x = tensor_dict[node.inputs[0]]
    x_shape = tf_shape(x)
    roi = tensor_dict[node.inputs[1]]
    scales = tensor_dict[node.inputs[2]]
    coordinate_transformation_mode = node.attrs.get(
        "coordinate_transformation_mode", "half_pixel")
    extrapolation_value = node.attrs.get("extrapolation_value", 0.0)
    mode = node.attrs.get("mode", "nearest")

    if mode.lower() == "linear":
        mode = tf.image.ResizeMethod.BILINEAR
        tf_resize = tf.compat.v1.image.resize_bilinear
    elif mode.lower() == "cubic":
        mode = tf.image.ResizeMethod.BICUBIC
        tf_resize = tf.compat.v1.image.resize_bicubic
    else:
        mode = tf.image.ResizeMethod.NEAREST_NEIGHBOR
        tf_resize = tf.compat.v1.image.resize_nearest_neighbor

    if len(node.inputs) == 3:
        # Only scales is defined: new [h, w] = scale[h, w] * input [h, w].
        h_w_scale = scales[2:]
        h_w_shape = x_shape[2:]
        new_size = tf.cast(h_w_scale * tf.cast(h_w_shape, scales.dtype),
                           tf.int32)
    else:
        # sizes is defined. The number of elements of 'sizes' should be the
        # same as the rank of input 'X'.
        sizes = tensor_dict[node.inputs[3]]
        sizes.set_shape(x_shape.shape)
        new_size = tf.cast(sizes[2:], tf.int32)
    # TensorFlow requires the shape of "size" passed to the resize functions
    # to be known at graph creation time. With dynamic shapes the shape of
    # "new_size" would be unknown until runtime, but it always holds [h, w],
    # so its shape is pinned to 2 here.
    new_size.set_shape([2])

    # The input image is in NCHW format, but tf.image.crop_and_resize,
    # tf.image.resize and tf.compat.v1.image.resize_xx only support the
    # channel-last data format. Transpose to NHWC first, resize, and then
    # transpose back to NCHW.
    x_t = tf.transpose(x, perm=[0, 2, 3, 1])
    if coordinate_transformation_mode == "tf_crop_and_resize":
        # The crop boxes are built only in this branch: it is the only
        # consumer of roi, and gathering from an empty roi in the other
        # modes would be an out-of-range lookup.
        x_rank = len(x.get_shape())
        # Pick the spatial start and end coordinates out of roi, i.e.
        # [2, 3, 6, 7] for a rank-4 input, giving [y1, x1, y2, x2].
        indices = (list(range(2, x_rank)) +
                   list(range(x_rank + 2, 2 * x_rank)))
        boxes = tf.expand_dims(tf.gather(roi, indices, axis=0), 0)
        # One box index per image in the batch.
        box_indices = tf.cast(tf.range(0, x_shape[0]), dtype=tf.int32)
        y = tf.image.crop_and_resize(x_t, boxes, box_indices, new_size, mode,
                                     extrapolation_value)
    elif coordinate_transformation_mode == "align_corners":
        y = tf_resize(x_t,
                      size=new_size,
                      align_corners=True,
                      half_pixel_centers=False)
    elif coordinate_transformation_mode == "asymmetric":
        y = tf_resize(x_t,
                      size=new_size,
                      align_corners=False,
                      half_pixel_centers=False)
    else:
        # half_pixel or tf_half_pixel_for_nn
        y = tf.image.resize(x_t, size=new_size, method=mode)
    output = tf.transpose(y, perm=[0, 3, 1, 2])
    return [output]
def _remove_dilations(self):
    """
    Rearrange the input so that ordinary pooling can emulate dilated pooling.

    For every sliding window, the values selected by the dilations, strides
    and kernel size are gathered next to each other. Running a pooling
    operation with strides = kernel_shape over the returned tensor then
    produces the dilated pooling result.

    The input is indexed as (batch, spatial dims..., channels). As a side
    effect, self.output_shape is set to the shape of the returned tensor.

    Example:
      Input:     [[  0,  1,  2,  3],
                  [  4,  5,  6,  7],
                  [  8,  9, 10, 11],
                  [ 12, 13, 14, 15]]

      Kernel:    [2, 2]
      Dilations: [2, 2]
      Strides:   [1, 1]

      Will return:
                 [[  0,  2,  1,  3],
                  [  8, 10,  9, 11],
                  [  4,  6,  5,  7],
                  [ 12, 14, 13, 15]]

      After max_pool2d with kernel_shape = strides = [2, 2]
      the result is:
                 [[ 10, 11],
                  [ 14, 15]]
    """
    full_shape = tf_shape(self.input)
    n_spatial = self.spatial_size
    spatial_dims = full_shape[1:n_spatial + 1]
    num_channels = full_shape[n_spatial + 1]

    # Seed the gather indices with the channel range as a column vector,
    # e.g. [[0] [1]]. All index sets handled below are column vectors too.
    indices = tf.expand_dims(tf.range(num_channels, dtype=tf.int64), 1)

    # self.output_shape becomes the shape of the returned tensor. Only the
    # batch entry is known at this point; the spatial and channel entries
    # are zero placeholders that get filled in below.
    self.output_shape = [full_shape[0]] + [0] * (n_spatial + 1)

    # Walk the spatial dimensions from the last (innermost) to the first.
    # Each step computes the output indices of that dimension, maps them to
    # input indices with `_calc_input_ind`, and combines them with the
    # indices accumulated so far through a cartesian product.
    #
    # For the 4x4 example in the docstring (Kernel [2, 2], Dilations [2, 2],
    # Strides [1, 1]) the steps are:
    #
    #   1. Initial indices = [[0]]  (a single channel)
    #   2. Step 0 (axis 1): filter size 3, output size 4
    #        axis indices = [[0] [2] [1] [3]]
    #        indices      = [[0 0] [2 0] [1 0] [3 0]]
    #   3. Step 1 (axis 0): filter size 3, output size 4
    #        axis indices = [[0] [2] [1] [3]]
    #        indices      = [[0 0 0] [0 2 0] [0 1 0] [0 3 0]
    #                        [2 0 0] [2 2 0] [2 1 0] [2 3 0]
    #                        [1 0 0] [1 2 0] [1 1 0] [1 3 0]
    #                        [3 0 0] [3 2 0] [3 1 0] [3 3 0]]
    #
    # The final indices feed gather_nd to collect values from the input.
    for axis in reversed(range(n_spatial)):
        kernel = self.kernel_shape[axis]
        dilation = self.dilations[axis]
        stride = self.strides[axis]

        # Extent covered by one dilated kernel window along this axis.
        effective_kernel = (kernel - 1) * dilation + 1
        # Number of window positions, each contributing `kernel` values.
        windows = ((spatial_dims[axis] - effective_kernel) // stride) + 1
        expanded_size = windows * kernel
        self.output_shape[axis + 1] = expanded_size

        # Start from the output positions (e.g. [0, 1, 2, 3]) and map them
        # to the matching input positions ([0, 2, 1, 3] in the example).
        axis_indices = self._calc_input_ind(tf.range(expanded_size), kernel,
                                            dilation, stride)
        # Column vector, then cartesian product with the inner dimensions.
        axis_indices = tf.expand_dims(axis_indices, 1)
        indices = tf_product(axis_indices, indices)

    # For 2D data the loop yields
    #   [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]]
    # where n is the height, m is the width and c is the channel number.

    # Fill in the channel entry of the output shape.
    self.output_shape[n_spatial + 1] = num_channels

    # Add leading axes so the index tensor matches the input rank + 1 ...
    for _ in range(n_spatial):
        indices = tf.expand_dims(indices, 0)
    # ... and duplicate the indices for every batch entry.
    indices = tf.tile(indices, [full_shape[0]] + [1] * (n_spatial + 1))

    # Pull the selected values out of the input and give them the shape
    # computed above.
    gathered = tf.gather_nd(self.input, indices, batch_dims=1)
    return tf.reshape(gathered, self.output_shape)