def _VerifyHandValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected): """Verifies the output values of the depthwise convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. expected: An array containing the expected operation outputs. """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)], dtype=np.float32).reshape(tensor_in_sizes) x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)], dtype=np.float32).reshape(filter_in_sizes) with self.test_session() as sess: t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=np.float32) t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=np.float32) with self.test_scope(): conv = nn_ops.depthwise_conv2d_native( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv, {t1: x1, t2: x2}) print("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv)
def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): """The derivatives for deconvolution. Args: op: the Deconvolution op. grad: the tensor representing the gradient w.r.t. the output Returns: the gradients w.r.t. the input and the filter """ return [ None, nn_ops.depthwise_conv2d_native_backprop_filter( grad, array_ops.shape(op.inputs[1]), op.inputs[2], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")), nn_ops.depthwise_conv2d_native(grad, op.inputs[1], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")) ]
def op(input_converted, _, padding): outputs = nn_ops.depthwise_conv2d_native( input=input_converted, filter=filter, strides=strides, padding=padding, data_format=data_format) num_outputs = depth_multiplier * num_filters_in if normalizer_fn is not None: normalizer_params_ = normalizer_params or {} outputs = normalizer_fn(outputs, **normalizer_params_) else: if biases_initializer is not None: biases_collections = utils.get_variable_collections( variables_collections, 'biases') biases = variables.model_variable( 'biases', shape=[ num_outputs, ], dtype=dtype, initializer=biases_initializer, regularizer=biases_regularizer, trainable=trainable, collections=biases_collections) outputs = nn.bias_add(outputs, biases, data_format=data_format) if activation_fn is not None: outputs = activation_fn(outputs) return outputs
def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): """The derivatives for deconvolution. Args: op: the Deconvolution op. grad: the tensor representing the gradient w.r.t. the output Returns: the gradients w.r.t. the input and the filter """ return [ None, nn_ops.depthwise_conv2d_native_backprop_filter( grad, array_ops.shape(op.inputs[1]), op.inputs[2], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")), nn_ops.depthwise_conv2d_native( grad, op.inputs[1], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")) ]
def _VerifyHandValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected, use_gpu): """Verifies the output values of the depthwise convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. expected: An array containing the expected operation outputs. use_gpu: Whether to use GPU. """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.cached_session(use_gpu=use_gpu) as sess: t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) conv = nn_ops.depthwise_conv2d_native( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) tf_logging.info("value = %r", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv)
def _VerifyHandValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected, use_gpu): """Verifies the output values of the depthwise convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. expected: An array containing the expected operation outputs. use_gpu: Whether to use GPU. """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) conv = nn_ops.depthwise_conv2d_native( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv) tf_logging.info("value = %r", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv)
def op(input_converted, _, padding): return nn_ops.depthwise_conv2d_native(input=input_converted, filter=depthwise_filter, strides=strides, padding=padding, data_format=data_format, name="depthwise")
def depthwise_conv2d(input, filter, strides, padding, name=None): """Depthwise 2-D convolution. Given an input tensor of shape `[batch, in_height, in_width, in_channels]` and a filter tensor of shape `[filter_height, filter_width, in_channels, channel_multiplier]` containing `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies a different filter to each input channel (expanding from 1 channel to `channel_multiplier` channels for each), then concatenates the results together. The output has `in_channels * channel_multiplier` channels. In detail, output[b, i, j, k * channel_multiplier + q] = sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D with shape `[batch, in_height, in_width, in_channels]`. filter: 4-D with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. strides: 1-D of size 4. The stride of the sliding window for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, in_channels * channel_multiplier].` """ with ops.op_scope([input, filter], name, "depthwise") as name: input = ops.convert_to_tensor(input, name="tensor_in") filter = ops.convert_to_tensor(filter, name="filter_in") # A shape is required to statically compute the number of separable filters. if filter.get_shape().ndims is not None: assert len(filter.get_shape()) == 4 in_channels = filter.get_shape()[2] # Sanity checks, if shape information is available for the inputs. if input.get_shape().ndims is not None: assert len(input.get_shape()) == 4 assert input.get_shape()[3] == in_channels, ( "Mismatched input depth %d and number of depthwise filters %d." % (input.get_shape()[3].value, in_channels)) else: assert input.get_shape().ndims is not None, ( "Either tensor must provide static shape information.") assert input.get_shape().ndims == 4 in_channels = input.get_shape()[3] if in_channels == 1: return nn_ops.conv2d(input, filter, strides, padding, name=name) else: return nn_ops.depthwise_conv2d_native(input, filter, strides, padding, name=name)
def _VerifyHandValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected): """Verifies the output values of the depthwise convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. expected: An array containing the expected operation outputs. """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)], dtype=np.float32).reshape(tensor_in_sizes) x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)], dtype=np.float32).reshape(filter_in_sizes) with self.cached_session() as sess: t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=np.float32) t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=np.float32) with self.test_scope(): conv = nn_ops.depthwise_conv2d_native( t1, t2, strides=[1, stride, stride, 1], padding=padding) value = sess.run(conv, {t1: x1, t2: x2}) print("value = ", value) self.assertArrayNear(expected, np.ravel(value), 1e-5) self.assertShapeEqual(value, conv)
def op(input_converted, _, padding): return nn_ops.depthwise_conv2d_native( input=input_converted, filter=filter, strides=strides, padding=padding, name=name)
def op(input_converted, _, padding): return nn_ops.depthwise_conv2d_native( input=input_converted, filter=depthwise_filter, strides=strides, padding=padding, name="depthwise")
def test_depthwise_conv2d(self, padding, tensor_in_sizes, filter_in_sizes): tensor_in_sizes = tensor_in_sizes filter_in_sizes = filter_in_sizes total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) t2.set_shape(filter_in_sizes) conv = nn_ops.depthwise_conv2d_native(t1, t2, strides=[1, 1, 1, 1], padding=padding) sess_fn = lambda sess: sess.run(conv) if not np.isclose(self.with_ngraph(sess_fn), self.without_ngraph(sess_fn)).all(): raise AssertionError
def depthwise_conv2d(input, filter, strides, padding, name=None): """Depthwise 2-D convolution. Given an input tensor of shape `[batch, in_height, in_width, in_channels]` and a filter tensor of shape `[filter_height, filter_width, in_channels, channel_multiplier]` containing `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies a different filter to each input channel (expanding from 1 channel to `channel_multiplier` channels for each), then concatenates the results together. The output has `in_channels * channel_multiplier` channels. In detail, output[b, i, j, k * channel_multiplier + q] = sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D with shape `[batch, in_height, in_width, in_channels]`. filter: 4-D with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. strides: 1-D of size 4. The stride of the sliding window for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution) name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, in_channels * channel_multiplier].` """ with ops.op_scope([input, filter], name, "depthwise") as name: input = ops.convert_to_tensor(input, name="tensor_in") filter = ops.convert_to_tensor(filter, name="filter_in") # A shape is required to statically compute the number of separable filters. if filter.get_shape().ndims is not None: assert len(filter.get_shape()) == 4 in_channels = filter.get_shape()[2] # Sanity checks, if shape information is available for the inputs. if input.get_shape().ndims is not None: assert len(input.get_shape()) == 4 assert input.get_shape()[3] == in_channels, ( "Mismatched input depth %d and number of depthwise filters %d." % ( input.get_shape()[3].value, in_channels)) else: assert input.get_shape().ndims is not None, ( "Either tensor must provide static shape information.") assert input.get_shape().ndims == 4 in_channels = input.get_shape()[3] if in_channels == 1: return nn_ops.conv2d(input, filter, strides, padding, name=name) else: return nn_ops.depthwise_conv2d_native(input, filter, strides, padding, name=name)
def _DepthwiseConv2dNativeBackpropInput(op, grad): return [ None, nn_ops.depthwise_conv2d_native_backprop_filter( grad, array_ops.shape(op.inputs[1]), op.inputs[2], op.get_attr("strides"), op.get_attr("padding")), nn_ops.depthwise_conv2d_native(grad, op.inputs[1], op.get_attr("strides"), op.get_attr("padding")) ]
def _test_convolution(tensor_in_sizes, filter_in_sizes, dilations, strides, padding, data_format, is_depthwise=False): """ One iteration of convolution with given shapes and attributes """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)] with tf.Graph().as_default(): in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32') in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype='float32') strides = [1] + strides + [1] dilations = [1] + dilations + [1] if is_depthwise: out = nn_ops.depthwise_conv2d_native(in_data, in_filter, strides=strides, padding=padding, data_format=data_format) else: out = nn_ops.conv2d(in_data, in_filter, strides=strides, padding=padding, data_format=data_format) # TFLite is NHWC, TVM is NCHW tflite_data_array = np.reshape(data_array, tensor_in_sizes).astype('float32') tvm_data_array = np.transpose(tflite_data_array, axes=(0, 3, 1, 2)) # TFLite output is NHWC, TVM is NCHW, we need transpose compare_tflite_with_tvm(tflite_data_array, tvm_data_array, 'Placeholder:0', [in_data], [out], output_need_transpose=True)
def conv(self, input_tensor): filters = np.random.uniform(low=-10, high=10, size=(2, 3, 3, 1)).astype('f4') bias = np.random.uniform(low=0, high=10, size=(3)).astype('f4') out = nn_ops.depthwise_conv2d_native(input_tensor, filters, strides=[1, 2, 2, 1], dilations=[1, 1, 1, 1], padding='SAME', data_format='NHWC') if has_bias: out = nn_ops.bias_add(out, bias) if activation_fn is not None: out = activation_fn(out) return {'output': out}
def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): return [ nn_ops.depthwise_conv2d_native_backprop_input( array_ops.shape(op.inputs[0]), grad, op.inputs[2], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")), None, nn_ops.depthwise_conv2d_native(op.inputs[0], grad, dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")) ]
def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): return [ nn_ops.depthwise_conv2d_native_backprop_input( array_ops.shape(op.inputs[0]), grad, op.inputs[2], dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")), None, nn_ops.depthwise_conv2d_native( op.inputs[0], grad, dilations=op.get_attr("dilations"), strides=op.get_attr("strides"), padding=op.get_attr("padding"), data_format=op.get_attr("data_format")) ]
def depthwise_conv2d(input, filter, strides, padding, name=None): """Depthwise 2-D convolution. Given an input tensor of shape `[batch, in_height, in_width, in_channels]` and a filter tensor of shape `[filter_height, filter_width, in_channels, channel_multiplier]` containing `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies a different filter to each input channel (expanding from 1 channel to `channel_multiplier` channels for each), then concatenates the results together. The output has `in_channels * channel_multiplier` channels. In detail, output[b, i, j, k * channel_multiplier + q] = sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * filter[di, dj, k, q] Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D with shape `[batch, in_height, in_width, in_channels]`. filter: 4-D with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. strides: 1-D of size 4. The stride of the sliding window for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution) name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, in_channels * channel_multiplier].` """ with ops.name_scope(name, "depthwise", [input, filter]) as name: input = ops.convert_to_tensor(input, name="tensor_in") filter = ops.convert_to_tensor(filter, name="filter_in") return nn_ops.depthwise_conv2d_native( input, filter, strides, padding, name=name)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, use_gpu): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. use_gpu: Whether to use GPU. """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input tensor with array containing incrementing # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) conv_native = nn_ops.depthwise_conv2d_native( t1, t2, strides=[1, stride, stride, 1], padding=padding) conv_gold = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native) gold_result = sess.run(conv_gold) print("diff matrix:", np.amax(np.ravel(native_result) - np.ravel(gold_result))) self.assertArrayNear(np.ravel(native_result), np.ravel(gold_result), 1e-5) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_gold)
def conv(self, input_tensor): filters = np.random.uniform(low=-10, high=10, size=(2, 3, 3, 1)).astype('f4') bias = np.random.uniform(low=0, high=10, size=(3)).astype('f4') scale, offset = [1.0, 1.0, 1.0], [0.5, 0.5, 0.5] mean, variance = scale, offset out = nn_ops.depthwise_conv2d_native(input_tensor, filters, strides=[1, 2, 2, 1], dilations=[1, 1, 1, 1], padding='SAME', data_format='NHWC') if has_bias: out = nn_ops.bias_add(out, bias) if has_bn: # Fusing is supported for non-training case. out, _, _, _, _, _ = nn_ops.fused_batch_norm_v3( out, scale, offset, mean, variance, is_training=False) if activation_fn is not None: out = activation_fn(out) return {'output': out}
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): if data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) tolerance = 1e-8 input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") depthwise_conv2d = nn_ops.depthwise_conv2d_native( input_tensor, filter_tensor, [1, stride, stride, 1], padding, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) if test_input: err = gradient_checker.compute_gradient_error(input_tensor, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error(filter_tensor, filter_shape, depthwise_conv2d, output_shape) print("depthwise conv_2d gradient error = ", err) self.assertLess(err, tolerance)
def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, padding, name=None): """2-D convolution with separable filters. Performs a depthwise convolution that acts separately on channels followed by a pointwise convolution that mixes channels. Note that this is separability between dimensions `[1, 2]` and `3`, not spatial separability between dimensions `1` and `2`. In detail, output[b, i, j, k] = sum_{di, dj, q, r] input[b, strides[1] * i + di, strides[2] * j + dj, q] * depthwise_filter[di, dj, q, r] * pointwise_filter[0, 0, q * channel_multiplier + r, k] `strides` controls the strides for the depthwise convolution only, since the pointwise convolution has implicit strides of `[1, 1, 1, 1]`. Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D `Tensor` with shape `[batch, in_height, in_width, in_channels]`. depthwise_filter: 4-D `Tensor` with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. Contains `in_channels` convolutional filters of depth 1. pointwise_filter: 4-D `Tensor` with shape `[1, 1, channel_multiplier * in_channels, out_channels]`. Pointwise filter to mix channels after `depthwise_filter` has convolved spatially. strides: 1-D of size 4. The strides for the depthwise convolution for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution) name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, out_channels]`. Raises: ValueError: If channel_multiplier * in_channels > out_channels, which means that the separable convolution is overparameterized. """ with ops.op_scope([input, depthwise_filter, pointwise_filter], name, "separable_conv2d") as name: input = ops.convert_to_tensor(input, name="tensor_in") depthwise_filter = ops.convert_to_tensor(depthwise_filter, name="depthwise_filter") pointwise_filter = ops.convert_to_tensor(pointwise_filter, name="pointwise_filter") if pointwise_filter.get_shape().ndims is not None: assert len(pointwise_filter.get_shape()) == 4 assert pointwise_filter.get_shape()[0] == 1 assert pointwise_filter.get_shape()[1] == 1 if depthwise_filter.get_shape().ndims and input.get_shape().ndims: channel_multiplier = depthwise_filter.get_shape()[3] in_channels = input.get_shape()[3] out_channels = pointwise_filter.get_shape()[3] if channel_multiplier * in_channels > out_channels: raise ValueError( ("Refusing to perform an overparameterized separable " "convolution: channel_multiplier * in_channels = " "%d * %d = %d > %d = out_channels" % (channel_multiplier, in_channels, channel_multiplier * in_channels, out_channels))) # The layout of the ops in the graph are expected to be as follows: # depthwise_conv2d // Conv2D op corresponding to native deptwise conv. # separable_conv2d // Conv2D op corresponding to the pointwise conv. depthwise = nn_ops.depthwise_conv2d_native(input, depthwise_filter, strides, padding, name="depthwise") return nn_ops.conv2d(depthwise, pointwise_filter, [1, 1, 1, 1], padding="VALID", name=name)
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): if data_type == dtypes.float16: tolerance = 0.002 elif data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) tolerance = 1e-8 input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error(filter_tensor, filter_shape, depthwise_conv2d, output_shape) print("data_type:", data_type, "use_gpu:", use_gpu, ", error = ", err) self.assertLess(err, tolerance)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, use_gpu, grouped_conv=False, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. use_gpu: Whether to use GPU. grouped_conv: Whether to use cuDNN 7's grouped convolution. data_format: The data_format of the input. "NHWC" or "NCHW". """ input_size = 1 filter_size = 1 for s in tensor_in_sizes: input_size *= s for s in filter_in_sizes: filter_size *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 / input_size for f in range(1, input_size + 1)] x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-2, dtypes.float32: 1e-8, dtypes.float64: 1e-13, }[data_type] t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with sess.graph._kernel_label_map( {"DepthwiseConv2dNative": "cudnn_grouped_convolution"} if grouped_conv else {}): conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) try: native_result = sess.run(conv_native) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'" ): tf_logging.warn("Skipping grouped convolution test") return raise e conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) interface_result = sess.run(conv_interface) tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f", data_type, use_gpu, grouped_conv, np.amax(np.absolute(native_result - interface_result))) self.assertArrayNear(np.ravel(native_result), np.ravel(interface_result), tolerance) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, grouped_conv=False, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-0, dtypes.float32: 8e-4, dtypes.float64: 1e-12, }[data_type] input_tensor = constant_op.constant(input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant(filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] with sess.graph._kernel_label_map({ "DepthwiseConv2dNative": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropInput": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropFilter": "cudnn_grouped_convolution", } if grouped_conv else {}): depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) try: if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error( filter_tensor, filter_shape, depthwise_conv2d, output_shape) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if grouped_conv and e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'" ): tf_logging.warn("Skipping grouped convolution test") return raise e tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f", data_type, use_gpu, grouped_conv, err) self.assertLess(err, tolerance)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, use_gpu, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. use_gpu: Whether to use GPU. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: if data_type == dtypes.float16: tolerance = 1e-5 elif data_type == dtypes.float32: tolerance = 1e-5 else: self.assertEqual(data_type, dtypes.float64) tolerance = 1e-8 t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native) interface_result = sess.run(conv_interface) print("data_type:", data_type, "use_gpu:", use_gpu, "max diff = ", np.amax(np.absolute(native_result - interface_result))) self.assertArrayNear( np.ravel(native_result), np.ravel(interface_result), tolerance) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)], dtype=data_type).reshape(tensor_in_sizes) x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)], dtype=data_type).reshape(filter_in_sizes) with self.test_session() as sess: if data_type == np.float32: tolerance = 1e-4 else: self.assertEqual(data_type, np.float64) tolerance = 1e-8 t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=data_type) t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with self.test_scope(): conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) with ops.device("CPU"): conv_interface = ReferenceDepthwiseConv2D( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native, {t1: x1, t2: x2}) interface_result = sess.run(conv_interface, {t1: x1, t2: x2}) print("data_type:", data_type, "max diff = ", np.amax(np.absolute(native_result - interface_result))) self.assertAllClose( np.ravel(native_result), np.ravel(interface_result), rtol=tolerance)
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, grouped_conv=False, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-0, dtypes.float32: 8e-4, dtypes.float64: 1e-12, }[data_type] input_tensor = constant_op.constant( input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] with sess.graph._kernel_label_map({ "DepthwiseConv2dNative": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropInput": "cudnn_grouped_convolution", "DepthwiseConv2dNativeBackpropFilter": "cudnn_grouped_convolution", } if grouped_conv else {}): depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) try: if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error( filter_tensor, filter_shape, depthwise_conv2d, output_shape) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if grouped_conv and e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"): tf_logging.warn("Skipping grouped convolution test") return raise e tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f", data_type, use_gpu, grouped_conv, err) self.assertLess(err, tolerance)
def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, padding, name=None): """2-D convolution with separable filters. Performs a depthwise convolution that acts separately on channels followed by a pointwise convolution that mixes channels. Note that this is separability between dimensions `[1, 2]` and `3`, not spatial separability between dimensions `1` and `2`. In detail, output[b, i, j, k] = sum_{di, dj, q, r] input[b, strides[1] * i + di, strides[2] * j + dj, q] * depthwise_filter[di, dj, q, r] * pointwise_filter[0, 0, q * channel_multiplier + r, k] `strides` controls the strides for the depthwise convolution only, since the pointwise convolution has implicit strides of `[1, 1, 1, 1]`. Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D `Tensor` with shape `[batch, in_height, in_width, in_channels]`. depthwise_filter: 4-D `Tensor` with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. Contains `in_channels` convolutional filters of depth 1. pointwise_filter: 4-D `Tensor` with shape `[1, 1, channel_multiplier * in_channels, out_channels]`. Pointwise filter to mix channels after `depthwise_filter` has convolved spatially. strides: 1-D of size 4. The strides for the depthwise convolution for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, out_channels]`. """ with ops.op_scope([input, depthwise_filter, pointwise_filter], name, "separable_conv2d") as name: input = ops.convert_to_tensor(input, name="tensor_in") depthwise_filter = ops.convert_to_tensor(depthwise_filter, name="depthwise_filter") pointwise_filter = ops.convert_to_tensor(pointwise_filter, name="pointwise_filter") if pointwise_filter.get_shape().ndims is not None: assert len(pointwise_filter.get_shape()) == 4 assert pointwise_filter.get_shape()[0] == 1 assert pointwise_filter.get_shape()[1] == 1 if depthwise_filter.get_shape().ndims and input.get_shape().ndims: channel_multiplier = depthwise_filter.get_shape()[3] in_channels = input.get_shape()[3] out_channels = pointwise_filter.get_shape()[3] # This would mean the separable convolutions is over-parametrized. assert channel_multiplier * in_channels < out_channels # The layout of the ops in the graph are expected to be as follows: # depthwise_conv2d // Conv2D op corresponding to native deptwise conv. # separable_conv2d // Conv2D op corresponding to the pointwise conv. depthwise = nn_ops.depthwise_conv2d_native(input, depthwise_filter, strides, padding, name="depthwise") return nn_ops.conv2d(depthwise, pointwise_filter, [1, 1, 1, 1], padding="VALID", name=name)
def _ConstructAndTestGradient(self, input_shape, filter_shape, output_shape, stride, padding, data_type, test_input, use_gpu, data_format="NHWC"): input_size = 1 for x in input_shape: input_size *= x filter_size = 1 for x in filter_shape: filter_size *= x input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] with self.test_session(use_gpu=use_gpu): if data_type == dtypes.float32: tolerance = 0.002 else: self.assertEqual(data_type, dtypes.float64) tolerance = 1e-8 input_tensor = constant_op.constant(input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant(filter_data, shape=filter_shape, dtype=data_type, name="filter") native_input = input_tensor strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2]) input_shape = [ input_shape[0], input_shape[3], input_shape[1], input_shape[2] ] output_shape = [ output_shape[0], output_shape[3], output_shape[1], output_shape[2] ] strides = [1, 1, stride, stride] depthwise_conv2d = nn_ops.depthwise_conv2d_native( native_input, filter_tensor, strides, padding, data_format=data_format, name="depthwise_conv2d") self.assertEqual(output_shape, depthwise_conv2d.get_shape()) if test_input: err = gradient_checker.compute_gradient_error( native_input, input_shape, depthwise_conv2d, output_shape) else: err = gradient_checker.compute_gradient_error( filter_tensor, filter_shape, depthwise_conv2d, output_shape) print("data_type:", data_type, "use_gpu:", use_gpu, ", error = ", err) self.assertLess(err, tolerance)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, use_gpu, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. use_gpu: Whether to use GPU. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: with sess.graph._kernel_label_map( {"DepthwiseConv2dNative": "neon"}): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native) interface_result = sess.run(conv_interface) print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes, ", stride:", stride, ", padding: ", padding, ", max diff: ", np.amax(np.absolute(native_result - interface_result))) self.assertAllClose(np.ravel(native_result), np.ravel(interface_result), 1e-5) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, use_gpu, grouped_conv=False, data_format="NHWC", dilations=None): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. use_gpu: Whether to use GPU. grouped_conv: Whether to use cuDNN 7's grouped convolution. data_format: The data_format of the input. "NHWC" or "NCHW". dilations: A list of 2 elements, representing the dilations. """ input_size = 1 filter_size = 1 for s in tensor_in_sizes: input_size *= s for s in filter_in_sizes: filter_size *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 / input_size for f in range(1, input_size + 1)] x1 = np.array(x1).reshape(tensor_in_sizes) x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)] x2 = np.array(x2).reshape(filter_in_sizes) # Compute reference result strides = [1, stride, stride, 1] np_result = _DepthwiseConv2dNumpy(x1, x2, strides, padding, "NHWC", dilations) ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-2, dtypes.float32: 1e-5, dtypes.float64: 1e-12, }[data_type] t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type) if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] # depthwise_conv2d_native does not support dilations except on TPUs. if dilations is None: with sess.graph._kernel_label_map({ "DepthwiseConv2dNative": "cudnn_grouped_convolution" } if grouped_conv else {}): conv_native = nn_ops.depthwise_conv2d_native( t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) try: # The Numpy array from calling depthwise_conv2d_native native_result = self.evaluate(conv_native) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if e.message.startswith( "No OpKernel was registered to support Op " "'DepthwiseConv2dNative'"): tf_logging.warn("Skipping grouped convolution test") return raise e conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=strides, padding=padding, data_format=data_format, dilations=dilations) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_interface = array_ops.transpose(conv_interface, [0, 2, 3, 1]) # The Numpy array from calling depthwise_conv2d interface_result = self.evaluate(conv_interface) if dilations is None: self.assertAllClose(native_result, np_result, atol=tolerance, rtol=0.) self.assertAllClose(interface_result, np_result, atol=tolerance, rtol=0.)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, use_gpu, grouped_conv=False, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. use_gpu: Whether to use GPU. grouped_conv: Whether to use cuDNN 7's grouped convolution. data_format: The data_format of the input. "NHWC" or "NCHW". """ input_size = 1 filter_size = 1 for s in tensor_in_sizes: input_size *= s for s in filter_in_sizes: filter_size *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 / input_size for f in range(1, input_size + 1)] x2 = [f * 1.0 / filter_size for f in range(1, filter_size + 1)] ops.reset_default_graph() graph = ops.get_default_graph() with self.session(graph=graph, use_gpu=use_gpu) as sess: tolerance = { dtypes.float16: 4e-2, dtypes.float32: 1e-5, dtypes.float64: 1e-12, }[data_type] t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with sess.graph._kernel_label_map({ "DepthwiseConv2dNative": "cudnn_grouped_convolution" } if grouped_conv else {}): conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) try: native_result = sess.run(conv_native) except errors.InvalidArgumentError as e: # Grouped convolution kernel is only registered for cuDNN 7. Silently # return when we are running on an earlier version or without GPU. if e.message.startswith( "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"): tf_logging.warn("Skipping grouped convolution test") return raise e conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) interface_result = sess.run(conv_interface) tf_logging.info( "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f", data_type, use_gpu, grouped_conv, np.amax(np.absolute(native_result - interface_result))) self.assertArrayNear( np.ravel(native_result), np.ravel(interface_result), tolerance) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, use_gpu, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. use_gpu: Whether to use GPU. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu) as sess: with sess.graph._kernel_label_map({"DepthwiseConv2dNative": "neon"}): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NHWC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) conv_interface = nn_impl.depthwise_conv2d( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native) interface_result = sess.run(conv_interface) print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes, ", stride:", stride, ", padding: ", padding, ", max diff: ", np.amax(np.absolute(native_result - interface_result))) self.assertAllClose( np.ravel(native_result), np.ravel(interface_result), 1e-5) self.assertShapeEqual(native_result, conv_native) self.assertShapeEqual(native_result, conv_interface)
def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, padding, name=None): """2-D convolution with separable filters. Performs a depthwise convolution that acts separately on channels followed by a pointwise convolution that mixes channels. Note that this is separability between dimensions `[1, 2]` and `3`, not spatial separability between dimensions `1` and `2`. In detail, output[b, i, j, k] = sum_{di, dj, q, r] input[b, strides[1] * i + di, strides[2] * j + dj, q] * depthwise_filter[di, dj, q, r] * pointwise_filter[0, 0, q * channel_multiplier + r, k] `strides` controls the strides for the depthwise convolution only, since the pointwise convolution has implicit strides of `[1, 1, 1, 1]`. Must have `strides[0] = strides[3] = 1`. For the most common case of the same horizontal and vertical strides, `strides = [1, stride, stride, 1]`. Args: input: 4-D `Tensor` with shape `[batch, in_height, in_width, in_channels]`. depthwise_filter: 4-D `Tensor` with shape `[filter_height, filter_width, in_channels, channel_multiplier]`. Contains `in_channels` convolutional filters of depth 1. pointwise_filter: 4-D `Tensor` with shape `[1, 1, channel_multiplier * in_channels, out_channels]`. Pointwise filter to mix channels after `depthwise_filter` has convolved spatially. strides: 1-D of size 4. The strides for the depthwise convolution for each dimension of `input`. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution) name: A name for this operation (optional). Returns: A 4-D `Tensor` of shape `[batch, out_height, out_width, out_channels]`. Raises: ValueError: If channel_multiplier * in_channels > out_channels, which means that the separable convolution is overparameterized. """ with ops.name_scope(name, "separable_conv2d", [input, depthwise_filter, pointwise_filter]) as name: input = ops.convert_to_tensor(input, name="tensor_in") depthwise_filter = ops.convert_to_tensor( depthwise_filter, name="depthwise_filter") pointwise_filter = ops.convert_to_tensor( pointwise_filter, name="pointwise_filter") pointwise_filter_shape = pointwise_filter.get_shape().with_rank(4) pointwise_filter_shape[0].assert_is_compatible_with(1) pointwise_filter_shape[1].assert_is_compatible_with(1) channel_multiplier = depthwise_filter.get_shape().with_rank(4)[3] in_channels = input.get_shape().with_rank(4)[3] out_channels = pointwise_filter_shape[3] # If any of channel numbers is unknown, then the comparison below returns # None. See TensorShape.__gt__(). if channel_multiplier * in_channels > out_channels: raise ValueError( "Refusing to perform an overparameterized separable " "convolution: channel_multiplier * in_channels = " "%d * %d = %d > %d = out_channels" % (channel_multiplier, in_channels, channel_multiplier * in_channels, out_channels)) # The layout of the ops in the graph are expected to be as follows: # depthwise_conv2d // Conv2D op corresponding to native deptwise conv. # separable_conv2d // Conv2D op corresponding to the pointwise conv. depthwise = nn_ops.depthwise_conv2d_native( input, depthwise_filter, strides, padding, name="depthwise") return nn_ops.conv2d( depthwise, pointwise_filter, [1, 1, 1, 1], padding="VALID", name=name)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, data_type, data_format="NHWC"): """Verifies the output values of the convolution function. Args: tensor_in_sizes: Input tensor dimensions in [batch, input_rows, input_cols, input_depth]. filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, input_depth, depth_multiplier]. stride: Stride. padding: Padding type. data_type: The data type to use. data_format: The data_format of the input. "NHWC" or "NCHW". """ total_size_1 = 1 total_size_2 = 1 for s in tensor_in_sizes: total_size_1 *= s for s in filter_in_sizes: total_size_2 *= s # Initializes the input and filter tensor with numbers incrementing from 1. x1 = np.array([f * 1.0 for f in range(1, total_size_1 + 1)], dtype=data_type).reshape(tensor_in_sizes) x2 = np.array([f * 1.0 for f in range(1, total_size_2 + 1)], dtype=data_type).reshape(filter_in_sizes) with self.cached_session() as sess: if data_type == np.float32: tolerance = 1e-4 else: self.assertEqual(data_type, np.float64) tolerance = 1e-8 t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=data_type) t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=data_type) native_t1 = t1 strides = [1, stride, stride, 1] if data_format == "NCHW": # Transpose from NWHC input to NCHW # Ex. [4, 5, 5, 48] to [4, 48, 5, 5] native_t1 = array_ops.transpose(t1, [0, 3, 1, 2]) strides = [1, 1, stride, stride] with self.test_scope(): conv_native = nn_ops.depthwise_conv2d_native( native_t1, t2, strides=strides, data_format=data_format, padding=padding) if data_format == "NCHW": # Transpose back from NCHW to NHWC conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1]) with ops.device("CPU"): conv_interface = ReferenceDepthwiseConv2D( t1, t2, strides=[1, stride, stride, 1], padding=padding) native_result = sess.run(conv_native, {t1: x1, t2: x2}) interface_result = sess.run(conv_interface, {t1: x1, t2: x2}) print("data_type:", data_type, "max diff = ", np.amax(np.absolute(native_result - interface_result))) self.assertAllClose(np.ravel(native_result), np.ravel(interface_result), rtol=tolerance)