def call(self, inputs, **kwargs):
    # region Getting parameters for extraction
    if self.rank == 1:
        sizes = (1, 1, *self.kernel_size, 1)
        strides = (1, 1, *self.strides, 1)
        rates = (1, 1, *self.dilation_rate, 1)
    else:
        sizes = (1, *self.kernel_size, 1)
        strides = (1, *self.strides, 1)
        rates = (1, *self.dilation_rate, 1)
    # endregion

    # region Extraction
    if self.rank == 1:
        expanded_inputs = tf.expand_dims(inputs, axis=1)
        outputs = extract_image_patches(expanded_inputs, sizes=sizes, strides=strides,
                                        rates=rates, padding=self.padding)
        outputs = tf.squeeze(outputs, axis=1)
    elif self.rank == 2:
        outputs = extract_image_patches(inputs, sizes=sizes, strides=strides,
                                        rates=rates, padding=self.padding)
    elif self.rank == 3:
        outputs = extract_volume_patches(inputs, ksizes=sizes, strides=strides,
                                         padding=self.padding)
    else:
        raise AttributeError("Invalid rank : self.rank is {}.".format(self.rank))
    # endregion

    if len(outputs.shape) != len(inputs.shape):
        raise ValueError(outputs.shape, inputs.shape, self.name)
    return outputs
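# Hedged sketch (not part of the original layer): the rank == 2 branch above is
# equivalent to calling the patch-extraction op directly. Assuming TF 2.x,
# tf.image.extract_patches plays that role; the output keeps the input's rank,
# with the channel axis expanded to kernel_h * kernel_w * in_channels.
import tensorflow as tf

images = tf.random.normal([2, 8, 8, 3])  # [batch, height, width, channels]
patches = tf.image.extract_patches(
    images,
    sizes=[1, 3, 3, 1],
    strides=[1, 1, 1, 1],
    rates=[1, 1, 1, 1],
    padding="SAME")
print(patches.shape)  # (2, 8, 8, 27), i.e. 3 * 3 * 3 values per output location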
def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
  """Tests input-output pairs for the ExtractImagePatches op.

  Args:
    image: Input tensor with shape: [batch, in_rows, in_cols, depth].
    ksizes: Patch size specified as: [ksize_rows, ksize_cols].
    strides: Output strides, specified as [stride_rows, stride_cols].
    rates: Atrous rates, specified as [rate_rows, rate_cols].
    padding: Padding type.
    patches: Expected output.
  """
  ksizes = [1] + ksizes + [1]
  strides = [1] + strides + [1]
  rates = [1] + rates + [1]

  with self.test_session():
    image_placeholder = array_ops.placeholder(dtypes.float32)
    with self.test_scope():
      out_tensor = array_ops.extract_image_patches(
          image_placeholder,
          ksizes=ksizes,
          strides=strides,
          rates=rates,
          padding=padding,
          name="im2col")
    feed_dict = {image_placeholder: image}
    self.assertAllClose(patches, out_tensor.eval(feed_dict=feed_dict))
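# Illustrative companion to the test above (not from the original source, and
# written against the TF 2.x eager API rather than the placeholder-based graph
# API): a 4x4 single-channel image split into non-overlapping 2x2 patches
# yields a [1, 2, 2, 4] tensor, where the last axis holds each flattened patch.
import numpy as np
import tensorflow as tf

image = np.arange(1, 17, dtype=np.float32).reshape(1, 4, 4, 1)
patches = tf.image.extract_patches(
    image,
    sizes=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    rates=[1, 1, 1, 1],
    padding="VALID")
# Expected:
# [[[[ 1.  2.  5.  6.]  [ 3.  4.  7.  8.]]
#   [[ 9. 10. 13. 14.]  [11. 12. 15. 16.]]]]
print(patches.numpy())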
def _compute_new_cov(self, idx=0):
  if idx != 0:
    raise ValueError("ConvInputKroneckerFactor only supports idx = 0")

  with maybe_colocate_with(self._inputs):
    filter_height, filter_width, in_channels, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms of
    # memory use.
    patches = array_ops.extract_image_patches(
        self._inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=self._strides,
        rates=[1, 1, 1, 1],
        padding=self._padding)

    flatten_size = (filter_height * filter_width * in_channels)
    # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde
    # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14),
    # where M = minibatch size, |T| = number of spatial locations,
    # |Delta| = number of spatial offsets, and J = number of input maps
    # for convolutional layer l.
    patches_flat = array_ops.reshape(patches, [-1, flatten_size])
    # We append a homogeneous coordinate to patches_flat if the layer has
    # bias parameters. This gives us [[A_l]]_H from the paper.
    if self._has_bias:
      patches_flat = append_homog(patches_flat)
    # We call compute_cov without passing in a normalizer. compute_cov uses
    # the first dimension of patches_flat, i.e. M|T|, as the normalizer by
    # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with
    # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from
    # the paper but has a different scale here for consistency with
    # ConvOutputKroneckerFactor.
    # (Tilde omitted over A for clarity.)
    return compute_cov(patches_flat)
def _compute_new_cov(self, idx=0):
  if idx != 0:
    raise ValueError("ConvInputKroneckerFactor only supports idx = 0")

  # TODO(jamesmartens): factor this patches stuff out into a utility function
  with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs):
    filter_height, filter_width, in_channels, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms of
    # memory use.
    patches = array_ops.extract_image_patches(
        self._inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=self._strides,
        rates=[1, 1, 1, 1],
        padding=self._padding)

    flatten_size = (filter_height * filter_width * in_channels)
    patches_flat = array_ops.reshape(patches, [-1, flatten_size])

    if self._has_bias:
      patches_flat = _append_homog(patches_flat)

    return _compute_cov(patches_flat)
def _compute_new_cov(self, idx=0):
  with _maybe_colocate_with(self._outputs_grads[idx]):
    if self._patches is None:
      filter_height, filter_width, _, _ = self._filter_shape

      # TODO(b/64144716): there is potential here for a big savings in terms
      # of memory use.
      patches = array_ops.extract_image_patches(
          self._inputs,
          ksizes=[1, filter_height, filter_width, 1],
          strides=self._strides,
          rates=[1, 1, 1, 1],
          padding=self._padding)

      if self._has_bias:
        patches = _append_homog(patches)

      self._patches = patches

    outputs_grad = self._outputs_grads[idx]
    batch_size = array_ops.shape(self._patches)[0]

    new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad)
    new_cov /= math_ops.cast(batch_size, new_cov.dtype)

    return new_cov
def _compute_new_cov(self, idx=0):
  if idx != 0:
    raise ValueError("ConvInputKroneckerFactor only supports idx = 0")

  with _maybe_colocate_with(self._inputs):
    filter_height, filter_width, in_channels, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms of
    # memory use.
    patches = array_ops.extract_image_patches(
        self._inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=self._strides,
        rates=[1, 1, 1, 1],
        padding=self._padding)

    flatten_size = (filter_height * filter_width * in_channels)
    # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde
    # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14),
    # where M = minibatch size, |T| = number of spatial locations,
    # |Delta| = number of spatial offsets, and J = number of input maps
    # for convolutional layer l.
    patches_flat = array_ops.reshape(patches, [-1, flatten_size])
    # We append a homogeneous coordinate to patches_flat if the layer has
    # bias parameters. This gives us [[A_l]]_H from the paper.
    if self._has_bias:
      patches_flat = _append_homog(patches_flat)
    # We call _compute_cov without passing in a normalizer. _compute_cov uses
    # the first dimension of patches_flat, i.e. M|T|, as the normalizer by
    # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with
    # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from
    # the paper but has a different scale here for consistency with
    # ConvOutputKroneckerFactor.
    # (Tilde omitted over A for clarity.)
    return _compute_cov(patches_flat)
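# Shape sketch for the factor computed above (illustrative addition, plain
# NumPy standing in for _compute_cov, which normalizes by the leading
# dimension): with M = 2 examples, |T| = 9 spatial locations, a 3x3 filter
# (|Delta| = 9) and J = 4 input maps, patches_flat is [M|T|, J|Delta|] and the
# resulting covariance is [J|Delta|, J|Delta|].
import numpy as np

m, t, j, delta = 2, 9, 4, 9
patches_flat = np.random.randn(m * t, j * delta)
cov = patches_flat.T @ patches_flat / (m * t)  # 1/(M|T|) * [[A_l]]^T [[A_l]]
print(cov.shape)  # (36, 36)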
def extract(in_val,
            ksizes=ksizes,
            strides=strides,
            rates=rates,
            padding=padding):
  return array_ops.extract_image_patches(in_val, ksizes, strides, rates,
                                         padding)
def _VariableShapeGradient(self, test_shape_pattern):
  """Use test_shape_pattern to infer which dimensions are of variable size."""
  # Testing shape gradient requires graph mode.
  with ops.Graph().as_default():
    # Set graph seed for determinism.
    random_seed = 42
    random_seed_lib.set_random_seed(random_seed)

    with self.test_session():
      for test_case in self._TEST_CASES:
        np.random.seed(random_seed)
        in_shape = test_case['in_shape']
        test_shape = [
            x if x is None else y
            for x, y in zip(test_shape_pattern, in_shape)
        ]
        in_val = array_ops.placeholder(shape=test_shape, dtype=dtypes.float32)

        feed_dict = {in_val: np.random.random(in_shape)}
        for padding in ['VALID', 'SAME']:
          out_val = array_ops.extract_image_patches(in_val,
                                                    test_case['ksizes'],
                                                    test_case['strides'],
                                                    test_case['rates'],
                                                    padding)
          out_val_tmp = out_val.eval(feed_dict=feed_dict)
          out_shape = out_val_tmp.shape

          err = gradient_checker.compute_gradient_error(in_val, in_shape,
                                                        out_val, out_shape)
          self.assertLess(err, 1e-4)
def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
  """Tests input-output pairs for the ExtractImagePatches op.

  Args:
    image: Input tensor with shape: [batch, in_rows, in_cols, depth].
    ksizes: Patch size specified as: [ksize_rows, ksize_cols].
    strides: Output strides, specified as [stride_rows, stride_cols].
    rates: Atrous rates, specified as [rate_rows, rate_cols].
    padding: Padding type.
    patches: Expected output.
  """
  ksizes = [1] + ksizes + [1]
  strides = [1] + strides + [1]
  rates = [1] + rates + [1]

  with self.session():
    image_placeholder = array_ops.placeholder(dtypes.float32)
    with self.test_scope():
      out_tensor = array_ops.extract_image_patches(
          image_placeholder,
          ksizes=ksizes,
          strides=strides,
          rates=rates,
          padding=padding,
          name="im2col")
    feed_dict = {image_placeholder: image}
    self.assertAllClose(patches, out_tensor.eval(feed_dict=feed_dict))
def __init__(self,
             inputs,
             outputs_grads,
             filter_shape,
             strides,
             padding,
             has_bias=False,
             colocate_cov_ops_with_inputs=False):
  """Creates a ConvDiagonalFactor object.

  Args:
    inputs: Tensor of shape [batch_size, height, width, in_channels].
      Input activations to this layer.
    outputs_grads: Tensor of shape [batch_size, height, width, out_channels].
      Per-example gradients to the loss with respect to the layer's output
      preactivations.
    filter_shape: Tuple of 4 ints: (kernel_height, kernel_width, in_channels,
      out_channels). Represents shape of kernel used in this layer.
    strides: The stride size in this layer (1-D Tensor of length 4).
    padding: The padding in this layer (1-D Tensor of length 4).
    has_bias: Python bool. If True, the layer is assumed to have a bias
      parameter in addition to its filter parameter.
    colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
      their inputs.
  """
  self._filter_shape = filter_shape
  self._has_bias = has_bias
  self._outputs_grads = outputs_grads
  self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
  self._orig_tensors_name = scope_string_from_name((inputs,) +
                                                   tuple(outputs_grads))

  # Note that we precompute the required operations on the inputs since the
  # inputs don't change with the 'idx' argument to _compute_new_cov. (Only
  # the target entry of _outputs_grads changes with idx.)
  with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
    filter_height, filter_width, _, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms of
    # memory use.
    patches = array_ops.extract_image_patches(
        inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=strides,
        rates=[1, 1, 1, 1],
        padding=padding)

    if has_bias:
      patches = _append_homog(patches)

    self._patches = patches

  super(ConvDiagonalFactor, self).__init__()
def __init__(self,
             inputs,
             outputs_grads,
             filter_shape,
             strides,
             padding,
             has_bias=False,
             colocate_cov_ops_with_inputs=False):
  """Creates a ConvDiagonalFactor object.

  Args:
    inputs: Tensor of shape [batch_size, height, width, in_channels].
      Input activations to this layer.
    outputs_grads: Tensor of shape [batch_size, height, width, out_channels].
      Per-example gradients to the loss with respect to the layer's output
      preactivations.
    filter_shape: Tuple of 4 ints: (kernel_height, kernel_width, in_channels,
      out_channels). Represents shape of kernel used in this layer.
    strides: The stride size in this layer (1-D Tensor of length 4).
    padding: The padding in this layer (1-D Tensor of length 4).
    has_bias: Python bool. If True, the layer is assumed to have a bias
      parameter in addition to its filter parameter.
    colocate_cov_ops_with_inputs: Whether to colocate cov_update ops with
      their inputs.
  """
  self._filter_shape = filter_shape
  self._has_bias = has_bias
  self._outputs_grads = outputs_grads
  self._colocate_cov_ops_with_inputs = colocate_cov_ops_with_inputs
  self._orig_tensors_name = scope_string_from_name(
      (inputs,) + tuple(outputs_grads))

  # Note that we precompute the required operations on the inputs since the
  # inputs don't change with the 'idx' argument to _compute_new_cov. (Only
  # the target entry of _outputs_grads changes with idx.)
  with _maybe_colocate_with(inputs, self._colocate_cov_ops_with_inputs):
    filter_height, filter_width, _, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms of
    # memory use.
    patches = array_ops.extract_image_patches(
        inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=strides,
        rates=[1, 1, 1, 1],
        padding=padding)

    if has_bias:
      patches = _append_homog(patches)

    self._patches = patches

  super(ConvDiagonalFactor, self).__init__()
def extract_image_patches(image, ksizes, strides, padding, name=None):
  """Extracts image patches for an N-dimensional convolution.

  This function is a compatibility wrapper over tf.extract_image_patches(), as
  ExtractImagePatches isn't yet implemented in XLA.

  Args:
    image: Tensor of shape [batch, in_x, in_y, ..., in_channels]. Input images.
      All dimensions except 'batch' must be defined.
    ksizes: [filter_x, filter_y, ...]. Spatial shape of filter in each
      dimension.
    strides: [stride_x, stride_y, ...]. Spatial stride for filter in each
      dimension.
    padding: str. "VALID" or "SAME".
    name: str or None. Name of Op.

  Returns:
    result: [batch, out_x, out_y, ..., filter_x, filter_y, ..., in_channels].
      Contains image patches to which conv kernel would be applied for each
      output location. [out_x, out_y, ...] depends on padding.
  """
  if not utils.on_tpu():
    return array_ops.extract_image_patches(
        image,
        ksizes=([1] + list(ksizes) + [1]),
        strides=([1] + list(strides) + [1]),
        rates=[1, 1, 1, 1],
        padding=padding,
        name=name)

  with tf_ops.name_scope(name, "extract_image_patches",
                         [image, ksizes, strides, padding]):
    batch = image.shape.as_list()[0]
    in_channels = image.shape.as_list()[-1]

    # Map each input feature to a location in the output.
    out_channels = np.prod(ksizes) * in_channels
    filters = linalg_ops.eye(out_channels)
    filters = array_ops.reshape(filters, ksizes + [in_channels, out_channels])

    result = nn.convolution(image, filters, padding, strides=strides)
    out_spatial = result.shape.as_list()[1:-1]
    result = array_ops.reshape(
        result, [batch or -1] + out_spatial + ksizes + [in_channels])

    return result
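# Hedged usage sketch for the wrapper above (TF 2.x names, not the original
# graph-mode modules): on CPU/GPU the underlying op returns patches flattened
# into the channel axis; the [batch, out_x, out_y, filter_x, filter_y,
# in_channels] layout produced by the TPU branch is just a reshape of that
# flattened form.
import tensorflow as tf

image = tf.random.normal([8, 10, 10, 3])
ksizes, strides = [3, 3], [2, 2]
flat = tf.image.extract_patches(
    image,
    sizes=[1] + ksizes + [1],
    strides=[1] + strides + [1],
    rates=[1, 1, 1, 1],
    padding="SAME")  # shape [8, 5, 5, 3*3*3]
result = tf.reshape(flat, [8] + flat.shape.as_list()[1:-1] + ksizes + [3])
print(result.shape)  # (8, 5, 5, 3, 3, 3)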
def extract_pointwise_conv2d_patches(inputs,
                                     filter_shape,
                                     name=None,
                                     data_format=None):
  """Extract patches for a 1x1 conv2d.

  Args:
    inputs: 4-D Tensor of shape [batch_size, height, width, in_channels].
    filter_shape: List of 4 ints. Shape of filter to apply with conv2d()
    name: None or str. Name for Op.
    data_format: None or str. Format for data. See 'data_format' in
      tf.nn.conv2d() for details.

  Returns:
    Tensor of shape [batch_size, ..spatial_input_shape..,
    ..spatial_filter_shape.., in_channels]

  Raises:
    ValueError: if inputs is not 4-D.
    ValueError: if filter_shape is not [1, 1, ?, ?]
    ValueError: if data_format is not channels-last.
  """
  if inputs.shape.ndims != 4:
    raise ValueError("inputs must have 4 dims.")
  if len(filter_shape) != 4:
    raise ValueError("filter_shape must have 4 dims.")
  if filter_shape[0] != 1 or filter_shape[1] != 1:
    raise ValueError(
        "filter_shape must have shape 1 along spatial dimensions.")
  if not is_data_format_channel_last(data_format):
    raise ValueError("data_format must be channels last.")

  with ops.name_scope(name, "extract_pointwise_conv2d_patches",
                      [inputs, filter_shape]):
    ksizes = [1, 1, 1, 1]  # Spatial shape is 1x1.
    strides = [1, 1, 1, 1]  # Operate on all pixels.
    rates = [1, 1, 1, 1]  # Dilation has no meaning with spatial shape = 1.
    padding = "VALID"  # Doesn't matter.
    result = array_ops.extract_image_patches(inputs, ksizes, strides, rates,
                                             padding)

    batch_size, input_height, input_width, in_channels = inputs.shape.as_list()
    filter_height, filter_width, in_channels, _ = filter_shape
    return array_ops.reshape(result, [
        batch_size, input_height, input_width, filter_height, filter_width,
        in_channels
    ])
def testConstructGradientWithLargeImages(self):
  batch_size = 4
  height = 1024
  width = 1024
  ksize = 5
  images = variable_scope.get_variable('inputs',
                                       (batch_size, height, width, 1))
  patches = array_ops.extract_image_patches(images,
                                            ksizes=[1, ksize, ksize, 1],
                                            strides=[1, 1, 1, 1],
                                            rates=[1, 1, 1, 1],
                                            padding='SAME')
  # Github issue: #20146
  # tf.extract_image_patches() gradient very slow at graph construction time.
  gradients = gradients_impl.gradients(patches, images)
  # Won't time out.
  self.assertIsNotNone(gradients)
def extract_pointwise_conv2d_patches(inputs,
                                     filter_shape,
                                     name=None,
                                     data_format=None):
  """Extract patches for a 1x1 conv2d.

  Args:
    inputs: 4-D Tensor of shape [batch_size, height, width, in_channels].
    filter_shape: List of 4 ints. Shape of filter to apply with conv2d()
    name: None or str. Name for Op.
    data_format: None or str. Format for data. See 'data_format' in
      tf.nn.conv2d() for details.

  Returns:
    Tensor of shape [batch_size, ..spatial_input_shape..,
    ..spatial_filter_shape.., in_channels]

  Raises:
    ValueError: if inputs is not 4-D.
    ValueError: if filter_shape is not [1, 1, ?, ?]
    ValueError: if data_format is not channels-last.
  """
  if inputs.shape.ndims != 4:
    raise ValueError("inputs must have 4 dims.")
  if len(filter_shape) != 4:
    raise ValueError("filter_shape must have 4 dims.")
  if filter_shape[0] != 1 or filter_shape[1] != 1:
    raise ValueError(
        "filter_shape must have shape 1 along spatial dimensions.")
  if not is_data_format_channel_last(data_format):
    raise ValueError("data_format must be channels last.")

  with ops.name_scope(name, "extract_pointwise_conv2d_patches",
                      [inputs, filter_shape]):
    ksizes = [1, 1, 1, 1]  # Spatial shape is 1x1.
    strides = [1, 1, 1, 1]  # Operate on all pixels.
    rates = [1, 1, 1, 1]  # Dilation has no meaning with spatial shape = 1.
    padding = "VALID"  # Doesn't matter.
    result = array_ops.extract_image_patches(inputs, ksizes, strides, rates,
                                             padding)

    batch_size, input_height, input_width, in_channels = inputs.shape.as_list()
    filter_height, filter_width, in_channels, _ = filter_shape
    return array_ops.reshape(result, [
        batch_size, input_height, input_width, filter_height, filter_width,
        in_channels
    ])
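# Hedged usage note (illustrative addition, TF 2.x eager names): for a 1x1
# filter the extracted "patches" are simply the input activations, so the
# function above amounts to inserting two singleton filter axes. A minimal
# equivalent under those assumptions:
import tensorflow as tf

inputs = tf.random.normal([4, 7, 7, 16])
filter_shape = [1, 1, 16, 32]
patches = tf.reshape(inputs, [4, 7, 7, 1, 1, 16])  # same values, same layout
print(patches.shape)  # (4, 7, 7, 1, 1, 16)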
def _compute_new_cov(self, idx=0):
  if idx != 0:
    raise ValueError("ConvInputKroneckerFactor only supports idx = 0")

  # TODO(jamesmartens): factor this patches stuff out into a utility function
  filter_height, filter_width, in_channels, _ = self._filter_shape
  patches = array_ops.extract_image_patches(
      self._inputs,
      ksizes=[1, filter_height, filter_width, 1],
      strides=self._strides,
      rates=[1, 1, 1, 1],
      padding=self._padding)
  flatten_size = (filter_height * filter_width * in_channels)
  patches_flat = array_ops.reshape(patches, [-1, flatten_size])

  if self._has_bias:
    patches_flat = _append_homog(patches_flat)

  return _compute_cov(patches_flat)
def testGradient(self):
  # Set graph seed for determinism.
  random_seed = 42
  random_seed_lib.set_random_seed(random_seed)

  with self.cached_session():
    for test_case in self._TEST_CASES:
      np.random.seed(random_seed)
      in_shape = test_case['in_shape']
      in_val = constant_op.constant(
          np.random.random(in_shape), dtype=dtypes.float32)

      for padding in ['VALID', 'SAME']:
        out_val = array_ops.extract_image_patches(in_val,
                                                  test_case['ksizes'],
                                                  test_case['strides'],
                                                  test_case['rates'],
                                                  padding)
        out_shape = out_val.get_shape().as_list()

        err = gradient_checker.compute_gradient_error(in_val, in_shape,
                                                      out_val, out_shape)
        self.assertLess(err, 1e-4)
def _compute_new_cov(self, idx=0):
  if idx != 0:
    raise ValueError("ConvInputKroneckerFactor only supports idx = 0")

  with _maybe_colocate_with(self._inputs, self._colocate_cov_ops_with_inputs):
    filter_height, filter_width, in_channels, _ = self._filter_shape
    patches = array_ops.extract_image_patches(
        self._inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=self._strides,
        rates=[1, 1, 1, 1],
        padding=self._padding)
    flatten_size = (filter_height * filter_width * in_channels)
    patches_flat = array_ops.reshape(patches, [-1, flatten_size])

    if self._has_bias:
      patches_flat = _append_homog(patches_flat)

    return _compute_cov(patches_flat)
def avg_pool(value, ksize, strides, padding, quantizer, data_format="NHWC", name=None): """Performs the average pooling on the input (quantized version). Each entry in `output` is the mean of the corresponding size `ksize` window in `value`. Args: value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type `float32`, `float64`, `qint8`, `quint8`, or `qint32`. ksize: A list of ints that has length >= 4. The size of the window for each dimension of the input tensor. strides: A list of ints that has length >= 4. The stride of the sliding window for each dimension of the input tensor. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the @{tf.nn.convolution$comment here} quantizer: The quantizer which is applied after every step. data_format: A string. 'NHWC' and 'NCHW' are supported. name: Optional name for the operation. Returns: A `Tensor` with the same type as `value`. The average pooled output tensor. """ kelems = ksize[1] * ksize[2] with ops.name_scope(name, "AvgPool", [value]) as name: value = ops.convert_to_tensor(value, name="input") output = array_ops.extract_image_patches(value, ksize, strides, [1, 1, 1, 1], padding) output = array_ops.reshape(output, [ output.shape.dims[0].value, output.shape.dims[1].value, output.shape.dims[2].value, kelems, value.shape.dims[3].value ]) output = math_ops.reduce_sum(output, axis=3) output = quantizer.quantize(output) output = output / kelems output = quantizer.quantize(output) return output
def testGradient(self):
  # Set graph seed for determinism.
  random_seed = 42
  random_seed_lib.set_random_seed(random_seed)

  with self.test_session():
    for test_case in self._TEST_CASES:
      np.random.seed(random_seed)
      in_shape = test_case['in_shape']
      in_val = constant_op.constant(
          np.random.random(in_shape), dtype=dtypes.float32)

      for padding in ['VALID', 'SAME']:
        out_val = array_ops.extract_image_patches(in_val,
                                                  test_case['ksizes'],
                                                  test_case['strides'],
                                                  test_case['rates'],
                                                  padding)
        out_shape = out_val.get_shape().as_list()

        err = gradient_checker.compute_gradient_error(in_val, in_shape,
                                                      out_val, out_shape)
        print('extract_image_patches gradient err: %.4e' % err)
        self.assertLess(err, 1e-4)
def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
  """Tests input-output pairs for the ExtractImagePatches op.

  Args:
    image: Input tensor with shape: [batch, in_rows, in_cols, depth].
    ksizes: Patch size specified as: [ksize_rows, ksize_cols].
    strides: Output strides, specified as [stride_rows, stride_cols].
    rates: Atrous rates, specified as [rate_rows, rate_cols].
    padding: Padding type.
    patches: Expected output.
  """
  ksizes = [1] + ksizes + [1]
  strides = [1] + strides + [1]
  rates = [1] + rates + [1]

  out_tensor = array_ops.extract_image_patches(
      constant_op.constant(image),
      ksizes=ksizes,
      strides=strides,
      rates=rates,
      padding=padding,
      name="im2col")
  self.assertAllClose(patches, self.evaluate(out_tensor))
def make_covariance_update_op(self, ema_decay):
  with maybe_colocate_with(self._inputs):
    filter_height, filter_width, _, _ = self._filter_shape

    # TODO(b/64144716): there is potential here for a big savings in terms
    # of memory use.
    patches = array_ops.extract_image_patches(
        self._inputs,
        ksizes=[1, filter_height, filter_width, 1],
        strides=self._strides,
        rates=[1, 1, 1, 1],
        padding=self._padding)

    if self._has_bias:
      patches = append_homog(patches)

    self._patches = patches

  op = super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay)
  self._patches = None
  return op
def testConstructGradientWithLargeImages(self, use_tape):
  with test_util.AbstractGradientTape(use_tape=use_tape) as tape:
    batch_size = 4
    # Prevent OOM by setting reasonably large image size (b/171808681).
    height = 512
    width = 512
    ksize = 5
    shape = (batch_size, height, width, 1)

    images = variables.Variable(
        np.random.uniform(size=np.prod(shape)).reshape(shape), name='inputs')
    tape.watch(images)
    patches = array_ops.extract_image_patches(images,
                                              ksizes=[1, ksize, ksize, 1],
                                              strides=[1, 1, 1, 1],
                                              rates=[1, 1, 1, 1],
                                              padding='SAME')
    # Github issue: #20146
    # tf.image.extract_image_patches() gradient very slow at graph
    # construction time.
    gradients = tape.gradient(patches, images)
    # Won't time out.
    self.assertIsNotNone(gradients)