def testMultiplyInverseAgainstExplicit(self):
  """multiply_inverse() should agree with inverting the dense Fisher block.

  Overwrites the block's input/output covariance factors with fixed matrices
  (via the _make_psd helper — presumably positive semi-definite; confirm in
  its definition), then checks that the Kronecker-factored inverse-vector
  product equals np.linalg.inv(full_fisher_block()) applied to the vector.
  """
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    # All-zero placeholders; actual covariance values are assigned below.
    params = array_ops.zeros((2, 2, 2, 2))
    inputs = array_ops.zeros((2, 2, 2, 2))
    outputs = array_ops.zeros((2, 2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                              'SAME')
    block.register_additional_minibatch(inputs, outputs)
    grads = outputs**2
    damping = 0.  # This test is only valid without damping.
    block.instantiate_factors(([grads], ), damping)

    # Replace the estimated covariances with deterministic matrices so the
    # expected result can be computed exactly, then build their inverses.
    sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8)))
    sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    # Apply the block's inverse to v = [0, 1, ..., 15] and flatten the result.
    v_flat = np.arange(16, dtype=np.float32)
    vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    # Reference: invert the explicit 16x16 Fisher block directly.
    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)), v_flat)

    self.assertAllClose(output_flat, explicit)
def testMultiplyInverseTuple(self):
  """multiply_inverse() on a (weights, bias) tuple matches golden values."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    shape = (2, 2, 2, 2)
    params = random_ops.random_normal(shape)
    inputs = random_ops.random_normal(shape)
    outputs = random_ops.random_normal(shape)
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                              'SAME')
    block.register_additional_minibatch(inputs, outputs)
    block.instantiate_factors(([outputs**2], ), 0.5)

    # Run the inverse-update ops so the inverse is not just the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    weight_part = np.arange(1, 15).reshape(7, 2).astype(np.float32)
    bias_part = np.arange(2, 4).reshape(2, 1).astype(np.float32)
    result = sess.run(
        block.multiply_inverse((array_ops.constant(weight_part),
                                array_ops.constant(bias_part))))

    # Golden values recorded from a known-good run (seed fixed above).
    self.assertAllClose([0.136455, 0.27291], result[0][0])
    self.assertAllClose([0.27291, 0.409365], result[1])
def testMultiplyInverseNotTupleWithBias(self):
  """multiply_inverse() accepts a single stacked parameter matrix with bias.

  Passing `params` as a one-element list makes the block report _has_bias,
  and the input vector has 9 rows — presumably 8 weight rows plus one bias
  row; confirm against ConvKFCBasicFB's parameter layout.
  """
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = [random_ops.random_normal((2, 2, 2, 2))]
    inputs = random_ops.random_normal((2, 2, 2, 2))
    outputs = random_ops.random_normal((2, 2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params=params,
                              padding='SAME')
    block.register_additional_tower(inputs, outputs)
    self.assertTrue(block._has_bias)
    grads = outputs**2
    block.instantiate_factors(((grads, ), ), 0.5)

    # Covariance and inverse variables must exist before the inverse-update
    # ops below can be constructed and run; order matters here.
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Make sure our inverse is something other than the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
    output = block.multiply_inverse(array_ops.constant(vector))

    # Golden values recorded from a known-good run (seed fixed above).
    self.assertAllClose([0.136455, 0.27291], sess.run(output)[0])
def register_convolution(self,
                         params,
                         inputs,
                         outputs,
                         padding,
                         strides=None,
                         dilation_rate=None,
                         data_format=None,
                         approx=None,
                         reuse=VARIABLE_SCOPE):
  """Register a call to tf.nn.convolution().

  Args:
    params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
      this layer. Weight matrix should have shape [..filter_spatial_size..,
      in_channels, out_channels].  Bias should have shape [out_channels].
    inputs: Tensor of shape [batch_size, ..input_spatial_size.., in_channels].
      Inputs to layer.
    outputs: Tensor of shape [batch_size, ..output_spatial_size..,
      out_channels].  Output produced by layer.
    padding: string. see tf.nn.conv2d for valid values.
    strides: List of ints of length len(..input_spatial_size..). Strides for
      convolution kernel in spatial dimensions.
    dilation_rate: List of ints of length len(..input_spatial_size..).
      Dilations along spatial dimension.
    data_format: str or None. Format of data.
    approx: str or None. If not None must be one of "kron" or "diagonal".
      The Fisher approximation to use. If None the default value is used.
      (Default: None)
    reuse: bool or str.  If True, reuse an existing FisherBlock. If False,
      create a new FisherBlock.  If "VARIABLE_SCOPE", use
      tf.get_variable_scope().reuse.

  Raises:
    ValueError: For improper value to 'approx'.
    KeyError: If reuse == True but no FisherBlock found for 'params'.
    ValueError: If reuse == True and FisherBlock found but of the wrong type.
  """
  # TODO(b/74793309): Have this use _get_block_type like the other
  # registration functions?
  # Validate with an explicit ValueError rather than `assert`: asserts are
  # stripped under `python -O`, and the docstring promises ValueError.
  if approx is not None and approx != APPROX_KRONECKER_NAME:
    raise ValueError("Bad value {} for approx.".format(approx))
  block = self.register_block(
      params,
      fb.ConvKFCBasicFB(
          layer_collection=self,
          params=params,
          padding=padding,
          strides=strides,
          dilation_rate=dilation_rate,
          data_format=data_format),
      reuse=reuse)
  block.register_additional_minibatch(inputs, outputs)
  # Record one additional use of these parameters for tracking purposes.
  self._add_uses(params, 1)
def register_conv2d(self,
                    params,
                    strides,
                    padding,
                    inputs,
                    outputs,
                    approx=APPROX_KRONECKER_NAME):
  """Registers a call to tf.nn.conv2d().

  Args:
    params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
      this layer.
    strides: 1-D Tensor of length 4. Strides for convolution kernel.
    padding: string. see tf.nn.conv2d for valid values.
    inputs: Tensor. Inputs to the layer.
    outputs: Tensor. Output produced by the layer.
    approx: str. One of APPROX_KRONECKER_NAME or APPROX_DIAGONAL_NAME,
      selecting the Fisher approximation to use.
      (Default: APPROX_KRONECKER_NAME)

  Raises:
    ValueError: For improper value to 'approx'.
  """
  if approx == APPROX_KRONECKER_NAME:
    self.register_block(params,
                        fb.ConvKFCBasicFB(self, params, inputs, outputs,
                                          strides, padding))
  elif approx == APPROX_DIAGONAL_NAME:
    self.register_block(params,
                        fb.ConvDiagonalFB(self, params, inputs, outputs,
                                          strides, padding))
  else:
    # Previously an unrecognized approximation was silently ignored (no
    # block registered at all); fail loudly instead, matching
    # register_convolution's documented contract.
    raise ValueError("Bad value {} for approx.".format(approx))
def _testConvKFCBasicFBInitParams(self, params):
  """Helper: builds a ConvKFCBasicFB from `params` and checks its grad source.

  `params` may be a single array-like or a list/tuple of them; every element
  is converted to a constant Tensor before the block is constructed.  The
  block must report `outputs` as the tensor to compute gradients against.
  """
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    param_tensors = (
        [array_ops.constant(p) for p in params]
        if isinstance(params, (list, tuple))
        else array_ops.constant(params))
    inputs = random_ops.random_normal((2, 2, 2))
    outputs = random_ops.random_normal((2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), param_tensors, inputs,
                              outputs, [1, 1, 1], 'SAME')
    self.assertAllEqual(outputs, block.tensors_to_compute_grads())