def testMultiplyInverseNotTupleWithBias(self):
  """Multiplies a plain (non-tuple) vector by the inverse of a conv block.

  The params are given as a 1-element list; the test asserts the block
  detects a bias term (`_has_bias`) in that configuration.
  """
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = [random_ops.random_normal((2, 2, 2, 2))]
    inputs = random_ops.random_normal((2, 2, 2, 2))
    outputs = random_ops.random_normal((2, 2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                              'SAME')
    block.register_additional_minibatch(inputs, outputs)
    self.assertTrue(block._has_bias)
    grads = outputs**2
    # Instantiate covariance and inverse variables for both Kronecker factors.
    block.instantiate_factors(((grads,),), 0.5)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Make sure our inverse is something other than the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    # 9x2 vector: kernel rows plus a bias row (kernel 2x2x2 = 8 rows + 1).
    vector = np.arange(1, 19).reshape(9, 2).astype(np.float32)
    output = block.multiply_inverse(array_ops.constant(vector))

    # Golden value pinned under seed 200.
    self.assertAllClose([0.136455, 0.27291], sess.run(output)[0])
def testMultiplyInverseAgainstExplicit(self):
  """Checks FullFB.multiply_inverse against an explicit dense inverse."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
    block = fb.FullFB(lc.LayerCollection(), params)
    block.register_additional_minibatch(32)
    grads = (array_ops.constant([2., 3.]), array_ops.constant(4.))
    damping = 0.5
    block.instantiate_factors((grads,), damping)
    block._factor.instantiate_cov_variables()
    block.register_inverse()
    block._factor.instantiate_inv_variables()

    # Make sure our inverse is something other than the identity.
    sess.run(state_ops.assign(block._factor._cov, _make_psd(3)))
    sess.run(block._factor.make_inverse_update_ops())

    # Flatten a test vector into the block's parameter structure.
    v_flat = np.array([4., 5., 6.], dtype=np.float32)
    vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    # Reference computation: (F + damping * I)^-1 v with the dense Fisher.
    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)
    self.assertAllClose(output_flat, explicit)
def testRegisterBlocks(self):
  """Registers six layers of various types/approximations; expects 6 blocks."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    lc = layer_collection.LayerCollection()
    # Fully-connected: default and diagonal approximations.
    lc.register_fully_connected(array_ops.constant(1), array_ops.constant(2),
                                array_ops.constant(3))
    lc.register_fully_connected(
        array_ops.constant(1),
        array_ops.constant(2),
        array_ops.constant(3),
        approx=layer_collection.APPROX_DIAGONAL_NAME)
    # Conv2d: default and diagonal approximations.
    lc.register_conv2d(array_ops.constant(4), [1, 1, 1, 1], 'SAME',
                       array_ops.ones((1, 1, 1, 1)), array_ops.constant(3))
    lc.register_conv2d(
        array_ops.constant(4), [1, 1, 1, 1], 'SAME',
        array_ops.ones((1, 1, 1, 1)),
        array_ops.constant(3),
        approx=layer_collection.APPROX_DIAGONAL_NAME)
    # Generic: full and diagonal approximations.
    lc.register_generic(
        array_ops.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME)
    lc.register_generic(
        array_ops.constant(6), 16,
        approx=layer_collection.APPROX_DIAGONAL_NAME)

    self.assertEqual(6, len(lc.get_blocks()))
def testShouldRegisterSingleParamRegistered(self):
  """An already-registered parameter may not be registered again."""
  param = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
  collection = layer_collection.LayerCollection()
  # Pre-populate the registry so `param` is already covered by a block.
  collection.fisher_blocks = {param: '1'}
  with self.assertRaises(ValueError):
    collection.register_block(param, 'foo')
def testDefaultLayerCollection(self):
  """Exercises the get/set/as_default lifecycle of the default collection."""
  with ops.Graph().as_default():
    # Can't get default if there isn't one set.
    with self.assertRaises(ValueError):
      layer_collection.get_default_layer_collection()

    # Can't set default twice.
    lc = layer_collection.LayerCollection()
    layer_collection.set_default_layer_collection(lc)
    with self.assertRaises(ValueError):
      layer_collection.set_default_layer_collection(lc)

    # Same as one set.
    self.assertTrue(lc is layer_collection.get_default_layer_collection())

    # Can set to None.
    layer_collection.set_default_layer_collection(None)
    with self.assertRaises(ValueError):
      layer_collection.get_default_layer_collection()

    # as_default() is the same as setting/clearing.
    with lc.as_default():
      self.assertTrue(lc is layer_collection.get_default_layer_collection())
    with self.assertRaises(ValueError):
      layer_collection.get_default_layer_collection()
def testRegisterCategoricalPredictiveDistributionBatchSize1(self):
  """Registering a categorical predictive dist works for batch size 1."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    single_example_logits = random_ops.random_normal((1, 2))
    collection = layer_collection.LayerCollection()
    collection.register_categorical_predictive_distribution(
        single_example_logits, seed=200)
def testMultiplyInverseTuple(self):
  """Multiplies a tuple of vectors by the FC block's approximate inverse."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]])
    outputs = array_ops.constant([[3., 4.], [5., 6.]])
    block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
    block.register_additional_minibatch(inputs, outputs)
    grads = outputs**2
    block.instantiate_factors(([grads],), 0.5)

    # Make sure our inverse is something other than the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    vector = (
        np.arange(2, 6).reshape(2, 2).astype(np.float32),
        np.arange(1, 3).reshape(2, 1).astype(np.float32))
    output = block.multiply_inverse(
        (array_ops.constant(vector[0]), array_ops.constant(vector[1])))
    output = sess.run(output)

    # Golden values pinned under seed 200.
    self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                        output[0])
    self.assertAllClose([0.343146, 0.686291], output[1])
def testMultiplyInverseAgainstExplicit(self):
  """Checks NaiveDiagonalFB.multiply_inverse against a dense computation."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = (array_ops.constant([1., 2.]), array_ops.constant(3.))
    block = fb.NaiveDiagonalFB(lc.LayerCollection(), params)
    block.register_additional_minibatch(32)
    grads = (params[0]**2, math_ops.sqrt(params[1]))
    damping = 0.5
    block.instantiate_factors((grads,), damping)

    # Assign known diagonal covariance values so the inverse is nontrivial.
    cov = array_ops.reshape(array_ops.constant([2., 3., 4.]), [-1, 1])
    sess.run(state_ops.assign(block._factor._cov, cov))
    sess.run(block._factor.make_inverse_update_ops())

    # Flatten a test vector into the block's parameter structure.
    v_flat = np.array([4., 5., 6.], dtype=np.float32)
    vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    # Reference computation: (F + damping * I)^-1 v with the dense Fisher.
    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat)
    self.assertAllClose(output_flat, explicit)
def testMultiplyInverseNotTuple(self):
  """Multiplies a plain (non-tuple) vector by the FC block's inverse."""
  with ops.Graph().as_default(), self.cached_session() as sess:
    random_seed.set_random_seed(200)
    inputs = array_ops.constant([[1., 2.], [3., 4.]])
    outputs = array_ops.constant([[3., 4.], [5., 6.]])
    block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
    block.register_additional_tower(inputs, outputs)
    grads = outputs**2
    # Instantiate covariance and inverse variables for both Kronecker factors.
    block.instantiate_factors(((grads,),), 0.5)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Make sure our inverse is something other than the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    vector = np.arange(2, 6).reshape(2, 2).astype(np.float32)
    output = block.multiply_inverse(array_ops.constant(vector))

    # Golden values pinned under seed 200.
    self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]],
                        sess.run(output))
def testFullFBInitTensorTuple(self):
  """A FullFB built from a tensor tuple exposes those tensors for grads."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    weight = array_ops.constant([1., 2.])
    bias = array_ops.constant(3.)
    params = (weight, bias)
    block = fb.FullFB(lc.LayerCollection(), params, 32)
    self.assertAllEqual(params, block.tensors_to_compute_grads())
def testMultiplyInverseAgainstExplicit(self):
  """Checks ConvKFCBasicFB.multiply_inverse against a dense computation."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = array_ops.zeros((2, 2, 2, 2))
    inputs = array_ops.zeros((2, 2, 2, 2))
    outputs = array_ops.zeros((2, 2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, inputs, outputs,
                              (1, 1, 1, 1), 'SAME')
    grads = outputs**2
    damping = 0.  # This test is only valid without damping.
    block.instantiate_factors((grads,), damping)

    # Assign known PSD covariances so the inverse is nontrivial.
    sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8)))
    sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    # Flatten a test vector into the block's parameter structure.
    v_flat = np.arange(16, dtype=np.float32)
    vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    # Reference computation: (F + damping * I)^-1 v with the dense Fisher.
    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)), v_flat)
    self.assertAllClose(output_flat, explicit)
def setUp(self):
  """Builds a small graph with one fully-connected layer.

  Only the weights (not the bias) are registered with the layer
  collection, plus a categorical predictive distribution on the outputs.
  """
  self._graph = ops.Graph()
  with self._graph.as_default():
    self.layer_collection = lc.LayerCollection()

    self.inputs = random_ops.random_normal((2, 2), dtype=dtypes.float32)
    self.weights = variable_scope.get_variable(
        "w", shape=(2, 2), dtype=dtypes.float32)
    self.bias = variable_scope.get_variable(
        "b", initializer=init_ops.zeros_initializer(), shape=(2, 1))
    self.output = math_ops.matmul(self.inputs, self.weights) + self.bias

    # Only register the weights.
    self.layer_collection.register_fully_connected(
        params=(self.weights,), inputs=self.inputs, outputs=self.output)

    self.outputs = math_ops.tanh(self.output)
    self.targets = array_ops.zeros_like(self.outputs)
    self.layer_collection.register_categorical_predictive_distribution(
        logits=self.outputs, targets=self.targets)
def testMultiplyInverse(self):
  """Inverse update and inverse-multiply run for a depthwise conv block."""
  with ops.Graph().as_default(), self.cached_session() as sess:
    random_seed.set_random_seed(200)
    params = random_ops.random_normal((3, 3, 8, 2))
    inputs = random_ops.random_normal((32, 5, 5, 8))
    outputs = random_ops.random_normal((32, 5, 5, 16))
    layer_collection = lc.LayerCollection()
    block = fb.DepthwiseConvKFCBasicFB(
        layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME')
    block.register_additional_tower(inputs, outputs)
    grads = outputs**2
    block.instantiate_factors(([grads],), 0.5)
    block._input_factor.instantiate_cov_variables()
    block._output_factor.instantiate_cov_variables()
    block.register_inverse()
    block._input_factor.instantiate_inv_variables()
    block._output_factor.instantiate_inv_variables()

    # Ensure inverse update op doesn't crash.
    sess.run(tf_variables.global_variables_initializer())
    sess.run([
        factor.make_inverse_update_ops()
        for factor in layer_collection.get_factors()
    ])

    # Ensure inverse-vector multiply doesn't crash.
    output = block.multiply_inverse(params)
    sess.run(output)

    # Ensure same shape.
    self.assertAllEqual(output.shape, params.shape)
def testOptimizerInitInvalidMomentumRegistration(self):
  """KfacOptimizer construction rejects an unknown momentum_type."""
  with self.assertRaises(ValueError):
    optimizer.KfacOptimizer(
        0.1, 0.2, 0.3, lc.LayerCollection(), momentum_type='foo')
def testOptimizerInit(self):
  """KfacOptimizer constructs successfully for a registered one-layer model."""
  with ops.Graph().as_default():
    layer_collection = lc.LayerCollection()

    # Build a tiny 1x1 fully-connected model.
    inputs = array_ops.ones((2, 1)) * 2
    weights_val = np.ones((1, 1), dtype=np.float32) * 3.
    weights = variable_scope.get_variable(
        'w', initializer=array_ops.constant(weights_val))
    bias = variable_scope.get_variable(
        'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
    output = math_ops.matmul(inputs, weights) + bias

    layer_collection.register_fully_connected((weights, bias), inputs, output)

    logits = math_ops.tanh(output)
    targets = array_ops.constant([[0.], [1.]])
    output = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

    layer_collection.register_categorical_predictive_distribution(logits)

    optimizer.KfacOptimizer(
        0.1, 0.2, 0.3, layer_collection, momentum=0.5,
        momentum_type='regular')
def testRegisterNormalPredictiveDistribution(self):
  """Registering the same distribution twice doubles the sampled loss."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    predictions = array_ops.constant(
        [[1., 2.], [3., 4]], dtype=dtypes.float32)

    # One registration.
    lc = layer_collection.LayerCollection()
    lc.register_normal_predictive_distribution(predictions, 1., seed=200)
    single_loss = sess.run(lc.total_sampled_loss())

    # Two identical registrations (same seed).
    lc2 = layer_collection.LayerCollection()
    lc2.register_normal_predictive_distribution(predictions, 1., seed=200)
    lc2.register_normal_predictive_distribution(predictions, 1., seed=200)
    double_loss = sess.run(lc2.total_sampled_loss())

    self.assertAlmostEqual(2 * single_loss, double_loss)
def testMultiplyInverseAgainstExplicit(self):
  """Checks the FC KFAC block's inverse-multiply against a dense inverse."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    input_dim, output_dim = 3, 2
    inputs = array_ops.zeros([32, input_dim])
    outputs = array_ops.zeros([32, output_dim])
    params = array_ops.zeros([input_dim, output_dim])
    block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False)
    block.register_additional_minibatch(inputs, outputs)
    grads = outputs**2
    damping = 0.  # This test is only valid without damping.
    block.instantiate_factors(([grads],), damping)

    # Assign known PSD covariances so the inverse is nontrivial.
    sess.run(state_ops.assign(block._input_factor._cov, _make_psd(3)))
    sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2)))
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    # Flatten a test vector into the block's parameter structure.
    v_flat = np.arange(6, dtype=np.float32)
    vector = utils.column_to_tensors(params, array_ops.constant(v_flat))
    output = block.multiply_inverse(vector)
    output_flat = sess.run(utils.tensors_to_column(output)).ravel()

    # Reference computation: (F + damping * I)^-1 v with the dense Fisher.
    full = sess.run(block.full_fisher_block())
    explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat)
    self.assertAllClose(output_flat, explicit)
def testMultiplyInverseTuple(self):
  """Multiplies a tuple of vectors by the conv block's approximate inverse."""
  with ops.Graph().as_default(), self.test_session() as sess:
    random_seed.set_random_seed(200)
    params = random_ops.random_normal((2, 2, 2, 2))
    inputs = random_ops.random_normal((2, 2, 2, 2))
    outputs = random_ops.random_normal((2, 2, 2, 2))
    block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1),
                              'SAME')
    block.register_additional_minibatch(inputs, outputs)
    grads = outputs**2
    block.instantiate_factors(([grads],), 0.5)

    # Make sure our inverse is something other than the identity.
    sess.run(tf_variables.global_variables_initializer())
    sess.run(block._input_factor.make_inverse_update_ops())
    sess.run(block._output_factor.make_inverse_update_ops())

    vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32),
              np.arange(2, 4).reshape(2, 1).astype(np.float32))
    output = block.multiply_inverse(
        (array_ops.constant(vector[0]), array_ops.constant(vector[1])))
    output = sess.run(output)

    # Golden values pinned under seed 200.
    self.assertAllClose([0.136455, 0.27291], output[0][0])
    self.assertAllClose([0.27291, 0.409365], output[1])
def testRegisterSingleParamRegisteredInTuple(self):
  """Registering a param already covered by a tuple keeps the tuple's block."""
  first = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
  second = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
  collection = layer_collection.LayerCollection()
  # `first` is already covered as part of a registered tuple.
  collection.fisher_blocks = {(first, second): '1'}
  collection.register_block(first, 'foo')
  # The tuple's existing block is the only one reported.
  self.assertEqual(set(['1']), set(collection.get_blocks()))
def testRegisterSingleParamNotRegistered(self):
  """A fresh parameter registers cleanly alongside unrelated entries."""
  param = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
  other = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
  collection = layer_collection.LayerCollection()
  # Registry contains only an unrelated parameter.
  collection.fisher_blocks = {other: '1'}
  collection.register_block(param, 'foo')
def testIdentifySubsetPreviouslyRegisteredTensor(self):
  """Linking a subset of an already-linked parameter group raises."""
  first = variable_scope.get_variable('x', shape=())
  second = variable_scope.get_variable('y', shape=())
  collection = layer_collection.LayerCollection()
  collection.define_linked_parameters((first, second))
  with self.assertRaises(ValueError):
    collection.define_linked_parameters(first)
def testFullFBInitSingleTensor(self):
  """A FullFB with a registered minibatch reports its parameter tensors."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    weight = array_ops.constant([1., 2.])
    bias = array_ops.constant(3.)
    params = (weight, bias)
    block = fb.FullFB(lc.LayerCollection(), params)
    block.register_additional_minibatch(32)
    self.assertAllEqual(params, block.tensors_to_compute_grads())
def testFullyConnectedSeriesFBInit(self):
  """A series FC block wraps registered tower outputs for grad computation."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    tower_inputs = array_ops.constant([1., 2.])
    tower_outputs = array_ops.constant([3., 4.])
    block = fb.FullyConnectedSeriesFB(lc.LayerCollection())
    block.register_additional_tower([tower_inputs], [tower_outputs])
    # Outputs are doubly nested: one tower, one time step.
    self.assertAllEqual([[tower_outputs]], block.tensors_to_compute_grads())
def testInstantiateFactors(self):
  """FullFB factor instantiation succeeds for tuple-valued params."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    weight = array_ops.constant([1., 2.])
    bias = array_ops.constant(3.)
    params = (weight, bias)
    block = fb.FullFB(lc.LayerCollection(), params, 32)
    grads = (weight**2, math_ops.sqrt(bias))
    block.instantiate_factors(grads, 0.5)
def testFullyConnectedKFACBasicFBInit(self):
  """The FC KFAC block reports its outputs as the grad tensors."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    layer_inputs = array_ops.constant([1., 2.])
    layer_outputs = array_ops.constant([3., 4.])
    block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), layer_inputs,
                                         layer_outputs)
    self.assertAllEqual(layer_outputs, block.tensors_to_compute_grads())
def testIdentifyLinkedParametersSomeRegisteredInOtherTuples(self):
  """Linking a group that overlaps an existing linked group raises."""
  first = variable_scope.get_variable('x', shape=())
  second = variable_scope.get_variable('y', shape=())
  third = variable_scope.get_variable('z', shape=())
  collection = layer_collection.LayerCollection()
  collection.define_linked_parameters((first, second))
  with self.assertRaises(ValueError):
    # `first` already belongs to the (first, second) group.
    collection.define_linked_parameters((first, third))
def testInstantiateFactors(self):
  """NaiveDiagonalFB factor instantiation succeeds for tuple-valued params."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    weight = array_ops.constant([1., 2.])
    bias = array_ops.constant(3.)
    params = (weight, bias)
    block = fb.NaiveDiagonalFB(lc.LayerCollection(), params)
    block.register_additional_minibatch(32)
    grads = (weight**2, math_ops.sqrt(bias))
    block.instantiate_factors(grads, 0.5)
def testRegisterBlocksMultipleRegistrations(self):
  """Re-registering the same tensor under a second block raises."""
  with ops.Graph().as_default():
    random_seed.set_random_seed(200)
    collection = layer_collection.LayerCollection()
    shared_param = array_ops.constant(1)
    collection.register_fully_connected(shared_param, array_ops.constant(2),
                                        array_ops.constant(3))
    with self.assertRaises(ValueError):
      collection.register_generic(shared_param, 16)
def testRegisterTupleParamSomeRegistered(self):
  """A tuple registration subsumes a prior single-param registration."""
  param_x = variable_scope.get_variable('x', initializer=array_ops.constant(1,))
  param_y = variable_scope.get_variable('y', initializer=array_ops.constant(1,))
  param_z = variable_scope.get_variable('z', initializer=array_ops.constant(1,))
  collection = layer_collection.LayerCollection()
  collection.fisher_blocks = {param_x: '1', param_z: '2'}
  collection.register_block((param_x, param_y), 'foo')
  # x's old block ('1') is replaced; z's block and the new one remain.
  self.assertEqual(set(['2', 'foo']), set(collection.get_blocks()))
def testLossFunctionWithoutName(self):
  """Ensure loss functions get unique names if 'name' not specified."""
  with ops.Graph().as_default():
    logits = linalg_ops.eye(2)
    collection = layer_collection.LayerCollection()
    # Two default-named registrations must not collide.
    collection.register_categorical_predictive_distribution(logits)
    collection.register_categorical_predictive_distribution(logits)
    self.assertEqual(2, len(collection.losses))