def testMultiplyInverseAgainstExplicit(self):
    """Compare ConvKFCBasicFB.multiply_inverse with an explicit dense inverse.

    Builds a block over all-zero (2, 2, 2, 2) params/inputs/outputs, overwrites
    both factor covariances with the output of `_make_psd`, refreshes the
    inverses, and then checks that `multiply_inverse` applied to a 16-element
    vector agrees with `inv(full_fisher_block + damping * I) @ v` from NumPy.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)

        tensor_shape = (2, 2, 2, 2)
        params = tf.zeros(tensor_shape)
        inputs = tf.zeros(tensor_shape)
        outputs = tf.zeros(tensor_shape)

        layer_collection = lc.LayerCollection()
        block = fb.ConvKFCBasicFB(
            layer_collection, params=params, padding='SAME')
        block.register_additional_tower(inputs, outputs)

        grads = outputs**2
        # This test is only valid without damping.
        damping = 0.
        block.instantiate_factors(((grads,),), damping)

        # Input factor first, output factor second, as in every step below.
        factors = (block._input_factor, block._output_factor)
        for factor in factors:
            factor.instantiate_cov_variables()
        block.register_inverse()
        for factor in factors:
            factor.instantiate_inv_variables()

        # Overwrite the covariances (8x8 for the input side, 2x2 for the
        # output side) before recomputing the inverses from them.
        for factor, cov_dim in zip(factors, (8, 2)):
            sess.run(tf.assign(factor._cov, _make_psd(cov_dim)))
        for factor in factors:
            sess.run(factor.make_inverse_update_ops())

        v_flat = np.arange(16, dtype=np.float32)
        vector = utils.column_to_tensors(params, tf.constant(v_flat))
        output = block.multiply_inverse(vector)
        output_column = sess.run(utils.tensors_to_column(output))
        output_flat = output_column.ravel()

        # Reference answer from the explicit Fisher block.
        full = sess.run(block.full_fisher_block())
        damped_fisher = full + damping * np.eye(16)
        explicit = np.dot(np.linalg.inv(damped_fisher), v_flat)

        self.assertAllClose(output_flat, explicit)
def testMultiplyInverseAgainstExplicit(self):
    """Compare FullFB.multiply_inverse with an explicit damped dense inverse.

    Uses a two-tensor parameter tuple (three scalars in total) and damping of
    0.5. The factor covariance is overwritten with `_make_psd(3)` before the
    inverse is refreshed, and the block's result for the vector [4, 5, 6] is
    checked against `inv(full_fisher_block + damping * I) @ v` from NumPy.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)

        params = (tf.constant([1., 2.]), tf.constant(3.))
        layer_collection = lc.LayerCollection()
        block = fb.FullFB(layer_collection, params)
        block.register_additional_tower(32)

        grads = (tf.constant([2., 3.]), tf.constant(4.))
        damping = 0.5
        block.instantiate_factors((grads,), damping)

        factor = block._factor
        factor.instantiate_cov_variables()
        block.register_inverse()
        factor.instantiate_inv_variables()

        # Make sure our inverse is something other than the identity.
        overwrite_cov = tf.assign(factor._cov, _make_psd(3))
        sess.run(overwrite_cov)
        sess.run(factor.make_inverse_update_ops())

        v_flat = np.array([4., 5., 6.], dtype=np.float32)
        vector = utils.column_to_tensors(params, tf.constant(v_flat))
        output = block.multiply_inverse(vector)
        output_column = sess.run(utils.tensors_to_column(output))
        output_flat = output_column.ravel()

        # Reference answer from the explicit, damped Fisher block.
        full = sess.run(block.full_fisher_block())
        damped_fisher = full + damping * np.eye(3)
        explicit = np.dot(np.linalg.inv(damped_fisher), v_flat)

        self.assertAllClose(output_flat, explicit)
def testMultiplyInverseAgainstExplicit(self):
    """Compare FullyConnectedKFACBasicFB.multiply_inverse with a dense inverse.

    Registers a bias-free tower with 3 input and 2 output units, feeds the
    output of `_make_psd` into both covariance averages via `add_to_average`,
    refreshes the inverses, and checks `multiply_inverse` on a 6-element vector
    against `inv(full_fisher_block + damping * I) @ v` computed with NumPy.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)

        input_dim, output_dim = 3, 2
        num_params = input_dim * output_dim
        inputs = tf.zeros([32, input_dim])
        outputs = tf.zeros([32, output_dim])
        params = tf.zeros([input_dim, output_dim])

        layer_collection = lc.LayerCollection()
        block = fb.FullyConnectedKFACBasicFB(layer_collection, has_bias=False)
        block.register_additional_tower(inputs, outputs)

        grads = outputs**2
        # This test is only valid without damping.
        damping = 0.
        block.instantiate_factors(((grads,),), damping)

        # Input factor first, output factor second, as in every step below.
        factors = (block._input_factor, block._output_factor)
        for factor in factors:
            factor.instantiate_cov_variables()

        sess.run(tf.global_variables_initializer())
        for factor, cov_dim in zip(factors, (input_dim, output_dim)):
            sess.run(factor._cov.add_to_average(_make_psd(cov_dim)))

        block.register_inverse()
        for factor in factors:
            factor.instantiate_inv_variables()
        for factor in factors:
            sess.run(factor.make_inverse_update_ops())

        v_flat = np.arange(num_params, dtype=np.float32)
        vector = utils.column_to_tensors(params, tf.constant(v_flat))
        output = block.multiply_inverse(vector)
        output_column = sess.run(utils.tensors_to_column(output))
        output_flat = output_column.ravel()

        # Reference answer from the explicit Fisher block.
        full = sess.run(block.full_fisher_block())
        damped_fisher = full + damping * np.eye(num_params)
        explicit = np.dot(np.linalg.inv(damped_fisher), v_flat)

        self.assertAllClose(output_flat, explicit)
def testMultiplyInverseAgainstExplicit(self):
    """Compare NaiveDiagonalFB.multiply_inverse with an explicit dense inverse.

    Uses a two-tensor parameter tuple (three scalars in total) and damping of
    0.5. The column [2, 3, 4] is added to the factor's covariance average, the
    inverse update ops are run, and the block's result for the vector
    [4, 5, 6] is checked against `inv(full_fisher_block + damping * I) @ v`.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)

        params = (tf.constant([1., 2.]), tf.constant(3.))
        layer_collection = lc.LayerCollection()
        block = fb.NaiveDiagonalFB(layer_collection, params)
        block.register_additional_tower(32)

        first_param, second_param = params
        grads = (first_param**2, tf.sqrt(second_param))
        damping = 0.5
        block.instantiate_factors((grads,), damping)

        factor = block._factor
        factor.instantiate_cov_variables()

        # Column vector of values fed into the covariance average.
        cov = tf.reshape(tf.constant([2., 3., 4.]), [-1, 1])
        sess.run(tf.global_variables_initializer())
        sess.run(factor._cov.add_to_average(cov))
        sess.run(factor.make_inverse_update_ops())

        v_flat = np.array([4., 5., 6.], dtype=np.float32)
        vector = utils.column_to_tensors(params, tf.constant(v_flat))
        output = block.multiply_inverse(vector)
        output_column = sess.run(utils.tensors_to_column(output))
        output_flat = output_column.ravel()

        # Reference answer from the explicit, damped Fisher block.
        full = sess.run(block.full_fisher_block())
        damped_fisher = full + damping * np.eye(3)
        explicit = np.dot(np.linalg.inv(damped_fisher), v_flat)

        self.assertAllClose(output_flat, explicit)
def testTensorsToColumn(self):
    """Check the static shape and values produced by utils.tensors_to_column.

    Three inputs are exercised in turn: a single 2x2 tensor, the structure
    returned by `self._fully_connected_layer_params()`, and that same structure
    converted to a list with an extra 4x1 tensor appended. Each result must
    have static shape [N, 1] and evaluate to the expected values as a column.
    """
    with tf.Graph().as_default(), self.test_session() as sess:
        tf.set_random_seed(200)

        def check_column(tensors, expected_values):
            # Build the column, then verify its static shape and its values.
            column = utils.tensors_to_column(tensors)
            self.assertListEqual(
                [len(expected_values), 1], column.get_shape().as_list())
            self.assertAllClose(
                sess.run(column), np.array(expected_values)[:, np.newaxis])

        # Case 1: a single matrix.
        matrix = tf.constant(np.array([[0., 1.], [2., 3.]]))
        check_column(matrix, [0., 1., 2., 3.])

        # Case 2: the fully connected layer parameters from the fixture.
        layer_params = self._fully_connected_layer_params()
        check_column(layer_params, [1., 2., 4., 3., 1., 2.])

        # Case 3: the same parameters as a list with one more tensor appended.
        extended_params = list(layer_params)
        extended_params.append(tf.constant([[6.], [7.], [8.], [9.]]))
        check_column(
            extended_params, [1., 2., 4., 3., 1., 2., 6., 7., 8., 9.])