def testColumnToTensors(self):
    """Exercise utils.column_to_tensors with one, two and three templates.

    Each case feeds a column holding 0..N-1 and checks that the values come
    back arranged in the shapes of the template tensor(s).
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        # Case 1: a single tensor template produces a single array.
        template = array_ops.constant(np.array([[0., 1.], [2., 3.]]))
        column = array_ops.constant(np.arange(4.)[:, None])
        result = sess.run(utils.column_to_tensors(template, column))
        self.assertAllClose(result, np.array([[0., 1.], [2., 3.]]))

        # Case 2: the template from _fully_connected_layer_params() produces
        # a 2-tuple.
        layer_params = self._fully_connected_layer_params()
        column = array_ops.constant(np.arange(6.)[:, None])
        result = sess.run(utils.column_to_tensors(layer_params, column))
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 2)
        first, second = result
        self.assertAllClose(first, np.array([[0., 1.], [2., 3.]]))
        self.assertAllClose(second, np.array([4., 5.]))

        # Case 3: the same template extended with a (4, 1) constant produces
        # a 3-tuple.
        extended = list(layer_params)
        extended.append(array_ops.constant([[6.], [7.], [8.], [9.]]))
        column = array_ops.constant(np.arange(10.)[:, None])
        result = sess.run(utils.column_to_tensors(extended, column))
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 3)
        first, second, third = result
        self.assertAllClose(first, np.array([[0., 1.], [2., 3.]]))
        self.assertAllClose(second, np.array([4., 5.]))
        self.assertAllClose(third, np.array([[6.], [7.], [8.], [9.]]))
def testColumnToTensors(self):
    """Check that utils.column_to_tensors reshapes a column to its template.

    Covers a lone tensor template, the template returned by
    self._fully_connected_layer_params(), and that template with one extra
    (4, 1) constant appended.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        # Lone tensor template.
        single_template = array_ops.constant(np.array([[0., 1.], [2., 3.]]))
        col = array_ops.constant(np.arange(4.)[:, None])
        single_out = sess.run(utils.column_to_tensors(single_template, col))
        self.assertAllClose(single_out, np.array([[0., 1.], [2., 3.]]))

        # Two-element template.
        pair_template = self._fully_connected_layer_params()
        col = array_ops.constant(np.arange(6.)[:, None])
        pair_out = sess.run(utils.column_to_tensors(pair_template, col))
        self.assertIsInstance(pair_out, tuple)
        self.assertEqual(len(pair_out), 2)
        self.assertAllClose(pair_out[0], np.array([[0., 1.], [2., 3.]]))
        self.assertAllClose(pair_out[1], np.array([4., 5.]))

        # Three-element template: the pair plus one more constant.
        triple_template = list(pair_template)
        triple_template.append(
            array_ops.constant([[6.], [7.], [8.], [9.]]))
        col = array_ops.constant(np.arange(10.)[:, None])
        triple_out = sess.run(utils.column_to_tensors(triple_template, col))
        self.assertIsInstance(triple_out, tuple)
        self.assertEqual(len(triple_out), 3)
        self.assertAllClose(triple_out[0], np.array([[0., 1.], [2., 3.]]))
        self.assertAllClose(triple_out[1], np.array([4., 5.]))
        self.assertAllClose(
            triple_out[2], np.array([[6.], [7.], [8.], [9.]]))
def testMultiplyInverseAgainstExplicit(self):
    """Compare FullFB.multiply_inverse with an explicit dense inverse.

    The expected value is inv(full_fisher_block() + damping * I) @ v,
    computed with NumPy.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        param_tuple = (array_ops.constant([1., 2.]), array_ops.constant(3.))
        fisher_block = fb.FullFB(lc.LayerCollection(), param_tuple)
        fisher_block.register_additional_minibatch(32)

        grad_tuple = (array_ops.constant([2., 3.]), array_ops.constant(4.))
        damping = 0.5
        fisher_block.instantiate_factors((grad_tuple,), damping)
        factor = fisher_block._factor
        factor.instantiate_cov_variables()
        fisher_block.register_inverse()
        factor.instantiate_inv_variables()

        # Overwrite the covariance so the inverse is not just the identity.
        sess.run(state_ops.assign(factor._cov, _make_psd(3)))
        sess.run(factor.make_inverse_update_ops())

        rhs = np.array([4., 5., 6.], dtype=np.float32)
        rhs_tensors = utils.column_to_tensors(
            param_tuple, array_ops.constant(rhs))
        product = fisher_block.multiply_inverse(rhs_tensors)
        actual = sess.run(utils.tensors_to_column(product)).ravel()

        dense_fisher = sess.run(fisher_block.full_fisher_block())
        damped = dense_fisher + damping * np.eye(3)
        expected = np.dot(np.linalg.inv(damped), rhs)

        self.assertAllClose(actual, expected)
def testMultiplyInverseAgainstExplicit(self):
    """Compare FullyConnectedKFACBasicFB.multiply_inverse with NumPy.

    The expected value is inv(full_fisher_block() + damping * I) @ v with
    damping fixed at zero.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        input_dim = 3
        output_dim = 2
        inputs = array_ops.zeros([32, input_dim])
        outputs = array_ops.zeros([32, output_dim])
        params = array_ops.zeros([input_dim, output_dim])
        fisher_block = fb.FullyConnectedKFACBasicFB(
            lc.LayerCollection(), inputs, outputs, has_bias=False)

        output_grads = outputs**2
        # This test is only valid without damping.
        damping = 0.
        fisher_block.instantiate_factors((output_grads,), damping)

        in_factor = fisher_block._input_factor
        out_factor = fisher_block._output_factor
        sess.run(state_ops.assign(in_factor._cov, _make_psd(3)))
        sess.run(state_ops.assign(out_factor._cov, _make_psd(2)))
        sess.run(in_factor.make_inverse_update_ops())
        sess.run(out_factor.make_inverse_update_ops())

        rhs = np.arange(6, dtype=np.float32)
        rhs_tensors = utils.column_to_tensors(
            params, array_ops.constant(rhs))
        product = fisher_block.multiply_inverse(rhs_tensors)
        actual = sess.run(utils.tensors_to_column(product)).ravel()

        dense_fisher = sess.run(fisher_block.full_fisher_block())
        damped = dense_fisher + damping * np.eye(6)
        expected = np.dot(np.linalg.inv(damped), rhs)

        self.assertAllClose(actual, expected)
def testMultiplyInverseAgainstExplicit(self):
    """Compare ConvKFCBasicFB.multiply_inverse with an explicit inverse.

    The expected value is inv(full_fisher_block() + damping * I) @ v,
    computed with NumPy and damping fixed at zero.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        tensor_shape = (2, 2, 2, 2)
        params = array_ops.zeros(tensor_shape)
        inputs = array_ops.zeros(tensor_shape)
        outputs = array_ops.zeros(tensor_shape)
        fisher_block = fb.ConvKFCBasicFB(
            lc.LayerCollection(), params, (1, 1, 1, 1), 'SAME')
        fisher_block.register_additional_minibatch(inputs, outputs)

        output_grads = outputs**2
        # This test is only valid without damping.
        damping = 0.
        fisher_block.instantiate_factors(([output_grads],), damping)

        in_factor = fisher_block._input_factor
        out_factor = fisher_block._output_factor
        sess.run(state_ops.assign(in_factor._cov, _make_psd(8)))
        sess.run(state_ops.assign(out_factor._cov, _make_psd(2)))
        sess.run(in_factor.make_inverse_update_ops())
        sess.run(out_factor.make_inverse_update_ops())

        rhs = np.arange(16, dtype=np.float32)
        rhs_tensors = utils.column_to_tensors(
            params, array_ops.constant(rhs))
        product = fisher_block.multiply_inverse(rhs_tensors)
        actual = sess.run(utils.tensors_to_column(product)).ravel()

        dense_fisher = sess.run(fisher_block.full_fisher_block())
        damped = dense_fisher + damping * np.eye(16)
        expected = np.dot(np.linalg.inv(damped), rhs)

        self.assertAllClose(actual, expected)
def multiply_inverse(self, vector):
    """Apply the factor's `left_multiply_inverse` to `vector`.

    `vector` is flattened with `utils.tensors_to_column`, passed to
    `self._factor.left_multiply_inverse` together with `self._damping`,
    and the result is restored with `utils.column_to_tensors`, using
    `vector` itself as the template.

    Args:
      vector: The tensor, or structure of tensors, to multiply. It must be
        accepted by `utils.tensors_to_column`.

    Returns:
      Whatever `utils.column_to_tensors(vector, ...)` returns for the
      multiplied column.
    """
    # The stray debug print() calls were removed. They wrote to stdout on
    # every call from library code.
    vector_flat = utils.tensors_to_column(vector)
    out_flat = self._factor.left_multiply_inverse(vector_flat, self._damping)
    return utils.column_to_tensors(vector, out_flat)
def testMultiplyInverseAgainstExplicit(self):
    """Compare FullFB.multiply_inverse with an explicit dense inverse.

    The block is registered through register_additional_tower. The expected
    value is inv(full_fisher_block() + damping * I) @ v, computed with NumPy.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        param_tuple = (array_ops.constant([1., 2.]), array_ops.constant(3.))
        fisher_block = fb.FullFB(lc.LayerCollection(), param_tuple)
        fisher_block.register_additional_tower(32)

        grad_tuple = (array_ops.constant([2., 3.]), array_ops.constant(4.))
        damping = 0.5
        fisher_block.instantiate_factors((grad_tuple,), damping)
        factor = fisher_block._factor
        factor.instantiate_cov_variables()
        fisher_block.register_inverse()
        factor.instantiate_inv_variables()

        # Overwrite the covariance so the inverse is not just the identity.
        sess.run(state_ops.assign(factor._cov, _make_psd(3)))
        sess.run(factor.make_inverse_update_ops())

        rhs = np.array([4., 5., 6.], dtype=np.float32)
        rhs_tensors = utils.column_to_tensors(
            param_tuple, array_ops.constant(rhs))
        product = fisher_block.multiply_inverse(rhs_tensors)
        actual = sess.run(utils.tensors_to_column(product)).ravel()

        dense_fisher = sess.run(fisher_block.full_fisher_block())
        damped = dense_fisher + damping * np.eye(3)
        expected = np.dot(np.linalg.inv(damped), rhs)

        self.assertAllClose(actual, expected)
def testMultiplyInverseAgainstExplicit(self):
    """Compare NaiveDiagonalFB.multiply_inverse with an explicit inverse.

    The minibatch size is supplied through register_additional_minibatch.
    The expected value is inv(full_fisher_block() + damping * I) @ v,
    computed with NumPy.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        param_tuple = (array_ops.constant([1., 2.]), array_ops.constant(3.))
        fisher_block = fb.NaiveDiagonalFB(lc.LayerCollection(), param_tuple)
        fisher_block.register_additional_minibatch(32)

        grad_tuple = (param_tuple[0]**2, math_ops.sqrt(param_tuple[1]))
        damping = 0.5
        fisher_block.instantiate_factors((grad_tuple,), damping)

        # Install a known covariance as a (3, 1) column.
        cov_values = array_ops.constant([2., 3., 4.])
        cov_column = array_ops.reshape(cov_values, [-1, 1])
        factor = fisher_block._factor
        sess.run(state_ops.assign(factor._cov, cov_column))
        sess.run(factor.make_inverse_update_ops())

        rhs = np.array([4., 5., 6.], dtype=np.float32)
        rhs_tensors = utils.column_to_tensors(
            param_tuple, array_ops.constant(rhs))
        product = fisher_block.multiply_inverse(rhs_tensors)
        actual = sess.run(utils.tensors_to_column(product)).ravel()

        dense_fisher = sess.run(fisher_block.full_fisher_block())
        damped = dense_fisher + damping * np.eye(3)
        expected = np.dot(np.linalg.inv(damped), rhs)

        self.assertAllClose(actual, expected)
def testMultiplyInverseAgainstExplicit(self):
    """Check FullyConnectedKFACBasicFB.multiply_inverse against NumPy.

    With zero damping, the block's result must match multiplying the vector
    by the explicit inverse of full_fisher_block().
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        n_in, n_out = 3, 2
        layer_inputs = array_ops.zeros([32, n_in])
        layer_outputs = array_ops.zeros([32, n_out])
        weights_template = array_ops.zeros([n_in, n_out])
        kfac_block = fb.FullyConnectedKFACBasicFB(
            lc.LayerCollection(), layer_inputs, layer_outputs,
            has_bias=False)

        squared_outputs = layer_outputs**2
        damping = 0.  # This test is only valid without damping.
        kfac_block.instantiate_factors((squared_outputs,), damping)

        # Install known PSD covariances, then refresh both inverses.
        sess.run(state_ops.assign(
            kfac_block._input_factor._cov, _make_psd(3)))
        sess.run(state_ops.assign(
            kfac_block._output_factor._cov, _make_psd(2)))
        sess.run(kfac_block._input_factor.make_inverse_update_ops())
        sess.run(kfac_block._output_factor.make_inverse_update_ops())

        flat_vec = np.arange(6, dtype=np.float32)
        structured_vec = utils.column_to_tensors(
            weights_template, array_ops.constant(flat_vec))
        block_result = kfac_block.multiply_inverse(structured_vec)
        block_result_flat = sess.run(
            utils.tensors_to_column(block_result)).ravel()

        fisher = sess.run(kfac_block.full_fisher_block())
        reference = np.dot(
            np.linalg.inv(fisher + damping * np.eye(6)), flat_vec)

        self.assertAllClose(block_result_flat, reference)
def testMultiplyInverseAgainstExplicit(self):
    """Check ConvKFCBasicFB.multiply_inverse against NumPy.

    With zero damping, the block's result must match multiplying the vector
    by the explicit inverse of full_fisher_block().
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        kernel = array_ops.zeros((2, 2, 2, 2))
        conv_inputs = array_ops.zeros((2, 2, 2, 2))
        conv_outputs = array_ops.zeros((2, 2, 2, 2))
        conv_block = fb.ConvKFCBasicFB(
            lc.LayerCollection(), kernel, (1, 1, 1, 1), 'SAME')
        conv_block.register_additional_minibatch(conv_inputs, conv_outputs)

        squared_outputs = conv_outputs**2
        damping = 0.  # This test is only valid without damping.
        conv_block.instantiate_factors(([squared_outputs],), damping)

        # Install known PSD covariances, then refresh both inverses.
        sess.run(state_ops.assign(
            conv_block._input_factor._cov, _make_psd(8)))
        sess.run(state_ops.assign(
            conv_block._output_factor._cov, _make_psd(2)))
        sess.run(conv_block._input_factor.make_inverse_update_ops())
        sess.run(conv_block._output_factor.make_inverse_update_ops())

        flat_vec = np.arange(16, dtype=np.float32)
        structured_vec = utils.column_to_tensors(
            kernel, array_ops.constant(flat_vec))
        block_result = conv_block.multiply_inverse(structured_vec)
        block_result_flat = sess.run(
            utils.tensors_to_column(block_result)).ravel()

        fisher = sess.run(conv_block.full_fisher_block())
        reference = np.dot(
            np.linalg.inv(fisher + damping * np.eye(16)), flat_vec)

        self.assertAllClose(block_result_flat, reference)
def testMultiplyInverseAgainstExplicit(self):
    """Check NaiveDiagonalFB.multiply_inverse against NumPy.

    The block is built with 32 as its third constructor argument. The
    expected value is inv(full_fisher_block() + damping * I) @ v.
    """
    with ops.Graph().as_default(), self.test_session() as sess:
        random_seed.set_random_seed(200)

        param_tuple = (array_ops.constant([1., 2.]), array_ops.constant(3.))
        diag_block = fb.NaiveDiagonalFB(
            lc.LayerCollection(), param_tuple, 32)

        grad_tuple = (param_tuple[0]**2, math_ops.sqrt(param_tuple[1]))
        damping = 0.5
        diag_block.instantiate_factors((grad_tuple,), damping)

        # Install a known covariance as a (3, 1) column.
        cov_column = array_ops.reshape(
            array_ops.constant([2., 3., 4.]), [-1, 1])
        sess.run(state_ops.assign(diag_block._factor._cov, cov_column))
        sess.run(diag_block._factor.make_inverse_update_ops())

        flat_vec = np.array([4., 5., 6.], dtype=np.float32)
        structured_vec = utils.column_to_tensors(
            param_tuple, array_ops.constant(flat_vec))
        block_result = diag_block.multiply_inverse(structured_vec)
        block_result_flat = sess.run(
            utils.tensors_to_column(block_result)).ravel()

        fisher = sess.run(diag_block.full_fisher_block())
        reference = np.dot(
            np.linalg.inv(fisher + damping * np.eye(3)), flat_vec)

        self.assertAllClose(block_result_flat, reference)
def multiply(self, vector):
    """Return cov @ v + damping * v, shaped like `vector`.

    `vector` is flattened with `utils.tensors_to_column`, multiplied by
    `self._factor.get_cov()`, and `self._damping` times the flattened vector
    is added. `utils.column_to_tensors` then restores the result, using
    `vector` as the template.
    """
    column = utils.tensors_to_column(vector)
    cov_product = math_ops.matmul(self._factor.get_cov(), column)
    damping_term = self._damping * column
    return utils.column_to_tensors(vector, cov_product + damping_term)
def multiply_matpower(self, vector, exp):
    """Apply the factor's `left_multiply_matpower` to `vector`.

    `vector` is flattened with `utils.tensors_to_column` and passed to
    `self._factor.left_multiply_matpower` along with `exp` and
    `self._damping_func`. `utils.column_to_tensors` then restores the
    result, using `vector` as the template.
    """
    column = utils.tensors_to_column(vector)
    powered = self._factor.left_multiply_matpower(
        column, exp, self._damping_func)
    return utils.column_to_tensors(vector, powered)
def multiply(self, vector):
    """Return v * (cov + damping), shaped like `vector`.

    `vector` is flattened with `utils.tensors_to_column` and multiplied with
    the `*` operator by `self._factor.get_cov() + self._damping`.
    `utils.column_to_tensors` then restores the result, using `vector` as
    the template.
    """
    column = utils.tensors_to_column(vector)
    damped_cov = self._factor.get_cov() + self._damping
    scaled = column * damped_cov
    return utils.column_to_tensors(vector, scaled)
def multiply(self, vector):
    """Multiply `vector` by the factor covariance and add a damping term.

    Computes matmul(self._factor.get_cov(), v) + self._damping * v on the
    column form of `vector`, then converts the column back with
    `utils.column_to_tensors`, using `vector` as the template.
    """
    flat = utils.tensors_to_column(vector)
    covariance = self._factor.get_cov()
    result_flat = math_ops.matmul(covariance, flat) + self._damping * flat
    return utils.column_to_tensors(vector, result_flat)
def multiply_inverse(self, vector):
    """Multiply `vector` by `self._factor.get_inverse(self._damping)`.

    The inverse is applied with `math_ops.matmul` to the column form of
    `vector`. `utils.column_to_tensors` then restores the result, using
    `vector` as the template.
    """
    factor_inverse = self._factor.get_inverse(self._damping)
    column = utils.tensors_to_column(vector)
    product = math_ops.matmul(factor_inverse, column)
    return utils.column_to_tensors(vector, product)
def multiply(self, vector):
    """Scale the flattened `vector` by the factor covariance plus damping.

    Computes v * (self._factor.get_cov() + self._damping) on the column form
    of `vector`, then converts the column back with
    `utils.column_to_tensors`, using `vector` as the template.
    """
    flat = utils.tensors_to_column(vector)
    return utils.column_to_tensors(
        vector, flat * (self._factor.get_cov() + self._damping))
def multiply(self, vector):
    """Apply the factor's `left_multiply` to `vector`.

    `vector` is flattened with `utils.tensors_to_column` and passed to
    `self._factor.left_multiply` along with `self._damping`.
    `utils.column_to_tensors` then restores the result, using `vector` as
    the template.
    """
    column = utils.tensors_to_column(vector)
    product = self._factor.left_multiply(column, self._damping)
    return utils.column_to_tensors(vector, product)
def multiply_inverse(self, vector):
    """Left-multiply `vector` by the factor's inverse.

    The inverse comes from `self._factor.get_inverse(self._damping)` and is
    applied with `math_ops.matmul` to the output of
    `utils.tensors_to_column(vector)`. `utils.column_to_tensors` then
    restores the product, using `vector` as the template.
    """
    inv = self._factor.get_inverse(self._damping)
    flat = utils.tensors_to_column(vector)
    return utils.column_to_tensors(vector, math_ops.matmul(inv, flat))
def multiply_matpower(self, vector, exp):
    """Run `vector` through `self._factor.left_multiply_matpower`.

    The flattened `vector`, `exp` and `self._damping_func` are forwarded to
    the factor. `utils.column_to_tensors` then restores the returned column,
    using `vector` as the template.
    """
    flat = utils.tensors_to_column(vector)
    result_flat = self._factor.left_multiply_matpower(
        flat, exp, self._damping_func)
    return utils.column_to_tensors(vector, result_flat)