def testGradient(self):
  s = [2, 3, 4, 2]
  x = np.arange(1.0, 49.0).reshape(s).astype(np.float32)
  with self.test_session():
    t = tf.convert_to_tensor(x)
    su = tf.reduce_mean(t, [1, 2])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [2, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = tf.reduce_mean(t, [0, 1, 2, 3])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [1], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = tf.reduce_mean(t, [])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [2, 3, 4, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

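# For reference, a minimal sketch of what the numerical half of
# gradient_checker.ComputeGradient estimates: a central-difference
# Jacobian of shape [x.size, f(x).size]. `_numeric_jacobian` is a
# hypothetical helper for illustration, not part of the gradient_checker
# API; it assumes the file's existing `import numpy as np`.
def _numeric_jacobian(f, x, delta=1e-3):
  """Estimates df(x)/dx by central differences; f maps array -> array."""
  y = f(x)
  jacobian = np.zeros((x.size, y.size), dtype=np.float64)
  flat_x = x.reshape(-1)  # view into x, so writes perturb x in place
  for i in range(x.size):
    original = flat_x[i]
    flat_x[i] = original + delta
    y_plus = f(x).reshape(-1)
    flat_x[i] = original - delta
    y_minus = f(x).reshape(-1)
    flat_x[i] = original  # restore
    jacobian[i, :] = (y_plus - y_minus) / (2.0 * delta)
  return jacobian
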
def testGradientMatchesSegmentSum(self):
  # Strategy: compute the gradient for UnsortedSegmentSum and SegmentSum
  # and compare the outputs, which should be identical.
  # NB: for this test to work, indices must be valid for SegmentSum:
  # they must be sorted and contiguous, and num_segments must be
  # max(indices) + 1.
  indices = [0, 0, 1, 1, 1, 2, 3, 4, 5]
  n = len(indices)
  num_cols = 2
  shape = [n, num_cols]
  num_segments = max(indices) + 1
  with self.test_session():
    tf_x, np_x = self._input(shape, dtype=tf.float64)
    # Results from UnsortedSegmentSum
    unsorted_s = tf.unsorted_segment_sum(data=tf_x,
                                         segment_ids=indices,
                                         num_segments=num_segments)
    unsorted_jacob_t, unsorted_jacob_n = gradient_checker.ComputeGradient(
        tf_x, shape, unsorted_s, [num_segments, num_cols],
        x_init_value=np_x.astype(np.double), delta=1)
    # Results from SegmentSum
    sorted_s = tf.segment_sum(data=tf_x, segment_ids=indices)
    sorted_jacob_t, sorted_jacob_n = gradient_checker.ComputeGradient(
        tf_x, shape, sorted_s, [num_segments, num_cols],
        x_init_value=np_x.astype(np.double), delta=1)
  self.assertAllClose(unsorted_jacob_t, sorted_jacob_t, rtol=1e-3, atol=1e-3)
  self.assertAllClose(unsorted_jacob_n, sorted_jacob_n, rtol=1e-3, atol=1e-3)

def testGradient(self):
  s = [2, 3, 4, 2]
  # NOTE(kearnes): divide by 20 so product is a reasonable size
  x = np.arange(1.0, 49.0).reshape(s).astype(np.float32) / 20.
  with self.test_session():
    t = tf.convert_to_tensor(x)
    su = tf.reduce_prod(t, [])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [2, 3, 4, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = tf.reduce_prod(t, [1, 2])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [2, 2], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

    su = tf.reduce_prod(t, [0, 1, 2, 3])
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, s, su, [1], x_init_value=x, delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

  # NOTE(kearnes): the current gradient calculation gives NaNs for 0 inputs
  x = np.arange(0.0, 48.0).reshape(s).astype(np.float32) / 20.
  with self.test_session():
    t = tf.convert_to_tensor(x)
    su = tf.reduce_prod(t, [])
    jacob_t, _ = gradient_checker.ComputeGradient(
        t, s, su, [2, 3, 4, 2], x_init_value=x, delta=1)
    with self.assertRaisesOpError("Tensor had NaN values"):
      tf.check_numerics(jacob_t, message="_ProdGrad NaN test").op.run()

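# Why zeros make the gradient NaN above: if the gradient of reduce_prod
# is computed by dividing the full product by each input element (an
# assumption consistent with the NOTE above, not a claim about the exact
# kernel), then any zero entry produces 0 / 0. A minimal numpy sketch:
def _prod_grad_nan_demo():
  x = np.array([0.0, 2.0, 3.0], dtype=np.float32)
  with np.errstate(divide="ignore", invalid="ignore"):
    return np.prod(x) / x  # -> [nan, 0., 0.]: 0/0 at the zero entry
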
def _compareMulGradient(self, data):
  # data is a float matrix of shape [n, 4]. Its columns hold the real
  # parts of x, the imaginary parts of x, the real parts of y, and the
  # imaginary parts of y, in that order.
  with self.test_session():
    inp = tf.convert_to_tensor(data)
    xr, xi, yr, yi = tf.split(1, 4, inp)

    def vec(x):  # Reshape to a vector
      return tf.reshape(x, [-1])
    xr, xi, yr, yi = vec(xr), vec(xi), vec(yr), vec(yi)

    def cplx(r, i):  # Combine (r, i) into a complex vector
      return tf.complex(r, i)
    x, y = cplx(xr, xi), cplx(yr, yi)
    # z is x times y in the complex plane.
    z = x * y
    # Define the loss function as the sum of all coefficients of z.
    loss = tf.reduce_sum(tf.real(z) + tf.imag(z))
    epsilon = 0.005
    jacob_t, jacob_n = gc.ComputeGradient(inp, list(data.shape), loss, [1],
                                          x_init_value=data, delta=epsilon)
  self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)

def Test(self):
  with self.test_session():
    np.random.seed(1)
    m = np.random.uniform(
        low=1.0, high=100.0,
        size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    a = tf.constant(m)
    epsilon = np.finfo(dtype_).eps
    # Optimal stepsize for central difference is O(epsilon^{1/3}).
    delta = epsilon**(1.0 / 3.0)
    # Tolerance obtained by looking at actual differences using
    # np.linalg.norm(theoretical - numerical, np.inf) on an -mavx build.
    tol = 1e-3
    if len(shape_) == 2:
      c = tf.matrix_determinant(a)
    else:
      c = tf.batch_matrix_determinant(a)
    out_shape = shape_[:-2]  # last two dimensions hold matrices
    theoretical, numerical = gc.ComputeGradient(a, shape_, c, out_shape,
                                                delta=delta)
    self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

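# Two notes on the test above. First, the stepsize: for a central
# difference (f(x + d) - f(x - d)) / (2 * d), the truncation error is
# O(d**2) while floating-point roundoff contributes O(epsilon / d);
# balancing d**2 ~ epsilon / d gives d ~ epsilon**(1.0 / 3.0), which for
# float64 is np.finfo(np.float64).eps ** (1.0 / 3.0), roughly 6e-6.
# Second, the theoretical side rests on Jacobi's formula,
# d det(A) / dA = det(A) * inv(A).T, the standard gradient of the
# determinant for invertible A.
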
def _testGradient(self, x, a):
  with self.test_session():
    inx = tf.convert_to_tensor(x)
    xs = list(x.shape)
    ina = tf.convert_to_tensor(a)
    y = tf.pad(inx, ina)
    # The expected shape of y: x's shape grown by the total padding
    # added along each axis.
    ys = list(np.array(x.shape) + np.sum(np.array(a), axis=1))
    jacob_t, jacob_n = gc.ComputeGradient(inx, xs, y, ys, x_init_value=x)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def testGradient(self):
  shape = [4, 4]
  indices = [0, 1, 2, 2]
  for tf_op in [tf.segment_sum,
                tf.segment_mean,
                tf.segment_min,
                tf.segment_max]:
    with self.test_session():
      tf_x, np_x = self._input(shape, dtype=tf.float64)
      s = tf_op(data=tf_x, segment_ids=indices)
      jacob_t, jacob_n = gradient_checker.ComputeGradient(
          tf_x, shape, s, [3, 4],
          x_init_value=np_x.astype(np.double), delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

def _compareGradient(self, shape, sum_shape, reduction_axes):
  if reduction_axes is not None and np.shape(reduction_axes) == (1,):
    # Test scalar reduction_axes argument
    self._compareGradient(shape, sum_shape, reduction_axes[0])
  x = np.arange(1.0, 49.0).reshape(shape).astype(np.float64)
  with self.test_session():
    t = tf.convert_to_tensor(x)
    su = tf.reduce_sum(t, reduction_axes)
    jacob_t, jacob_n = gradient_checker.ComputeGradient(
        t, shape, su, sum_shape, x_init_value=x, delta=1)
  self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)

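# Note on the tight tolerances: the gradient of reduce_sum is exact --
# every input element flows to the output with weight 1, so the
# theoretical Jacobian is a 0/1 matrix and, in float64, the numerical
# estimate matches it to roundoff; hence rtol=1e-8 is safe here.
# E.g. d/dx sum(x) == np.ones_like(x).
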
def _compareGradientY(self, func, x, y):
  with self.test_session():
    inx = tf.convert_to_tensor(x)
    iny = tf.convert_to_tensor(y)
    out = func(inx, iny)
    s = list(np.shape(x))
    jacob_t, jacob_n = gc.ComputeGradient(iny, s, out, s, x_init_value=y)
  if x.dtype == np.float32:
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
  elif x.dtype == np.float64:
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def _compareCpu(self, x, np_func, tf_func):
  np_ans = np_func(x)
  with self.test_session(use_gpu=False):
    inx = tf.convert_to_tensor(x)
    y = tf_func(inx)
    tf_cpu = y.eval()
    self.assertShapeEqual(np_ans, y)
    self.assertAllClose(np_ans, tf_cpu)
    if x.dtype == np.float32:
      s = list(np.shape(x))
      jacob_t, jacob_n = gc.ComputeGradient(inx, s, y, s, x_init_value=x)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
    elif x.dtype == np.float64:
      s = list(np.shape(x))
      jacob_t, jacob_n = gc.ComputeGradient(inx, s, y, s, x_init_value=x)
      self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def _compareGradientX(self, x, y, np_func, tf_func):
  z = np_func(x, y)
  zs = list(z.shape)
  with self.test_session():
    inx = tf.convert_to_tensor(x)
    iny = tf.convert_to_tensor(y)
    out = tf_func(inx, iny)
    xs = list(x.shape)
    jacob_t, jacob_n = gc.ComputeGradient(inx, xs, out, zs, x_init_value=x)
  if x.dtype == np.float32:
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
  elif x.dtype == np.float64:
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)

def testGradient(self):
  num_cols = 2
  indices_flat = np.array([0, 4, 0, 8, 3, 8, 4, 7, 7, 3])
  num_segments = max(indices_flat) + 3
  for indices in indices_flat, indices_flat.reshape(5, 2):
    shape = indices.shape + (num_cols,)
    with self.test_session():
      tf_x, np_x = self._input(shape, dtype=tf.float64)
      s = tf.unsorted_segment_sum(data=tf_x,
                                  segment_ids=indices,
                                  num_segments=num_segments)
      jacob_t, jacob_n = gradient_checker.ComputeGradient(
          tf_x, shape, s, [num_segments, num_cols],
          x_init_value=np_x.astype(np.double), delta=1)
    self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)

def _checkGrad(self, x, y, adj_x, adj_y):
  assert 3 == x.ndim
  assert 3 == y.ndim
  with self.test_session():
    inx = tf.convert_to_tensor(x)
    iny = tf.convert_to_tensor(y)
    z = tf.batch_matmul(inx, iny, adj_x, adj_y)
    loss = tf.reduce_sum(z)
    epsilon = 1e-2
    ((x_jacob_t, x_jacob_n), (y_jacob_t, y_jacob_n)) = gc.ComputeGradient(
        [inx, iny], [x.shape, y.shape], loss, [1],
        x_init_value=[x, y], delta=epsilon)

  tf.logging.info("x_jacob_t = %s", x_jacob_t.reshape(x.shape))
  tf.logging.info("x_jacob_n = %s", x_jacob_n.reshape(x.shape))
  self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)
  tf.logging.info("y_jacob_t = %s", y_jacob_t.reshape(y.shape))
  tf.logging.info("y_jacob_n = %s", y_jacob_n.reshape(y.shape))
  self.assertAllClose(y_jacob_t, y_jacob_n, rtol=1e-2, atol=epsilon)

def _compareGradient(self, x):
  # x[:, 0] is real, x[:, 1] is imag. We combine real and imag into
  # complex numbers, then extract the real and imag parts and compute
  # the squared sum. This is obviously the same as
  # sum(real * real) + sum(imag * imag); we just want to make sure the
  # gradient function is checked.
  with self.test_session():
    inx = tf.convert_to_tensor(x)
    real, imag = tf.split(1, 2, inx)
    real, imag = tf.reshape(real, [-1]), tf.reshape(imag, [-1])
    cplx = tf.complex(real, imag)
    cplx = tf.conj(cplx)
    loss = tf.reduce_sum(tf.square(tf.real(cplx))) + tf.reduce_sum(
        tf.square(tf.imag(cplx)))
    epsilon = 1e-3
    jacob_t, jacob_n = gc.ComputeGradient(inx, list(x.shape), loss, [1],
                                          x_init_value=x, delta=epsilon)
  self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)

def Test(self):
  with self.test_session():
    np.random.seed(1)
    m = np.random.uniform(
        low=1.0, high=100.0,
        size=np.prod(shape)).reshape(shape).astype(dtype)
    a = tf.constant(m)
    epsilon = np.finfo(dtype).eps
    # Optimal stepsize for central difference is O(epsilon^{1/3}).
    delta = epsilon**(1.0 / 3.0)
    tol = 1e-3
    if len(shape) == 2:
      ainv = tf.matrix_inverse(a)
    else:
      ainv = tf.batch_matrix_inverse(a)
    theoretical, numerical = gc.ComputeGradient(a, shape, ainv, shape,
                                                delta=delta)
    self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)

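# For reference, the identity behind the theoretical gradient checked
# above: differentiating A @ inv(A) = I gives
#   d(inv(A)) = -inv(A) @ dA @ inv(A),
# the standard expression for the derivative of a matrix inverse.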