def ConstructAndTestGradient(self, batch, input_rows, input_cols, filter_rows,
                             filter_cols, in_depth, out_depth, stride,
                             padding, test_input, use_gpu):
  input_shape = [batch, input_rows, input_cols, in_depth]
  filter_shape = [filter_rows, filter_cols, in_depth, out_depth]
  # TODO(yangke): re-factor the computation of output shape.
  if padding == "VALID":
    output_rows = (input_rows - filter_rows + stride) // stride
    output_cols = (input_cols - filter_cols + stride) // stride
  else:
    output_rows = (input_rows + stride - 1) // stride
    output_cols = (input_cols + stride - 1) // stride
  output_shape = [batch, output_rows, output_cols, out_depth]
  input_size = 1
  for x in input_shape:
    input_size *= x
  filter_size = 1
  for x in filter_shape:
    filter_size *= x
  input_data = [x * 1.0 / input_size for x in range(0, input_size)]
  filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
  with self.test_session(use_gpu=use_gpu):
    # Conv2DGrad functions are not compiled for double due to
    # a problem in the way Eigen's Conv2DGrad works for double.
    # So we disable the DOUBLE path. We should re-enable this
    # when double support returns for CPU and/or GPU.
    # data_type = tf.float64
    # tolerance = 1e-8
    data_type = tf.float32
    tolerance = 0.002
    input_tensor = tf.constant(input_data, shape=input_shape,
                               dtype=data_type, name="input")
    filter_tensor = tf.constant(filter_data, shape=filter_shape,
                                dtype=data_type, name="filter")
    conv = tf.nn.conv2d(input_tensor, filter_tensor,
                        [1, stride, stride, 1], padding, name="conv")
    self.assertEqual(output_shape, conv.get_shape())
    if test_input:
      err = gc.ComputeGradientError(input_tensor, input_shape,
                                    conv, output_shape)
    else:
      err = gc.ComputeGradientError(filter_tensor, filter_shape,
                                    conv, output_shape)
    print("conv_2d gradient error = ", err)
    self.assertLess(err, tolerance)
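# The TODO above asks for the output-shape computation to be factored out.
# A minimal sketch of such a helper, assuming the same VALID/SAME conventions
# as the inline code (the name _conv_output_dim is hypothetical, not part of
# the test suite):
def _conv_output_dim(input_dim, filter_dim, stride, padding):
  if padding == "VALID":
    # VALID counts only fully-overlapping filter positions.
    return (input_dim - filter_dim + stride) // stride
  # SAME output size depends only on the input size and stride.
  return (input_dim + stride - 1) // stride

# E.g. _conv_output_dim(5, 3, 1, "VALID") == 3 and
# _conv_output_dim(5, 3, 2, "SAME") == 3.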
def _testBatchNormGradient(self, param_index, tag, scale_after_normalization,
                           err_tolerance=1e-11):
  x_shape = [3, 5, 4, 5]
  param_shape = [5]
  np.random.seed(1)  # Make it reproducible.
  x_val = np.random.random_sample(x_shape).astype(np.float64)
  m_val = np.random.random_sample(param_shape).astype(np.float64)
  v_val = np.random.random_sample(param_shape).astype(np.float64)
  beta_val = np.random.random_sample(param_shape).astype(np.float64)
  gamma_val = np.random.random_sample(param_shape).astype(np.float64)
  with self.test_session():
    x = constant_op.constant(x_val, name="x")
    m = constant_op.constant(m_val, name="m")
    v = constant_op.constant(v_val, name="v")
    beta = constant_op.constant(beta_val, name="beta")
    gamma = constant_op.constant(gamma_val, name="gamma")
    epsilon = 0.001
    # If scale_after_normalization is False, backprop for gamma
    # will be 0. gamma is unchanged.
    output = nn.batch_norm_with_global_normalization(
        x, m, v, beta, gamma, epsilon, scale_after_normalization)
    all_params = [x, m, v, beta, gamma]
    all_shapes = [x_shape, param_shape, param_shape, param_shape, param_shape]
    err = gc.ComputeGradientError(all_params[param_index],
                                  all_shapes[param_index], output, x_shape)
  print("Batch normalization %s gradient %s scale err = " %
        (tag, "with" if scale_after_normalization else "without"), err)
  self.assertLess(err, err_tolerance)
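# The comment about gamma's zero gradient follows directly from the op's
# formula. Assuming batch_norm_with_global_normalization computes, with m, v,
# beta, gamma broadcast against x along depth:
#   y = (x - m) / sqrt(v + epsilon) * gamma + beta  (scale_after_normalization)
#   y = (x - m) / sqrt(v + epsilon) + beta          (otherwise)
# a NumPy sketch of the function being gradient-checked:
import numpy as np

def _batch_norm_reference(x, m, v, beta, gamma, epsilon, scale):
  y = (x - m) / np.sqrt(v + epsilon)
  # gamma does not appear in the unscaled branch, so dy/dgamma == 0 there.
  return y * gamma + beta if scale else y + beta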
def test_grad(self):
  with self.test_session():
    shape = (5,)
    x = tf.constant([5, 4, 3, 2, 1], dtype=tf.float32)
    y = gen_zero_out_op_2.zero_out(x)
    err = gradient_checker.ComputeGradientError(x, shape, y, shape)
    self.assertLess(err, 1e-4)
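# Every test here reduces to the same check: ComputeGradientError is assumed
# to return the maximum elementwise difference between the theoretical
# Jacobian (from backprop) and a numerical one (from central finite
# differences). A minimal NumPy sketch of the numerical side for a function
# f mapping one array to another (_numeric_jacobian is illustrative only):
import numpy as np

def _numeric_jacobian(f, x, delta=1e-3):
  x = np.asarray(x, dtype=np.float64)
  y = np.asarray(f(x))
  jacobian = np.zeros((x.size, y.size))
  for i in range(x.size):
    dx = np.zeros(x.size)
    dx[i] = delta
    dx = dx.reshape(x.shape)
    # Central difference: (f(x + dx) - f(x - dx)) / (2 * delta).
    jacobian[i, :] = (np.asarray(f(x + dx)) -
                      np.asarray(f(x - dx))).ravel() / (2 * delta)
  return jacobian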
def testGradientsEmbeddingLookupSparse(self):
  vocab_size = 12
  batch_size = 4
  param_shape = [2, 3]
  sp_ids, sp_weights, _, _, _ = (self._RandomIdsAndWeights(
      batch_size, vocab_size))
  for num_shards, combiner, dtype, ignore_weights in itertools.product(
      [1, 3], ["sum", "mean"], [tf.float32, tf.float64], [True, False]):
    with self.test_session():
      x, params, _ = _EmbeddingParams(num_shards, vocab_size,
                                      shape=param_shape, dtype=dtype)
      y = tf.nn.embedding_lookup_sparse(
          x, sp_ids, None if ignore_weights else sp_weights,
          combiner=combiner)
      x_name = [_PName(i) for i in range(num_shards)]
      x_init_value = [params[x_n + ":0"] for x_n in x_name]
      x_shape = [i.shape for i in x_init_value]
      y_shape = [batch_size] + list(params[_PName(0) + ":0"].shape[1:])
      err = gc.ComputeGradientError(x, x_shape, y, y_shape,
                                    x_init_value=x_init_value)
      self.assertLess(err, 1e-5 if dtype == tf.float64 else 2e-3)
def _testGradients(self, tr_a, tr_b, sp_a, sp_b, name):
  with self.test_session():
    a = tf.constant(RandMatrix(3, 2, tr_a), dtype=tf.float32)
    b = tf.constant(RandMatrix(2, 4, tr_b), dtype=tf.float32)
    m = tf.matmul(a, b,
                  name=name,
                  transpose_a=tr_a,
                  transpose_b=tr_b,
                  a_is_sparse=sp_a,
                  b_is_sparse=sp_b)
    err = (gc.ComputeGradientError(a, [2, 3] if tr_a else [3, 2], m, [3, 4]) +
           gc.ComputeGradientError(b, [4, 2] if tr_b else [2, 4], m, [3, 4]))
  print("sparse_matmul gradient err = ", err)
  self.assertLess(err, 1e-3)
def testGradientStridedReductionGC(self):
  with self.test_session():
    inp = np.random.rand(4, 2).astype("f")
    a = tf.constant([float(x) for x in inp.flatten()],
                    shape=[4, 2], dtype=tf.float32)
    tiled = tf.tile(a, [1, 2])
    err = gc.ComputeGradientError(a, [4, 2], tiled, [4, 4])
    self.assertLess(err, 1e-3)
def testExpandDimsGradient(self):
  with self.test_session():
    inp = tf.constant(np.random.rand(4, 2).astype("f"), dtype=tf.float32)
    expanded = tf.expand_dims(inp, 1)
    err = gc.ComputeGradientError(inp, [4, 2], expanded, [4, 1, 2])
    self.assertLess(err, 1e-3)
def testGradient(self):
  sizes = [4, 2]
  with self.test_session():
    logits, targets, _ = self._Inputs(sizes=sizes)
    loss = nn.sigmoid_cross_entropy_with_logits(logits, targets)
    err = gc.ComputeGradientError(logits, sizes, loss, sizes)
  print("logistic loss gradient err = ", err)
  self.assertLess(err, 1e-7)
def testSqueezeGradientWithSqueezeDims(self):
  with self.test_session():
    inp = np.random.rand(4, 2).astype("f")
    a = tf.reshape(inp, [4, 1, 2, 1])
    squeezed = tf.squeeze(a, [1])
    err = gc.ComputeGradientError(a, [4, 1, 2, 1], squeezed, [4, 2, 1])
    self.assertLess(err, 1e-3)
def testGradient(self):
  x_shape = [5, 10]
  x_np = np.random.randn(*x_shape).astype(np.float64)
  with self.test_session():
    x_tf = constant_op.constant(x_np)
    y_tf = nn.softmax(x_tf)
    err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
  eps = 1e-8
  self.assertLess(err, eps)
def testGradientBias(self):
  with self.test_session():
    t = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    shape=[3, 2], dtype=tf.float64)
    b = tf.constant([1.3, 2.4], dtype=tf.float64)
    bo = tf.nn.bias_add(t, b)
    err = gradient_checker.ComputeGradientError(b, [2], bo, [3, 2])
  print("bias add bias gradient err = ", err)
  self.assertLess(err, 1e-10)
def _ConstructAndTestGradient(self, pool_func, input_sizes, output_sizes,
                              window_rows, window_cols, row_stride,
                              col_stride, padding, use_gpu,
                              x_init_value=None):
  """Verifies the gradients of the avg or max pooling function.

  Args:
    pool_func: Function to be called: tf.nn.avg_pool or tf.nn.max_pool.
    input_sizes: Input tensor dimensions.
    output_sizes: Output tensor dimensions.
    window_rows: Kernel size in the row dim.
    window_cols: Kernel size in the col dim.
    row_stride: Row stride.
    col_stride: Col stride.
    padding: Padding type.
    use_gpu: Whether we are running on GPU.
    x_init_value: Values to be passed to the gradient checker.
  """
  total_size = 1
  for s in input_sizes:
    total_size *= s
  # Initializes the input tensor with array containing incrementing
  # numbers from 1.
  x = [f * 1.0 for f in range(1, total_size + 1)]
  with self.test_session(use_gpu=use_gpu):
    input_tensor = tf.constant(x, shape=input_sizes, name="input")
    if pool_func == tf.nn.avg_pool:
      func_name = "avg_pool"
      err_margin = 1e-4
    else:
      if x_init_value is None:
        # Max pooling needs distinct input values so that the finite
        # differences do not land on ties in the argmax.
        x_init_value = np.asfarray(np.arange(1, total_size + 1),
                                   dtype=np.float32).reshape(input_sizes)
      func_name = "max_pool"
      err_margin = 1e-3
    t = pool_func(input_tensor,
                  ksize=[1, window_rows, window_cols, 1],
                  strides=[1, row_stride, col_stride, 1],
                  padding=padding,
                  name=func_name)
    err = gc.ComputeGradientError(input_tensor, input_sizes, t,
                                  output_sizes, x_init_value=x_init_value,
                                  delta=1e-2)
  print("%s gradient error = " % func_name, err)
  self.assertLess(err, err_margin)
def testGradientTensor4D(self):
  with self.test_session():
    s = [2, 3, 4, 2]
    x = np.arange(1.0, 49.0).reshape(s).astype(np.float32)
    t = tf.constant(x, shape=s, dtype=tf.float32)
    b = tf.constant([1.3, 2.4], dtype=tf.float32)
    bo = tf.nn.bias_add(t, b)
    err = gradient_checker.ComputeGradientError(t, s, bo, s, x_init_value=x)
  print("bias add tensor gradient err = ", err)
  self.assertLess(err, 1e-3)
def testGradientInput1(self):
  with self.test_session(use_gpu=False):
    x = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    shape=[3, 2], dtype=tf.float64, name="x")
    y = tf.constant([1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7],
                    shape=[2, 4], dtype=tf.float64, name="y")
    m = tf.matmul(x, y, name="matmul")
    err = gc.ComputeGradientError(y, [2, 4], m, [3, 4])
  print("matmul input1 gradient err = ", err)
  self.assertLess(err, 1e-10)
def testGradients(self):
  t = [tf.float32, tf.float64]
  for src_t in t:
    for dst_t in t:
      with self.test_session():
        x = tf.constant(1.0, src_t)
        z = tf.identity(x)
        y = tf.cast(z, dst_t)
        err = gc.ComputeGradientError(x, [1], y, [1])
        self.assertLess(err, 1e-3)
def testFloatReshapeGradThreeDimensions(self):
  x = np.arange(1., 25.).reshape([1, 24]).astype(np.float32)
  s = list(np.shape(x))
  with self.test_session():
    input_tensor = tf.constant(x, shape=[2, 3, 4])
    reshape_out = tf.reshape(input_tensor, [1, 8, 3])
    err = gc.ComputeGradientError(input_tensor, s, reshape_out, s,
                                  x_init_value=x)
  print("Reshape gradient error = %g" % err)
  self.assertLess(err, 1e-3)
def testGradients(self):
  np.random.seed(7)
  for use_gpu in False, True:
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      shapes = [shape[1:]] * shape[0]
      with self.test_session(use_gpu=use_gpu):
        xs = list(map(tf.constant, data))
        c = tf.pack(xs)
        err = gradient_checker.ComputeGradientError(xs, shapes, c, shape)
        self.assertLess(err, 1e-6)
def testGradient(self): x_shape = [20, 7, 3] np.random.seed(1) # Make it reproducible. x_val = np.random.random_sample(x_shape).astype(np.float64) with self.test_session(): x = constant_op.constant(x_val, name="x") output = nn.l2_loss(x) err = gc.ComputeGradientError(x, x_shape, output, [1]) print("L2Loss gradient err = %g " % err) err_tolerance = 1e-11 self.assertLess(err, err_tolerance)
def testL2NormalizeGradient(self):
  x_shape = [20, 7, 3]
  np.random.seed(1)
  x_np = np.random.random_sample(x_shape).astype(np.float64)
  for dim in range(len(x_shape)):
    with self.test_session():
      x_tf = constant_op.constant(x_np, name="x")
      y_tf = nn.l2_normalize(x_tf, dim)
      err = gc.ComputeGradientError(x_tf, x_shape, y_tf, x_shape)
    print("L2Normalize gradient err = %g " % err)
    self.assertLess(err, 1e-4)
def testGradient(self):
  with self.test_session():
    x = tf.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5], name="x")
    y = tf.nn.softplus(x, name="softplus")
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32, order="F")
    err = gc.ComputeGradientError(x, [2, 5], y, [2, 5],
                                  x_init_value=x_init)
  print("softplus (float) gradient err = ", err)
  self.assertLess(err, 1e-4)
def testGradients(self):
  for use_gpu in False, True:
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      shapes = [shape[1:]] * shape[0]
      for i in range(shape[0]):
        with self.test_session(use_gpu=use_gpu):
          x = tf.constant(data)
          cs = tf.unpack(x, num=shape[0])
          err = gradient_checker.ComputeGradientError(x, shape,
                                                      cs[i], shapes[i])
          self.assertLess(err, 1e-6)
def _RunAndVerifyGradientResult(self, input_shape, multiples):
  with self.test_session():
    # Random values
    inp = np.random.rand(*input_shape)
    a = tf.constant([float(x) for x in inp.flatten()],
                    shape=input_shape, dtype=tf.float64)
    tiled = tf.tile(a, multiples)
    grad_shape = list(np.array(multiples) * np.array(inp.shape))
    err = gc.ComputeGradientError(a, list(input_shape), tiled, grad_shape,
                                  x_init_value=inp)
  print("tile(float) error = ", err)
  self.assertLess(err, 1e-3)
def testGradients(self):
  np.random.seed(7)
  for use_gpu in False, True:
    for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
      data = np.random.randn(*shape)
      shapes = [shape[1:]] * shape[0]
      with self.test_session(use_gpu=use_gpu):
        # TODO(irving): Remove list() once we handle maps correctly
        xs = list(map(tf.constant, data))
        c = tf.pack(xs)
        err = gradient_checker.ComputeGradientError(xs, shapes, c, shape)
        self.assertLess(err, 1e-6)
def testGradient(self):
  with self.test_session():
    l = tf.constant([0.0, 0.0, 1.0, 0.0,
                     1.0, 0.0, 0.0, 0.0,
                     0.0, 0.5, 0.0, 0.5],
                    shape=[3, 4], dtype=tf.float64, name="l")
    f = tf.constant([0.1, 0.2, 0.3, 0.4,
                     0.1, 0.4, 0.9, 1.6,
                     0.1, 0.8, 2.7, 6.4],
                    shape=[3, 4], dtype=tf.float64, name="f")
    x = tf.nn.softmax_cross_entropy_with_logits(f, l, name="xent")
    err = gc.ComputeGradientError(f, [3, 4], x, [3])
  print("cross entropy gradient err = ", err)
  self.assertLess(err, 5e-8)
def testGradient(self):
  x_shape = [2, 6, 4, 3]
  f_shape = [3, 3, 2, 3]
  y_shape = [2, 12, 8, 2]
  strides = [1, 2, 2, 1]
  np.random.seed(1)  # Make it reproducible.
  x_val = np.random.random_sample(x_shape).astype(np.float64)
  f_val = np.random.random_sample(f_shape).astype(np.float64)
  with self.test_session():
    x = constant_op.constant(x_val, name="x", dtype=types.float32)
    f = constant_op.constant(f_val, name="f", dtype=types.float32)
    output = nn.deconv2d(x, f, y_shape, strides=strides, padding="SAME")
    err = gc.ComputeGradientError([x, f], [x_shape, f_shape], output,
                                  y_shape)
  print("DeConv gradient err = %g " % err)
  err_tolerance = 0.0005
  self.assertLess(err, err_tolerance)
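# A quick check on the shapes above, assuming deconv2d follows the
# conv2d_transpose filter layout [height, width, out_channels, in_channels]:
# with SAME padding, output spatial dims are input dims times the stride,
# and output depth is f_shape[2]. Illustrative arithmetic only:
x_shape, f_shape, stride = [2, 6, 4, 3], [3, 3, 2, 3], 2
assert [x_shape[0], x_shape[1] * stride, x_shape[2] * stride,
        f_shape[2]] == [2, 12, 8, 2]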
def testGradFromResizeToSmallerInBothDims(self):
  in_shape = [1, 4, 6, 1]
  out_shape = [1, 2, 3, 1]
  x = np.arange(0, 24).reshape(in_shape).astype(np.float32)
  with self.test_session():
    input_tensor = tf.constant(x, shape=in_shape)
    resize_out = tf.image.resize_nearest_neighbor(input_tensor,
                                                  out_shape[1:3])
    err = gc.ComputeGradientError(input_tensor, in_shape, resize_out,
                                  out_shape, x_init_value=x)
  self.assertLess(err, 1e-3)
def _VerifyInput1(self, transpose_a, transpose_b):
  shape_x = [3, 2]
  shape_y = [2, 4]
  if transpose_a:
    shape_x = list(reversed(shape_x))
  if transpose_b:
    shape_y = list(reversed(shape_y))
  with self.test_session(use_gpu=False):
    x = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    shape=shape_x, dtype=tf.float64, name="x")
    y = tf.constant([1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7],
                    shape=shape_y, dtype=tf.float64, name="y")
    m = tf.matmul(x, y, transpose_a, transpose_b, name="matmul")
    err = gc.ComputeGradientError(y, shape_y, m, [3, 4])
  print("matmul input1 gradient err = ", err)
  self.assertLess(err, 1e-10)
def testGradientDouble(self):
  with self.test_session():
    x = tf.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5], dtype=tf.float64, name="x")
    y = tf.nn.relu(x, name="relu")
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float64, order="F")
    err = gc.ComputeGradientError(x, [2, 5], y, [2, 5],
                                  x_init_value=x_init)
  print("relu (double) gradient err = ", err)
  self.assertLess(err, 1e-10)
def testGradGradFloat(self):
  with self.test_session():
    x = tf.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5], name="x")
    y = tf.nn.relu(x, name="relu")
    z = tf.gradients(y, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32, order="F")
    err = gc.ComputeGradientError(x, [2, 5], z[0], [2, 5],
                                  x_init_value=x_init)
  print("relu (float) gradient of gradient err = ", err)
  self.assertLess(err, 1e-4)
def _testGlobalGradient(self, from_y="mean"):
  with self.test_session():
    x_shape = [3, 5, 4, 2]
    x_val = np.random.random_sample(x_shape).astype(np.float64)
    x = constant_op.constant(x_val)
    x.set_shape(x_shape)
    axes = [0, 1, 2]
    y_shape = [2]  # Depth of x
    out_mean, out_var = nn.moments(x, axes)
    if from_y == "mean":
      y = out_mean
    elif from_y == "var":
      y = out_var
    err = gc.ComputeGradientError(x, x_shape, y, y_shape)
    print("Moments %s gradient err = %g" % (from_y, err))
    self.assertLess(err, 1e-11)
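# For reference, nn.moments over axes [0, 1, 2] reduces everything except
# depth. A NumPy sketch of what the test differentiates through (the biased
# variance convention is an assumption, matching np.var's default):
import numpy as np

def _moments_reference(x):
  mean = x.mean(axis=(0, 1, 2))                   # shape [depth]
  var = ((x - mean) ** 2).mean(axis=(0, 1, 2))    # biased variance
  return mean, var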