コード例 #1
0
ファイル: nn_grad.py プロジェクト: adsar/tensorflow
def _BatchNormWithGlobalNormalizationGrad(op, grad):
    """Compute the gradients of a BatchNormWithGlobalNormalization op.

    Inputs 0, 1, 2 and 4 of `op`, the incoming gradient, and the op's
    `variance_epsilon` and `scale_after_normalization` attributes are handed
    to the generated `_batch_norm_with_global_normalization_grad` wrapper.
    Input 3 is not passed to it.

    NOTE(review): earlier documentation of this function stated that nothing
    is backpropagated for the mean and variance because they are not trained
    with backprop, yet `dm` and `dv` are returned exactly as produced by the
    wrapper -- confirm which behaviour is intended.

    Args:
      op: The BatchNormOp for which gradients are generated.
      grad: Tensor. The gradients passed to the BatchNormOp.

    Returns:
      A 5-tuple `(dx, dm, dv, db, dg)`. The formulas below are carried over
      from the function's earlier documentation and cannot be verified from
      this wrapper alone:
        dx: Backprop for input, (grad * (g * rsqrt(v + epsilon))).
        dm: Backprop for mean,
            sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon)).
        dv: Backprop for variance,
            sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2).
        db: Backprop for beta, grad reduced in all except the last dimension.
        dg: Backprop for gamma, (grad * ((x - m) * rsqrt(v + epsilon))).
    """
    # Local names follow the x / m / v / g notation used in the formulas above.
    x_in = op.inputs[0]
    mean_in = op.inputs[1]
    var_in = op.inputs[2]
    gamma_in = op.inputs[4]
    epsilon = op.get_attr("variance_epsilon")
    scale_after = op.get_attr("scale_after_normalization")

    dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad(
        x_in, mean_in, var_in, gamma_in, grad, epsilon, scale_after)
    return (dx, dm, dv, db, dg)
コード例 #2
0
def _BatchNormWithGlobalNormalizationGrad(op, grad):
    """Gradient function for the BatchNormWithGlobalNormalization op.

    Delegates to the generated `_batch_norm_with_global_normalization_grad`
    wrapper, passing op inputs 0, 1, 2 and 4 (input 3 is skipped), the
    upstream gradient, and the `variance_epsilon` and
    `scale_after_normalization` attributes read from `op`.

    NOTE(review): the previous docstring claimed that mean and variance are
    deliberately not backpropagated, but `dm` and `dv` from the wrapper are
    returned unchanged -- confirm the intended contract.

    Args:
      op: The BatchNormOp for which gradients are generated.
      grad: Tensor. The gradients passed to the BatchNormOp.

    Returns:
      The tuple `(dx, dm, dv, db, dg)`. Formulas as given in the previous
      documentation (not verifiable from this wrapper):
        dx: input backprop, (grad * (g * rsqrt(v + epsilon))).
        dm: mean backprop,
            sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon)).
        dv: variance backprop,
            sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2).
        db: beta backprop, grad reduced in all except the last dimension.
        dg: gamma backprop, (grad * ((x - m) * rsqrt(v + epsilon))).
    """
    batch_norm_grad = gen_nn_ops._batch_norm_with_global_normalization_grad
    forward_inputs = op.inputs
    dx, dm, dv, db, dg = batch_norm_grad(
        forward_inputs[0],
        forward_inputs[1],
        forward_inputs[2],
        forward_inputs[4],
        grad,
        op.get_attr("variance_epsilon"),
        op.get_attr("scale_after_normalization"),
    )
    input_grads = (dx, dm, dv, db, dg)
    return input_grads
コード例 #3
0
ファイル: nn_test.py プロジェクト: adam-erickson/tensorflow
 def testBatchNormGradImpl(self):
     """Compare the batch-norm gradient kernel with symbolic gradients.

     Builds constant inputs from seeded random float32 samples, then, for
     `use_gpu` False and True and for `scale_after_normalization` True and
     False, evaluates the outputs of
     `gen_nn_ops._batch_norm_with_global_normalization_grad` next to the
     gradients of `self._opsBatchNorm(...)` obtained via
     `gradients.gradients`, and asserts they agree within atol=1e-6.
     The gamma gradient is only compared when `scale_after_normalization`
     is True.
     """
     def _sample_float32(shape):
         # Draw order matters: with the fixed seed each call consumes the
         # next values of the shared NumPy random stream.
         return np.random.random_sample(shape).astype(np.float32)

     input_shape = [7, 5, 4, 6]
     per_channel_shape = [6]
     np.random.seed(1)  # Fixed seed so the sampled values are reproducible.
     x_np = _sample_float32(input_shape)
     mean_np = _sample_float32(per_channel_shape)
     var_np = _sample_float32(per_channel_shape)
     beta_np = _sample_float32(per_channel_shape)
     gamma_np = _sample_float32(per_channel_shape)
     backprop_np = _sample_float32(input_shape)

     grad_names = ["dx", "dm", "dv", "db", "dg"]
     for on_gpu in (False, True):
         with self.test_session(use_gpu=on_gpu) as sess:
             x = constant_op.constant(x_np, name="x")
             m = constant_op.constant(mean_np, name="m")
             v = constant_op.constant(var_np, name="v")
             beta = constant_op.constant(beta_np, name="beta")
             gamma = constant_op.constant(gamma_np, name="gamma")
             backprop = constant_op.constant(backprop_np, name="backprop")
             epsilon = 0.001
             for scale_after_normalization in (True, False):
                 # Gradients as computed directly by the kernel wrapper.
                 dx, dm, dv, db, dg = (
                     gen_nn_ops._batch_norm_with_global_normalization_grad(
                         x, m, v, gamma, backprop, epsilon,
                         scale_after_normalization))
                 kernel_grads = [dx, dm, dv, db, dg]

                 # Reference gradients derived from the composed forward ops.
                 on = self._opsBatchNorm(
                     x, m, v, beta, gamma, epsilon, scale_after_normalization)
                 odx, odm, odv, odb, odg = gradients.gradients(
                     [on], [x, m, v, beta, gamma], [backprop])
                 reference_grads = [odx, odm, odv, odb, odg]

                 # Without scaling, the gamma gradient is neither fetched
                 # nor compared.
                 num_checked = 5 if scale_after_normalization else 4
                 to_check = grad_names[:num_checked]
                 all_grads = sess.run(
                     kernel_grads[:num_checked] + reference_grads[:num_checked])

                 kernel_vals = all_grads[:num_checked]
                 reference_vals = all_grads[num_checked:]
                 for n, kernel_val, reference_val in zip(
                         to_check, kernel_vals, reference_vals):
                     print(n)
                     self.assertAllClose(reference_val, kernel_val, atol=1e-6)
コード例 #4
0
    def testBatchNormGradImpl(self):
        """Check the batch-norm gradient kernel against `tf.gradients`.

        Seeded random float32 arrays are wrapped in constants. For `use_gpu`
        False and True and `scale_after_normalization` True and False the
        test:
          * sets the default graph's producer version to 8 before calling
            `gen_nn_ops._batch_norm_with_global_normalization_grad`,
          * asserts that the result's `dx`/`dm`/`dv`/`db`/`dg` attributes
            equal the values obtained by unpacking it,
          * evaluates those outputs together with the gradients of
            `self._opsBatchNorm(...)` from `tf.gradients` and asserts they
            agree within atol=1e-6 (the gamma gradient only when
            `scale_after_normalization` is True).
        """
        input_shape = [7, 5, 4, 6]
        per_channel_shape = [6]
        np.random.seed(1)  # Fixed seed so the sampled values are reproducible.
        # The draw order below is part of the fixture: each call consumes the
        # next values of the seeded random stream.
        x_np = np.random.random_sample(input_shape).astype(np.float32)
        mean_np = np.random.random_sample(per_channel_shape).astype(np.float32)
        var_np = np.random.random_sample(per_channel_shape).astype(np.float32)
        beta_np = np.random.random_sample(per_channel_shape).astype(np.float32)
        gamma_np = np.random.random_sample(per_channel_shape).astype(np.float32)
        backprop_np = np.random.random_sample(input_shape).astype(np.float32)

        field_names = ("dx", "dm", "dv", "db", "dg")
        for on_gpu in (False, True):
            with self.test_session(use_gpu=on_gpu) as sess:
                x = tf.constant(x_np, name="x")
                m = tf.constant(mean_np, name="m")
                v = tf.constant(var_np, name="v")
                beta = tf.constant(beta_np, name="beta")
                gamma = tf.constant(gamma_np, name="gamma")
                backprop = tf.constant(backprop_np, name="backprop")
                epsilon = 0.001
                for scale_after_normalization in (True, False):
                    # _batch_norm_with_global_normalization_grad is deprecated
                    # in v9, so the graph's producer version is set to 8.
                    graph = tf.get_default_graph()
                    graph.graph_def_versions.producer = 8
                    grad = gen_nn_ops._batch_norm_with_global_normalization_grad(
                        x, m, v, gamma, backprop, epsilon,
                        scale_after_normalization)
                    dx, dm, dv, db, dg = grad
                    # Each output must be reachable both by position and by
                    # attribute name on the returned object.
                    for field, unpacked in zip(field_names,
                                               (dx, dm, dv, db, dg)):
                        self.assertEqual(getattr(grad, field), unpacked)

                    on = self._opsBatchNorm(x, m, v, beta, gamma, epsilon,
                                            scale_after_normalization, True)
                    odx, odm, odv, odb, odg = tf.gradients(
                        [on], [x, m, v, beta, gamma], [backprop])

                    # Kernel outputs first, reference gradients second; the
                    # gamma pair is only fetched when scaling is enabled.
                    if scale_after_normalization:
                        fetches = [dx, dm, dv, db, dg,
                                   odx, odm, odv, odb, odg]
                    else:
                        fetches = [dx, dm, dv, db, odx, odm, odv, odb]
                    all_grads = sess.run(fetches)

                    half = len(all_grads) // 2
                    for kernel_val, reference_val in zip(all_grads[:half],
                                                         all_grads[half:]):
                        self.assertAllClose(reference_val,
                                            kernel_val,
                                            atol=1e-6)