Example #1
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([counts, mean_ss, variance_ss, shift], name, "normalize"):
    divisor = math_ops.inv(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.mul(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.mul(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.sub(
        math_ops.mul(variance_ss, divisor),
        math_ops.square(shifted_mean),
        name="variance")
  return (mean, variance)
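A minimal NumPy sketch (made-up data, not part of the example above) of the relationship the function implements: with a shift value s, mean = sum(x - s)/n + s and variance = sum((x - s)^2)/n - (sum(x - s)/n)^2.

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
shift = x[0]                              # any value close to the data works
counts = float(x.size)
mean_ss = np.sum(x - shift)               # (possibly shifted) sum
variance_ss = np.sum((x - shift) ** 2)    # (possibly shifted) squared sum

shifted_mean = mean_ss / counts
mean = shifted_mean + shift
variance = variance_ss / counts - shifted_mean ** 2

assert np.isclose(mean, x.mean()) and np.isclose(variance, x.var())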
Example #2
def _TanGrad(op, grad):
  """Returns grad * 1/sec^2(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    secx = math_ops.inv(math_ops.cos(x))
    secx2 = math_ops.square(secx)
    return grad * secx2
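A quick NumPy sanity check (hypothetical, independent of TensorFlow) that the analytic factor used above, 1/cos^2(x), matches a central finite difference of tan:

import numpy as np

x, eps = 0.3, 1e-6
numeric = (np.tan(x + eps) - np.tan(x - eps)) / (2 * eps)
analytic = 1.0 / np.cos(x) ** 2
assert np.isclose(numeric, analytic, rtol=1e-5)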
Example #3
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([counts, mean_ss, variance_ss, shift], name, "normalize"):
    divisor = math_ops.inv(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.mul(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.mul(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.sub(
        math_ops.mul(variance_ss, divisor),
        math_ops.square(shifted_mean),
        name="variance")
  return (mean, variance)
Example #4
def _TanGrad(op, grad):
    """Returns grad * 1/sec^2(x)."""
    x = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        secx = math_ops.inv(math_ops.cos(x))
        secx2 = math_ops.square(secx)
        return grad * secx2
Example #5
def _AtanGrad(op, grad):
  """Returns grad * 1/ (1 + x^2)"""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    inv = math_ops.inv(math_ops.add(one, x2))
    return grad * inv
Example #6
def _AtanGrad(op, grad):
    """Returns grad * 1/ (1 + x^2)"""
    x = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        x2 = math_ops.square(x)
        one = constant_op.constant(1, dtype=grad.dtype)
        inv = math_ops.inv(math_ops.add(one, x2))
        return grad * inv
Example #7
def dropout(self, input_, keep_prob):
    with ops.op_scope([input_], None, "dropout") as name:
        rands = keep_prob + random_ops.random_uniform(
            array_ops.shape(input_))
        floored = math_ops.floor(rands)
        ret = input_ * math_ops.inv(keep_prob) * floored
        ret.set_shape(input_.get_shape())
        return ret
Example #8
def moments(x, axes, name=None, keep_dims=False):
  """Calculate the mean and variance of `x`.

  The mean and variance are calculated by aggregating the contents of `x`
  across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
  and variance of a vector.

  When using these moments for batch normalization (see
  `tf.nn.batch_normalization`):
    * for so-called "global normalization", used with convolutional filters with
      shape `[batch, height, width, depth]`, pass `axes=[0, 1, 2]`.
    * for simple batch normalization pass `axes=[0]` (batch only).

  Args:
    x: A `Tensor`.
    axes: array of ints.  Axes along which to compute mean and
      variance.
    keep_dims: produce moments with the same dimensionality as the input.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([x, axes], name, "moments"):
    x = ops.convert_to_tensor(x, name="x")
    x_shape = x.get_shape()
    if all(x_shape[d].value is not None for d in axes):
      # The shape is known in the relevant axes, so we can statically
      # compute the divisor.
      divisor = 1.0
      for d in set(axes):
        divisor *= x.get_shape()[d].value
      divisor = constant_op.constant(1.0 / divisor, x.dtype, name="divisor")
    else:
      divisor = constant_op.constant(1.0, dtype=x.dtype)
      x_dynamic_shape = array_ops.shape(x)
      for d in set(axes):
        divisor *= math_ops.cast(x_dynamic_shape[d], x.dtype)
      divisor = math_ops.inv(divisor, name="divisor")
    constant_axes = constant_op.constant(axes, name="axes")
    # Note: We do not use Mean here because it is very slow on GPU.
    mean = math_ops.mul(
        math_ops.reduce_sum(x,
                            constant_axes,
                            keep_dims=True),
        divisor,
        name="mean")
    var = math_ops.mul(
        math_ops.reduce_sum(
            math_ops.squared_difference(x, mean),
            constant_axes,
            keep_dims=keep_dims),
        divisor,
        name="variance")
    if keep_dims:
      return mean, var
    else:
      return array_ops.squeeze(mean, squeeze_dims=axes), var
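A hypothetical usage sketch, assuming the TF 1.x-era public wrappers `tf.random_normal`, `tf.nn.moments`, and `tf.nn.batch_normalization`: compute per-channel moments of an NHWC batch for the "global normalization" case described in the docstring (`axes=[0, 1, 2]`).

images = tf.random_normal([8, 32, 32, 3])             # [batch, height, width, depth]
mean, variance = tf.nn.moments(images, axes=[0, 1, 2])
# mean and variance each have shape [3], one value per channel.
normalized = tf.nn.batch_normalization(images, mean, variance,
                                       offset=None, scale=None,
                                       variance_epsilon=1e-3)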
Example #9
def _AcosGrad(op, grad):
  """Returns grad * -1/sqrt(1-x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    den = math_ops.sqrt(math_ops.sub(one, x2))
    inv = math_ops.inv(den)
    return -grad * inv
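As with the tangent gradient, a small NumPy check (hypothetical, independent of TensorFlow) that -1/sqrt(1 - x^2) agrees with a finite difference of arccos:

import numpy as np

x, eps = 0.3, 1e-6
numeric = (np.arccos(x + eps) - np.arccos(x - eps)) / (2 * eps)
analytic = -1.0 / np.sqrt(1.0 - x ** 2)
assert np.isclose(numeric, analytic, rtol=1e-5)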
Example #10
def _AcosGrad(op, grad):
    """Returns grad * -1/sqrt(1-x^2)."""
    x = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        x2 = math_ops.square(x)
        one = constant_op.constant(1, dtype=grad.dtype)
        den = math_ops.sqrt(math_ops.sub(one, x2))
        inv = math_ops.inv(den)
        return -grad * inv
Example #11
def _SegmentMeanGrad(op, grad):
    """Gradient for SegmentMean."""
    input_rank = array_ops.rank(op.inputs[0])
    ones_shape = array_ops.concat(
        0, [array_ops.shape(op.inputs[1]), array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)]
    )
    ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype))
    scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1]))
    return array_ops.gather(scaled_grad, op.inputs[1]), None
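A NumPy sketch (made-up data, not part of the example) of what this gradient computes: each element of segment k receives the upstream gradient for that segment divided by the segment size, which is exactly `grad * inv(segment_sum(ones))` gathered back to the data layout.

import numpy as np

segment_ids = np.array([0, 0, 0, 1, 1])
grad = np.array([0.6, 1.0])                    # upstream gradient, one per segment

segment_sizes = np.bincount(segment_ids).astype(np.float64)  # segment_sum(ones)
scaled_grad = grad / segment_sizes                            # grad * inv(...)
grad_wrt_data = scaled_grad[segment_ids]                      # gather
# grad_wrt_data == [0.2, 0.2, 0.2, 0.5, 0.5]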
Example #12
def moments(x, axes, name=None):
    """Calculate the mean and variance of `x`.

  The mean and variance are calculated by aggregating the contents of `x`
  across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
  and variance of a vector.

  For so-called "global normalization" needed for convolutional filters pass
  `axes=[0, 1, 2]` (batch, height, width).  For batch normalization pass
  `axes=[0]` (batch).

  Args:
    x: A `Tensor`.
    axes: array of ints.  Axes along which to compute mean and
      variance.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
    with ops.op_scope([x, axes], name, "moments"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if all(x_shape[d].value is not None for d in axes):
            # The shape is known in the relevant axes, so we can statically
            # compute the divisor.
            divisor = 1.0
            for d in set(axes):
                divisor *= x.get_shape()[d].value
            divisor = constant_op.constant(1.0 / divisor,
                                           x.dtype,
                                           name="divisor")
        else:
            divisor = constant_op.constant(1.0, dtype=x.dtype)
            x_dynamic_shape = array_ops.shape(x)
            for d in set(axes):
                divisor *= math_ops.cast(x_dynamic_shape[d], x.dtype)
            divisor = math_ops.inv(divisor, name="divisor")
        axes = constant_op.constant(axes, name="axes")
        # Note: We do not use Mean here because it is very slow on GPU.
        # Note 2: The expression below is potentially more stable.
        # It is however a bit slower and stability doesn't appear to be an issue.
        # mean = math_ops.reduce_sum(math_ops.mul(x, divisor), axes, name="mean")
        # var = math_ops.reduce_sum(math_ops.mul(math_ops.square(x - mean),
        #                                        divisor), axes,
        #                    name="variance")
        mean = math_ops.mul(math_ops.reduce_sum(x, axes), divisor, name="mean")
        # Give x-mean a specific name, so the caller might take advantage of it.
        # The caller should have a fallback plan, however: this tensor may not be
        # available if this function implementation changes.
        x_centered = math_ops.sub(x, mean, name="x_centered")
        var = math_ops.mul(math_ops.reduce_sum(math_ops.square(x_centered),
                                               axes),
                           divisor,
                           name="variance")
        return mean, var
Example #13
def _SegmentMeanGrad(op, grad):
  """Gradient for SegmentMean."""
  input_rank = array_ops.rank(op.inputs[0])
  ones_shape = array_ops.concat(
      0, [array_ops.shape(op.inputs[1]),
          array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)])
  ones = array_ops.fill(ones_shape,
                        constant_op.constant(1, dtype=grad.dtype))
  scaled_grad = grad * math_ops.inv(math_ops.segment_sum(ones, op.inputs[1]))
  return array_ops.gather(scaled_grad, op.inputs[1]), None
Example #14
def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
    """Computes dropout.

  With probability `keep_prob`, outputs the input element scaled up by
  `1 / keep_prob`, otherwise outputs `0`.  The scaling is so that the expected
  sum is unchanged.

  By default, each element is kept or dropped independently.  If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions.  For example, if `shape(x) = [k, l, m, n]`
  and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be
  kept independently and each row and column will be kept or not kept together.

  Args:
    x: A tensor.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.
    name: A name for this operation (optional).

  Returns:
    A Tensor of the same shape as `x`.

  Raises:
    ValueError: If `keep_prob` is not in `(0, 1]`.
  """
    with ops.op_scope([x], name, "dropout") as name:
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, float) and not 0 < keep_prob <= 1:
            raise ValueError(
                "keep_prob must be a scalar tensor or a float in the "
                "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob,
                                          dtype=x.dtype,
                                          name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(
            x)
        # uniform [keep_prob, 1.0 + keep_prob)
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape,
                                                   seed=seed,
                                                   dtype=x.dtype)
        # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
        binary_tensor = math_ops.floor(random_tensor)
        ret = x * math_ops.inv(keep_prob) * binary_tensor
        ret.set_shape(x.get_shape())
        return ret
Example #15
def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
  """Computes dropout.

  With probability `keep_prob`, outputs the input element scaled up by
  `1 / keep_prob`, otherwise outputs `0`.  The scaling is so that the expected
  sum is unchanged.

  By default, each element is kept or dropped independently.  If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions.  For example, if `shape(x) = [k, l, m, n]`
  and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be
  kept independently and each row and column will be kept or not kept together.

  Args:
    x: A tensor.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    seed: A Python integer. Used to create random seeds. See
      [`set_random_seed`](../../api_docs/python/constant_op.md#set_random_seed)
      for behavior.
    name: A name for this operation (optional).

  Returns:
    A Tensor of the same shape as `x`.

  Raises:
    ValueError: If `keep_prob` is not in `(0, 1]`.
  """
  with ops.op_scope([x], name, "dropout") as name:
    x = ops.convert_to_tensor(x, name="x")
    if isinstance(keep_prob, float) and not 0 < keep_prob <= 1:
      raise ValueError("keep_prob must be a scalar tensor or a float in the "
                       "range (0, 1], got %g" % keep_prob)
    keep_prob = ops.convert_to_tensor(keep_prob,
                                      dtype=x.dtype,
                                      name="keep_prob")
    keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

    noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
    # uniform [keep_prob, 1.0 + keep_prob)
    random_tensor = keep_prob
    random_tensor += random_ops.random_uniform(noise_shape,
                                               seed=seed,
                                               dtype=x.dtype)
    # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
    binary_tensor = math_ops.floor(random_tensor)
    ret = x * math_ops.inv(keep_prob) * binary_tensor
    ret.set_shape(x.get_shape())
    return ret
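A hypothetical usage sketch (TF 1.x-style names) of the `noise_shape` behaviour described in the docstring: with `noise_shape=[4, 1]` broadcast against a `[4, 6]` input, each row is kept or dropped as a unit.

x = tf.ones([4, 6])
dropped = tf.nn.dropout(x, keep_prob=0.5, noise_shape=[4, 1], seed=1)
# Each row of `dropped` is either all zeros or all 1 / 0.5 = 2.0, so the
# expected sum of the output equals the sum of the input.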
Example #16
def moments(x, axes, name=None):
  """Calculate the mean and variance of `x`.

  The mean and variance are calculated by aggregating the contents of `x`
  across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
  and variance of a vector.

  For so-called "global normalization" needed for convolutional filters pass
  `axes=[0, 1, 2]` (batch, height, width).  For batch normalization pass
  `axes=[0]` (batch).

  Args:
    x: A `Tensor`.
    axes: array of ints.  Axes along which to compute mean and
      variance.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.op_scope([x, axes], name, "moments"):
    x = ops.convert_to_tensor(x, name="x")
    x_shape = x.get_shape()
    if all(x_shape[d].value is not None for d in axes):
      # The shape is known in the relevant axes, so we can statically
      # compute the divisor.
      divisor = 1.0
      for d in set(axes):
        divisor *= x.get_shape()[d].value
      divisor = constant_op.constant(1.0 / divisor, x.dtype, name="divisor")
    else:
      divisor = constant_op.constant(1.0, dtype=x.dtype)
      x_dynamic_shape = array_ops.shape(x)
      for d in set(axes):
        divisor *= math_ops.cast(x_dynamic_shape[d], x.dtype)
      divisor = math_ops.inv(divisor, name="divisor")
    axes = constant_op.constant(axes, name="axes")
    # Note: We do not use Mean here because it is very slow on GPU.
    # Note 2: The expression below is potentially more stable.
    # It is however a bit slower and stability doesn't appear to be an issue.
    # mean = math_ops.reduce_sum(math_ops.mul(x, divisor), axes, name="mean")
    # var = math_ops.reduce_sum(math_ops.mul(math_ops.square(x - mean),
    #                                        divisor), axes,
    #                    name="variance")
    mean = math_ops.mul(math_ops.reduce_sum(x, axes), divisor, name="mean")
    # Give x-mean a specific name, so the caller might take advantage of it.
    # The caller should have a fallback plan, however: this tensor may not be
    # available if this function implementation changes.
    x_centered = math_ops.sub(x, mean, name="x_centered")
    var = math_ops.mul(math_ops.reduce_sum(math_ops.square(x_centered), axes),
                       divisor, name="variance")
    return mean, var
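Example #17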
def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):
    with ops.op_scope([x], name, "dropout") as name:
        x = ops.convert_to_tensor(x, name="x")

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        # uniform [keep_prob, 1.0 + keep_prob)
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape,
                                                   seed=seed,
                                                   dtype=x.dtype)
        # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
        binary_tensor = math_ops.floor(random_tensor)
        ret = x * math_ops.inv(math_ops.reduce_mean(keep_prob, reduction_indices=[1], keep_dims=True)) * binary_tensor
        ret.set_shape(x.get_shape())
        return ret
Example #18
def per_image_whitening(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Note that this implementation is limited:
  *  It only whitens based on the statistics of an individual image.
  *  It does not take into account the covariance structure.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The whitened image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.inv(
      math_ops.sqrt(math_ops.cast(num_pixels, dtypes.float32)))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.sub(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
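A minimal NumPy sketch (hypothetical helper, not part of TensorFlow) of the same per-image computation, assuming `img` is an array of shape `[height, width, channels]`:

import numpy as np

def whiten(img):
    img = img.astype(np.float32)
    mean = img.mean()
    stddev = img.std()
    # Cap the divisor away from zero to protect against uniform images.
    adjusted_stddev = max(stddev, 1.0 / np.sqrt(img.size))
    return (img - mean) / adjusted_stddev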
Example #19
def per_image_whitening(image):
    """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Note that this implementation is limited:
  *  It only whitens based on the statistics of an individual image.
  *  It does not take into account the covariance structure.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The whitened image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
    image = ops.convert_to_tensor(image, name='image')
    _Check3DImage(image, require_static=False)
    num_pixels = math_ops.reduce_prod(array_ops.shape(image))

    image = math_ops.cast(image, dtype=dtypes.float32)
    image_mean = math_ops.reduce_mean(image)

    variance = (math_ops.reduce_mean(math_ops.square(image)) -
                math_ops.square(image_mean))
    variance = gen_nn_ops.relu(variance)
    stddev = math_ops.sqrt(variance)

    # Apply a minimum normalization that protects us against uniform images.
    min_stddev = math_ops.inv(
        math_ops.sqrt(math_ops.cast(num_pixels, dtypes.float32)))
    pixel_value_scale = math_ops.maximum(stddev, min_stddev)
    pixel_value_offset = image_mean

    image = math_ops.sub(image, pixel_value_offset)
    image = math_ops.div(image, pixel_value_scale)
    return image
Example #20
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
    """Gradient for SelfAdjointEigV2."""
    e = op.outputs[0]
    v = op.outputs[1]
    # a = op.inputs[0], which satisfies
    # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
    with ops.control_dependencies([grad_e.op, grad_v.op]):
        if grad_v is not None:
            # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
            # Notice that because of the term involving f, the gradient becomes
            # infinite (or NaN in practice) when eigenvalues are not unique.
            # Mathematically this should not be surprising, since for (k-fold)
            # degenerate eigenvalues, the corresponding eigenvectors are only defined
            # up to arbitrary rotation in a (k-dimensional) subspace.
            f = array_ops.matrix_set_diag(
                math_ops.inv(
                    array_ops.expand_dims(e, -2) -
                    array_ops.expand_dims(e, -1)), array_ops.zeros_like(e))
            grad_a = math_ops.batch_matmul(
                v,
                math_ops.batch_matmul(
                    array_ops.matrix_diag(grad_e) +
                    f * math_ops.batch_matmul(v, grad_v, adj_x=True),
                    v,
                    adj_y=True))
        else:
            grad_a = math_ops.batch_matmul(
                v,
                math_ops.batch_matmul(array_ops.matrix_diag(grad_e),
                                      v,
                                      adj_y=True))
        # The forward op only depends on the lower triangular part of a, so here we
        # symmetrize and take the lower triangle
        grad_a = array_ops.matrix_band_part(
            grad_a + array_ops.matrix_transpose(grad_a), -1, 0)
        grad_a = array_ops.matrix_set_diag(
            grad_a, 0.5 * array_ops.matrix_diag_part(grad_a))
        return grad_a
Example #21
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  v = op.outputs[1]
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e.op, grad_v.op]):
    if grad_v is not None:
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only defined
      # up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.inv(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.batch_matmul(
          v,
          math_ops.batch_matmul(
              array_ops.matrix_diag(grad_e) + f * math_ops.batch_matmul(
                  v, grad_v, adj_x=True),
              v,
              adj_y=True))
    else:
      grad_a = math_ops.batch_matmul(
          v,
          math_ops.batch_matmul(
              array_ops.matrix_diag(grad_e), v, adj_y=True))
    # The forward op only depends on the lower triangular part of a, so here we
    # symmetrize and take the lower triangle
    grad_a = array_ops.matrix_band_part(
        grad_a + array_ops.matrix_transpose(grad_a), -1, 0)
    grad_a = array_ops.matrix_set_diag(grad_a, 0.5 *
                                       array_ops.matrix_diag_part(grad_a))
    return grad_a
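For reference, the expression built in the `grad_v is not None` branch, transcribed directly from the code above (not an independent derivation; $V^{H}$ denotes the conjugate transpose applied via `adj_x`/`adj_y`):

\nabla_A L = V\Bigl(\operatorname{diag}(\nabla_e L) + F \circ \bigl(V^{H}\,\nabla_V L\bigr)\Bigr)V^{H},
\qquad
F_{ij} = \begin{cases} 1/(e_j - e_i), & i \neq j \\ 0, & i = j \end{cases}

The code then symmetrizes this result and keeps only the lower triangle, since the forward op reads only the lower triangular part of `a`.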
Example #22
def _SqrtGrad(op, grad):
  y = op.outputs[0]  # y = x^(1/2)
  return grad * (.5 * math_ops.inv(y))
Example #23
def _LogGrad(op, grad):
  """Returns grad * (1/x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    return grad * math_ops.inv(x)
Example #24
def _RsqrtGrad(op, grad):
  x = op.inputs[0]
  y = op.outputs[0]  # y = x^(-1/2)
  with ops.control_dependencies([grad.op]):
    return grad * ((-0.5) * math_ops.inv(x) * y)
Example #25
def _SqrtGrad(op, grad):
  y = op.outputs[0]  # y = x^(1/2)
  with ops.control_dependencies([grad.op]):
    return grad * (.5 * math_ops.inv(y))
Example #26
def _RsqrtGrad(op, grad):
    x = op.inputs[0]
    y = op.outputs[0]  # y = x^(-1/2)
    return grad * ((-0.5) * math_ops.inv(x) * y)
Example #27
def _LogGrad(op, grad):
    """Returns grad * (1/x)."""
    x = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        return grad * math_ops.inv(x)
Example #28
def _SqrtGrad(op, grad):
    y = op.outputs[0]  # y = x^(1/2)
    with ops.control_dependencies([grad.op]):
        return grad * (.5 * math_ops.inv(y))
Example #29
def _LogGrad(op, grad):
    """Returns grad * (1/x)."""
    x = op.inputs[0]
    return grad * math_ops.inv(x)
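Example #30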
def _Log1pGrad(op, grad):
    """Returns grad * (1/(1 + x))."""
    x = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        x = math_ops.conj(x)
        return grad * math_ops.inv(1 + x)
Example #31
def _RsqrtGrad(op, grad):
    x = op.inputs[0]
    y = op.outputs[0]  # y = x^(-1/2)
    with ops.control_dependencies([grad.op]):
        return grad * ((-0.5) * math_ops.inv(x) * y)
Example #32
def _gmm_model_grad(op, dl_dp, dl_dgauss, dl_daux2):
    x     = op.inputs[0]
    w     = op.inputs[1]
    mu    = op.inputs[2]
    sigma = op.inputs[3]
    
    p_x   = op.outputs[0]
    gaussians   = op.outputs[1]
    sigma_inv_x_mu = op.outputs[2]
    
    dl_dp = array_ops.expand_dims(dl_dp, -1)
    
    x_shape_np  = x.get_shape()   #array_ops.get_shape(x)
    mu_shape_np = mu.get_shape()  #array_ops.get_shape(mu)
    x_shape  = array_ops.shape(x)
    mu_shape = array_ops.shape(mu)
    
    n_samples = x_shape[0]
    n_params  = mu_shape_np[0]
    n_kernels = mu_shape_np[1]
    n_dims    = mu_shape[2]
    
    #print("x_shape: ", x_shape)
    #print("n_samples: ", n_samples)
    #print("n_dims: ", n_dims)
    #pi= 3.14159265358979323846
    #norm_const = math_ops.inv( math_ops.sqrt((math_ops.pow(2.0*pi, math_ops.to_float(n_dims))) * math_ops.reduce_prod(sigma, 2))) 
            
    sigma_inv = math_ops.inv( sigma ) # 1/x element-wise, shape: [sample_id, kernel_id, sigma...]
            
    #x_mu = array_ops.reshape(x, [n_samples, 1, n_dims]) - mu # shape: [sample_id, kernel_id, x-mu]
            
    #sigma_inv_x_mu = math_ops.mul( x_mu, sigma_inv )
    
    
    #gaussians = math_ops.mul( norm_const, math_ops.exp( -0.5* math_ops.reduce_sum( x_mu * sigma_inv_x_mu, 2 ) ) )
    
    # gradient computation
    # derivative with respect w
    if n_kernels==1:
        dl_dw = 0*w
    else:
        dl_dw = math_ops.mul( dl_dp , gaussians)
        
            
    # derivative with respect to mu
    w_gaussians = math_ops.mul(w, gaussians)
    # dp_dmu: tensor of shape [samples, kernel, dim]
    dp_dmu = math_ops.mul(array_ops.expand_dims(w_gaussians, -1), sigma_inv_x_mu)
    # dl_dmu: tensor of shape [samples, kernel, dim]
    dl_dmu = math_ops.mul(array_ops.expand_dims(dl_dp, -1), dp_dmu)

    # derivative with respect to sigma
    # dp_dsigma: tensor of shape [samples, kernel, dim]
    dp_dsigma = math_ops.pow(sigma_inv_x_mu, 2.0) - sigma_inv
    dp_dsigma = 0.5 * math_ops.mul(array_ops.expand_dims(w_gaussians, -1), dp_dsigma)
    # dl_dsigma: tensor of shape [samples, kernel, dim]
    dl_dsigma = math_ops.mul(array_ops.expand_dims(dl_dp, -1), dp_dsigma)
    
    # derivative with respect x
    dl_dx = math_ops.reduce_sum(-dl_dmu, 1)
    
    if n_params == 1:         
        dl_dw = math_ops.reduce_sum(dl_dw, 0)
        dl_dw = array_ops.expand_dims(dl_dw, 0)
        
        dl_dmu = math_ops.reduce_sum(dl_dmu, 0)
        dl_dmu = array_ops.expand_dims(dl_dmu, 0)
        
        dl_dsigma = math_ops.reduce_sum(dl_dsigma, 0)
        dl_dsigma = array_ops.expand_dims(dl_dsigma, 0)
    
    return dl_dx, dl_dw, dl_dmu, dl_dsigma
Example #33
def _Log1pGrad(op, grad):
  """Returns grad * (1/(1 + x))."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    return grad * math_ops.inv(1 + x)
Example #34
def _LogGrad(op, grad):
  """Returns grad * (1/x)."""
  x = op.inputs[0]
  return grad * math_ops.inv(x)
Example #35
def _RsqrtGrad(op, grad):
  x = op.inputs[0]
  y = op.outputs[0]  # y = x^(-1/2)
  return grad * ((-0.5) * math_ops.inv(x) * y)
Example #36
def _SqrtGrad(op, grad):
    y = op.outputs[0]  # y = x^(1/2)
    return grad * (.5 * math_ops.inv(y))