Example #1
def _ForUsingWhile(start,
                   limit,
                   delta,
                   inputs,
                   forbody,
                   name=None,
                   hostmem=None):
  """Helper to implement a For loop using a While."""
  # To support negative delta (e.g., range(100, 0, -3)), we iterate
  # over the range(n) and use iter * delta + start as the real
  # iteration index. (e.g., for i in range(34): iter = i * (-3) +
  # 100).
  d = math_ops.abs(delta)
  # XLA on TPUs doesn't support integer division
  n = math_ops.cast(
      math_ops.cast((math_ops.abs(limit - start) + d - 1), dtypes.float32) /
      math_ops.cast(d, dtypes.float32), dtypes.int32)

  # Carried loop variables ("extra_args") are implicitly added to the input list
  # of the WhileBody function. WhileCond does not call forbody, and so does not
  # depend on any of forbody's extra_args. Since WhileCond and WhileBody
  # must have identical inputs, we have to augment the cond signature to take
  # the same types as the carried loop variables.
  body_sig = [dtypes.int32] * 4 + list(forbody.declared_input_types)[1:]

  cond_name = "%s_Cond" % forbody.name

  @function.Defun(*body_sig, func_name=cond_name)
  def WhileCond(i, n, *args):
    del args
    return i < n

  body_name = "%s_Body" % forbody.name

  @function.Defun(*body_sig, func_name=body_name)
  def WhileBody(i, n, start, delta, *args):
    """A While wrapper for forbody that handles loop-carried captured inputs."""
    for_result = forbody(start + i * delta, *args)
    # Nullary functions return an Operation. Normal functions can't do this
    # because their return values are converted to Tensors.
    if isinstance(for_result, ops.Operation):
      for_result = ()
    # Unary functions return a single Tensor value.
    elif isinstance(for_result, ops.Tensor):
      for_result = (for_result,)
    return (i + 1, n, start, delta) + tuple(for_result)

  if hostmem is not None:
    hostmem = [0, 1, 2, 3] + [(4 + _) for _ in hostmem]
  else:
    hostmem = [0, 1, 2, 3]

  results = While(
      input_=[0, n, start, delta] + inputs,
      cond=WhileCond,
      body=WhileBody,
      name=name,
      hostmem=hostmem)
  # Slice off the loop-carried captured inputs.
  return list(results[4:len(results)])
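A quick pure-Python sanity check of the trip-count arithmetic above (no TensorFlow needed): n = ceil(|limit - start| / |delta|) should equal len(range(start, limit, delta)), and start + i * delta should reproduce the loop indices. The helper name trip_count is illustrative only.

def trip_count(start, limit, delta):
  # Same arithmetic as _ForUsingWhile: n = ceil(|limit - start| / |delta|),
  # written with integer ops instead of the float cast used for XLA.
  d = abs(delta)
  return (abs(limit - start) + d - 1) // d

for start, limit, delta in [(100, 0, -3), (0, 10, 2), (5, 5, 1)]:
  expected = list(range(start, limit, delta))
  n = trip_count(start, limit, delta)
  assert n == len(expected)
  assert [start + i * delta for i in range(n)] == expected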
Example #2
 def GetParams(self):
   """Test for rank 2 input in TF-TRT."""
   input_names = ["input", "input2"]
   # Two paths: first with rank 2 input, second with rank 4 input.
   input_dims = [[12, 5], [12, 5, 2, 2]]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     outputs = []
     for i in range(2):
       x = array_ops.placeholder(
           dtype=dtypes.float32, shape=input_dims[i], name=input_names[i])
       c = constant_op.constant(1.0, name="c%d_1" % i)
       q = math_ops.add(x, c, name="add%d_1" % i)
       q = math_ops.abs(q, name="abs%d_1" % i)
       c = constant_op.constant(2.2, name="c%d_2" % i)
       q = math_ops.add(q, c, name="add%d_2" % i)
       q = math_ops.abs(q, name="abs%d_2" % i)
       c = constant_op.constant(3.0, name="c%d_3" % i)
       q = math_ops.add(q, c, name="add%d_3" % i)
       if i == 0:
         for j in range(2):
           q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j))
       q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i)
       outputs.append(q)
     # Combine both paths
     q = math_ops.add(outputs[0], outputs[1], name="add")
     array_ops.squeeze(q, name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=input_names,
       input_dims=input_dims,
       output_names=[output_name],
       expected_output_dims=[tuple(input_dims[1])])
Example #3
def _assert_close(expected, actual, rtol=1e-04, name='assert_close'):
  with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
    expected = ops.convert_to_tensor(expected, name='expected')
    actual = ops.convert_to_tensor(actual, name='actual')
    rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
    rtol = ops.convert_to_tensor(rtol, name='rtol')
    return check_ops.assert_less(
        rdiff,
        rtol,
        data=('Condition expected =~ actual did not hold element-wise: '
              'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
              'rtol = ', rtol,),
        name=scope)
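For intuition, a minimal NumPy sketch of the same relative-difference check (illustrative only, not part of the TF API):

import numpy as np

def numpy_assert_close(expected, actual, rtol=1e-4):
  # Mirrors the rdiff = |expected - actual| / |expected| tensor built above.
  expected = np.asarray(expected, dtype=np.float64)
  actual = np.asarray(actual, dtype=np.float64)
  rdiff = np.abs(expected - actual) / np.abs(expected)
  if not np.all(rdiff < rtol):
    raise AssertionError('rdiff %s exceeds rtol %s' % (rdiff, rtol))

numpy_assert_close([1.0, 2.0], [1.00001, 2.00001])  # passes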
Example #4
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Huber Loss term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  See: https://en.wikipedia.org/wiki/Huber_loss

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the Huber loss function
      changes from quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = (abs_error - quadratic)
    losses = 0.5 * quadratic**2 + delta * linear
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
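The quadratic/linear rewrite above can be checked against the piecewise definition from the docstring; a small NumPy sketch, assuming scalar delta:

import numpy as np

def huber_piecewise(x, d):
  # Literal transcription of the docstring formula.
  return np.where(np.abs(x) <= d,
                  0.5 * x**2,
                  0.5 * d**2 + d * (np.abs(x) - d))

x = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
d = 1.0
quadratic = np.minimum(np.abs(x), d)
linear = np.abs(x) - quadratic
# Same rewrite as the TF code: 0.5 * quadratic**2 + d * linear.
np.testing.assert_allclose(0.5 * quadratic**2 + d * linear,
                           huber_piecewise(x, d))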
Example #5
def total_variation(logu, name=None):
  """The Total Variation Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  The Total-Variation Csiszar-function is:

  ```none
  f(u) = 0.5 |u - 1|
  ```

  Warning: this function makes non-log-space calculations and may therefore be
  numerically unstable for `|logu| >> 0`.

  Args:
    logu: Floating-type `Tensor` representing `log(u)` from above.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    total_variation_of_u: Floating-type `Tensor` of the Csiszar-function
      evaluated at `u = exp(logu)`.
  """

  with ops.name_scope(name, "total_variation", [logu]):
    logu = ops.convert_to_tensor(logu, name="logu")
    return 0.5 * math_ops.abs(math_ops.expm1(logu))
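Since expm1(logu) = u - 1, the return value is exactly 0.5 * |u - 1| evaluated at u = exp(logu); a one-line NumPy check:

import numpy as np

logu = np.array([-1.0, 0.0, 0.5])
np.testing.assert_allclose(0.5 * np.abs(np.expm1(logu)),
                           0.5 * np.abs(np.exp(logu) - 1.0))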
Example #6
  def testThatBackpropRuns(self):
    """Run optimization to ensure that gradients can be computed."""

    batch_size = 1
    image_height = 9
    image_width = 12
    image = variables.Variable(
        np.float32(
            np.random.uniform(size=[batch_size, image_height, image_width, 3])))
    control_point_locations = [[3., 3.]]
    control_point_locations = constant_op.constant(
        np.float32(np.expand_dims(control_point_locations, 0)))
    control_point_displacements = [[0.25, -0.5]]
    control_point_displacements = constant_op.constant(
        np.float32(np.expand_dims(control_point_displacements, 0)))
    warped_image, _ = sparse_image_warp.sparse_image_warp(
        image,
        control_point_locations,
        control_point_locations + control_point_displacements,
        num_boundary_points=3)

    loss = math_ops.reduce_mean(math_ops.abs(warped_image - image))
    optimizer = momentum.MomentumOptimizer(0.001, 0.9)
    grad = gradients.gradients(loss, [image])
    grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
    opt_func = optimizer.apply_gradients(zip(grad, [image]))
    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run([loss, opt_func])
Example #7
 def testInvalidGlobalStep(self):
   with ops.Graph().as_default() as g, self.test_session(graph=g):
     x = array_ops.placeholder(dtypes.float32, [])
     var = variable_scope.get_variable(
         "test", [], initializer=init_ops.constant_initializer(10))
     loss = math_ops.abs(var * x)
     with self.assertRaises(AttributeError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=constant_op.constant(
               43, dtype=dtypes.int64),
           learning_rate=0.1,
           optimizer="SGD")
     with self.assertRaises(TypeError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=variable_scope.get_variable(
               "global_step", [],
               trainable=False,
               dtype=dtypes.float64,
               initializer=init_ops.constant_initializer(
                   0.0, dtype=dtypes.float64)),
           learning_rate=0.1,
           optimizer="SGD")
     with self.assertRaises(ValueError):
       optimizers_lib.optimize_loss(
           loss,
           global_step=variable_scope.get_variable(
               "global_step", [1],
               trainable=False,
               dtype=dtypes.int64,
               initializer=init_ops.constant_initializer(
                   [0], dtype=dtypes.int64)),
           learning_rate=0.1,
           optimizer="SGD")
Example #8
  def test_bad_kernel_approximation(self, initializer, scale, exact_kernel_fn):
    """Approximation is bad when output dimension is small."""
    # Two distinct inputs.
    x = constant_op.constant([[1.0, -1.0, 0.5]])
    y = constant_op.constant([[-1.0, 1.0, 1.0]])

    small_output_dim = 10
    random_seed.set_random_seed(1234)
    # Initialize layer.
    rff_layer = kernel_layers.RandomFourierFeatures(
        output_dim=small_output_dim,
        kernel_initializer=initializer,
        scale=scale,
        name='random_fourier_features')

    # Apply layer to both inputs.
    output_x = math.sqrt(2.0 / small_output_dim) * rff_layer.apply(x)
    output_y = math.sqrt(2.0 / small_output_dim) * rff_layer.apply(y)

    # The inner products of the outputs (on inputs x and y) approximates the
    # real value of the RBF kernel but poorly since the output dimension of the
    # layer is small.
    exact_kernel_value = exact_kernel_fn(x, y)
    approx_kernel_value = kernelized_utils.inner_product(output_x, output_y)
    abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
    if not context.executing_eagerly():
      with self.cached_session() as sess:
        keras_backend._initialize_variables(sess)
        abs_error_eval = sess.run([abs_error])
        self.assertGreater(abs_error_eval[0][0], 0.05)
        self.assertLess(abs_error_eval[0][0], 0.5)
    else:
      self.assertGreater(abs_error, 0.05)
      self.assertLess(abs_error, 0.5)
Example #9
def _Solve(a, b, c):
    """Return solution of a quadratic minimization.

  The optimization equation is:
       f(a, b, c) = argmin_w{1/2 * a * w^2 + b * w + c * |w|}
  we get optimal solution w*:
       w* = -(b - sign(b)*c)/a if |b| > c else w* = 0

  REQUIRES: Dimensionality of a and b must be same

  Args:
    a: A Tensor
    b: A Tensor
    c: A Tensor with one element.

  Returns:
    A Tensor w, which is solution for the equation
  """
    with ops.name_scope("solve_" + b.op.name):
        c = ops.convert_to_tensor(c)
        k = array_ops.fill(array_ops.shape(b), c)
        zero_t = array_ops.zeros(array_ops.shape(b), dtype=b.dtype)
        w = (c * math_ops.sign(b) - b) / a
        w = math_ops.select(math_ops.less(math_ops.abs(b), k), zero_t, w)
        return w
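The closed form above is soft thresholding for the scalar problem argmin_w {1/2 * a * w^2 + b * w + c * |w|}; a hedged NumPy sketch comparing it against a brute-force grid search:

import numpy as np

def solve_numpy(a, b, c):
  # w* = -(b - sign(b) * c) / a if |b| > c else 0, as in _Solve.
  w = (c * np.sign(b) - b) / a
  return np.where(np.abs(b) <= c, 0.0, w)

a, c = 2.0, 1.0
for b in [-3.0, -0.5, 0.0, 0.5, 3.0]:
  grid = np.linspace(-5.0, 5.0, 100001)
  objective = 0.5 * a * grid**2 + b * grid + c * np.abs(grid)
  w_star = grid[np.argmin(objective)]
  assert abs(float(solve_numpy(a, b, c)) - w_star) < 1e-3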
Example #10
 def _log_variance(self):
   # Following calculation is based on law of total variance:
   #
   # Var[Z] = E[Var[Z | V]] + Var[E[Z | V]]
   #
   # where,
   #
   # Z|v ~ interpolate_affine[v](distribution)
   # V ~ mixture_distribution
   #
   # thus,
   #
   # E[Var[Z | V]] = sum{ prob[d] Var[d] : d=0, ..., deg-1 }
   # Var[E[Z | V]] = sum{ prob[d] (Mean[d] - Mean)**2 : d=0, ..., deg-1 }
   v = array_ops.stack([
       # log(self.distribution.variance()) = log(Var[d]) = log(rate[d])
       self._log_rate,
       # log((Mean[d] - Mean)**2)
       2. * math_ops.log(
           math_ops.abs(self.distribution.mean()
                        - self._mean()[..., array_ops.newaxis])),
   ], axis=-1)
   return math_ops.reduce_logsumexp(
       self.mixture_distribution.logits[..., array_ops.newaxis] + v,
       axis=[-2, -1])
Example #11
  def testGoodKernelApproximationAmortized(self):
    # Parameters.
    num_points = 20
    input_dim = 5
    mapped_dim = 5000
    stddev = 5.0

    points_shape = [1, input_dim]
    points = [
        random_ops.random_uniform(shape=points_shape, maxval=1.0)
        for _ in xrange(num_points)
    ]

    normalized_points = [nn.l2_normalize(point, dim=1) for point in points]
    total_absolute_error = 0.0
    with self.cached_session():
      rffm = RandomFourierFeatureMapper(input_dim, mapped_dim, stddev, seed=0)
      # Cache mappings so that they are not computed multiple times.
      cached_mappings = dict((point, rffm.map(point))
                             for point in normalized_points)
      for x in normalized_points:
        mapped_x = cached_mappings[x]
        for y in normalized_points:
          mapped_y = cached_mappings[y]
          exact_kernel_value = _compute_exact_rbf_kernel(x, y, stddev)
          approx_kernel_value = _inner_product(mapped_x, mapped_y)
          abs_error = math_ops.abs(exact_kernel_value - approx_kernel_value)
          total_absolute_error += abs_error
      self.assertAllClose(
          [[0.0]],
          total_absolute_error.eval() / (num_points * num_points),
          atol=0.02)
Example #12
def assert_close(
    x, y, data=None, summarize=None, message=None, name="assert_close"):
  """Assert that that x and y are within machine epsilon of each other.

  Args:
    x: Numeric `Tensor`
    y: Numeric `Tensor`
    data: The tensors to print out if the condition is `False`. Defaults to
      error message and first few entries of `x` and `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).

  Returns:
    Op raising `InvalidArgumentError` if |x - y| > machine epsilon.
  """
  message = message or ""
  x = ops.convert_to_tensor(x, name="x")
  y = ops.convert_to_tensor(y, name="y")

  if x.dtype.is_integer:
    return check_ops.assert_equal(
        x, y, data=data, summarize=summarize, message=message, name=name)

  with ops.name_scope(name, "assert_close", [x, y, data]):
    tol = np.finfo(x.dtype.as_numpy_dtype).resolution
    if data is None:
      data = [
          message,
          "Condition x ~= y did not hold element-wise: x = ", x.name, x, "y = ",
          y.name, y
      ]
    condition = math_ops.reduce_all(math_ops.less_equal(math_ops.abs(x-y), tol))
    return control_flow_ops.Assert(
        condition, data, summarize=summarize)
Example #13
def exact_laplacian_kernel(x, y, stddev):
  """Computes exact Laplacian kernel value(s) for tensors x and y using stddev.

  The Laplacian kernel for vectors u, v is defined as follows:
       K(u, v) = exp(-||u-v|| / stddev)
  where the norm is the l1-norm. x, y can be either vectors or matrices. If they
  are vectors, they must have the same dimension. If they are matrices, they
  must have the same number of columns. In the latter case, the method returns
  (as a matrix) K(u, v) values for all pairs (u, v) where u is a row from x and
  v is a row from y.

  Args:
    x: a tensor of rank 1 or 2. Its shape should be either [dim] or [m, dim].
    y: a tensor of rank 1 or 2. Its shape should be either [dim] or [n, dim].
    stddev: The width of the Gaussian kernel.

  Returns:
    A single value (scalar) with shape (1, 1)  if x, y are vectors or a matrix
    of shape (m, n) with entries K(u, v) (where K is the Laplacian kernel) for
    all (u,v) pairs where u, v are rows from x and y respectively.

  Raises:
    InvalidShapeError: if the shapes of x, y are not compatible.
  """
  x_aligned, y_aligned = _align_matrices(x, y)
  diff_l1_norm = math_ops.reduce_sum(
      math_ops.abs(math_ops.subtract(x_aligned, y_aligned)), 2)
  return math_ops.exp(-diff_l1_norm / stddev)
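For reference, a NumPy version of the same pairwise computation (broadcasting stands in for _align_matrices; assumes x and y are matrices with the same column count):

import numpy as np

def exact_laplacian_numpy(x, y, stddev):
  # ||u - v||_1 for every pair (u, v) of rows, via broadcasting.
  diff_l1 = np.abs(x[:, None, :] - y[None, :, :]).sum(axis=2)
  return np.exp(-diff_l1 / stddev)

x = np.array([[1.0, -1.0, 0.5]])
y = np.array([[-1.0, 1.0, 1.0]])
# ||u - v||_1 = 2 + 2 + 0.5 = 4.5, so K = exp(-4.5) ~ 0.0111.
print(exact_laplacian_numpy(x, y, stddev=1.0))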
Example #14
def absolute_difference(predictions, labels=None, weights=1.0, scope=None):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "absolute_difference",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope=scope)
Example #15
 def _compute_stft_gradient(signal, frame_length=32, frame_step=16,
                            fft_length=32):
   """Computes the gradient of the STFT with respect to `signal`."""
   stft = spectral_ops.stft(signal, frame_length, frame_step, fft_length)
   magnitude_stft = math_ops.abs(stft)
   loss = math_ops.reduce_sum(magnitude_stft)
   return gradients_impl.gradients([loss], [signal])[0]
Example #16
 def testChi2WithAbsDf(self):
   with self.cached_session():
     df_v = np.array([-1.3, -3.2, 5], dtype=np.float64)
     chi2 = chi2_lib.Chi2WithAbsDf(df=df_v)
     self.assertAllClose(
         math_ops.floor(math_ops.abs(df_v)).eval(),
         chi2.df.eval())
Example #17
  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_cols, num_rows) if num_rows < num_cols else (num_rows,
                                                                   num_cols)

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    ph = d / math_ops.abs(d)
    q *= ph
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)
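The d / |d| step rescales the columns of Q so that diag(R) becomes positive, which removes the sign ambiguity of the QR factorization; a NumPy sketch of the same correction:

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(4, 4))
q, r = np.linalg.qr(a)
d = np.diag(r)
q *= d / np.abs(d)  # flip column signs so the factorization is canonical
np.testing.assert_allclose(q @ q.T, np.eye(4), atol=1e-12)  # still orthogonal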
Example #18
  def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._loc, self._scale, n]):
        n = ops.convert_to_tensor(n)
        n_val = tensor_util.constant_value(n)
        shape = array_ops.concat(0, ([n], self.batch_shape()))
        # Sample uniformly at random from the open interval (-1, 1).
        uniform_samples = random_ops.random_uniform(
            shape=shape,
            minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                self.dtype.as_numpy_dtype(0.)),
            maxval=self.dtype.as_numpy_dtype(1.),
            dtype=self.dtype,
            seed=seed)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        uniform_samples.set_shape(inferred_shape)

        return (self._loc - self._scale * math_ops.sign(uniform_samples) *
                math_ops.log(1. - math_ops.abs(uniform_samples)))
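The return expression is the standard inverse-CDF sampler for the Laplace distribution: with U uniform on the open interval (-1, 1), loc - scale * sign(U) * log(1 - |U|) is Laplace(loc, scale) distributed. A NumPy sketch (illustrative):

import numpy as np

rng = np.random.default_rng(0)
loc, scale = 0.0, 1.0
u = rng.uniform(-1.0, 1.0, size=100000)
samples = loc - scale * np.sign(u) * np.log(1.0 - np.abs(u))
# Laplace(0, 1) has mean 0 and variance 2 * scale**2 = 2.
print(samples.mean(), samples.var())  # ~ 0.0 and ~ 2.0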
Example #19
def huber_loss(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error=y_true-y_pred`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = math_ops.cast(y_pred, dtype=K.floatx())
  y_true = math_ops.cast(y_true, dtype=K.floatx())
  error = math_ops.subtract(y_pred, y_true)
  abs_error = math_ops.abs(error)
  quadratic = math_ops.minimum(abs_error, delta)
  linear = math_ops.subtract(abs_error, quadratic)
  return math_ops.add(
      math_ops.multiply(
          ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
          math_ops.multiply(quadratic, quadratic)),
      math_ops.multiply(delta, linear))
Example #20
  def _resource_apply_sparse(self, grad, var, indices):
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr(var_dtype)

    beta_1_t = self._get_hyper('beta_1', var_dtype)
    beta_2_t = self._get_hyper('beta_2', var_dtype)
    local_step = math_ops.cast(self.iterations + 1, var_dtype)
    beta_1_power = math_ops.pow(beta_1_t, local_step)
    epsilon_t = self._get_hyper('epsilon', var_dtype)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, 'm')
    m_slice = array_ops.gather(m, indices)
    m_t_slice = m_slice * beta_1_t + grad * (1 - beta_1_t)
    with ops.control_dependencies([m_t_slice]):
      m_t = self._resource_scatter_update(m, indices, m_t_slice)

    # u_t = max(beta2 * u, abs(g_t))
    v = self.get_slot(var, 'v')
    v_slice = array_ops.gather(v, indices)
    v_t_slice = math_ops.maximum(v_slice * beta_2_t, math_ops.abs(grad))
    with ops.control_dependencies([v_t_slice]):
      v_t = self._resource_scatter_update(v, indices, v_t_slice)
    # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
    var_slice = -lr_t / (1 - beta_1_power) * (
        m_t_slice / (v_t_slice + epsilon_t))
    with ops.control_dependencies([var_slice]):
      var_update = self._resource_scatter_add(var, indices, var_slice)
    return control_flow_ops.group(*[var_update, m_t, v_t])
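This is the sparse form of the AdaMax update, where the second slot tracks an exponentially weighted infinity norm, u_t = max(beta2 * u, |g_t|), instead of Adam's squared-gradient average. A dense NumPy sketch of one step (names are illustrative):

import numpy as np

def adamax_step(var, m, u, grad, lr, beta1, beta2, eps, t):
  m = beta1 * m + (1 - beta1) * grad          # first moment
  u = np.maximum(beta2 * u, np.abs(grad))     # infinity-norm second moment
  return var - lr / (1 - beta1**t) * m / (u + eps), m, u

var = np.zeros(3)
m = np.zeros(3)
u = np.zeros(3)
var, m, u = adamax_step(var, m, u, np.array([0.1, -0.2, 0.3]),
                        lr=0.001, beta1=0.9, beta2=0.999, eps=1e-7, t=1)
print(var)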
Example #21
 def _prepare(self, grads_and_vars):
   """"""
   
   if self._lr is None:
     sTy = 0
     sTs = 0
     yTy = 0
     for g_t, x_tm1 in grads_and_vars:
       if g_t is None:
         continue
       with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
         if isinstance(g_t, ops.Tensor):
           g_tm1 = self.get_slot(x_tm1, 'g')
           s_tm1 = self.get_slot(x_tm1, 's')
           y_t = (g_t-g_tm1)
           sTy += math_ops.reduce_sum(s_tm1*y_t)
           sTs += math_ops.reduce_sum(s_tm1**2)
           yTy += math_ops.reduce_sum(y_t**2)
         else:
           idxs, idxs_ = array_ops.unique(g_t.indices)
           g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
           g_tm1 = self.get_slot(x_tm1, 'g')
           g_tm1_ = array_ops.gather(g_tm1, idxs)
           s_tm1 = self.get_slot(x_tm1, 's')
           s_tm1_ = array_ops.gather(s_tm1, idxs)
           y_t_ = (g_t_-g_tm1_)
           sTy += math_ops.reduce_sum(s_tm1_*y_t_)
           sTs += math_ops.reduce_sum(s_tm1_**2)
           yTy += math_ops.reduce_sum(y_t_**2)
     sTy = math_ops.abs(sTy)
     self._lr = sTs / (sTy + self._eps)
Example #22
def absolute_loss(predicted, target, name=None):
  """Computes and returns the per-example absolute loss.

  Computes the per-example absolute value of the difference between
  the target and predicted tensors. The tensors must have the same
  shape.

  Args:
    predicted: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]`
      of predicted values.
    target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of
      target values. The shape of the target tensor should match the
      `predicted` tensor.
    name: A name for the operation (optional).

  Returns:
    A `[batch_size, dim_1, ..., dim_n]` tensor of per-example absolute losses.

  Raises:
    ValueError: If `predicted` and `target` shapes do not match.

  """
  with ops.op_scope([predicted, target], name, "absolute_loss") as scope:
    predicted = ops.convert_to_tensor(predicted, name="predicted")
    target = ops.convert_to_tensor(target, name="target")
    predicted.get_shape().assert_is_compatible_with(target.get_shape())
    return math_ops.abs(target - predicted, name=scope)
Example #23
  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_rows, num_cols)

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    square_len = math_ops.minimum(num_rows, num_cols)
    d = array_ops.diag_part(r[:square_len, :square_len])
    ph = d / math_ops.abs(d)
    q *= ph
    # Pad zeros to Q (if rows smaller than cols)
    if num_rows < num_cols:
      padding = array_ops.zeros([num_rows, num_cols - num_rows], dtype=dtype)
      q = array_ops.concat([q, padding], 1)
    return self.gain * array_ops.reshape(q, shape)
Example #24
 def _apply_sparse_shared(self, grad, var, indices,
                          scatter_add, scatter_update):
   beta1_power = self._get_beta_accumulators()
   beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
   lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
   beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
   beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
   epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
   # m_t = beta1 * m + (1 - beta1) * g_t
   m = self.get_slot(var, "m")
   m_slice = array_ops.gather(m, indices)
   m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t)
   with ops.control_dependencies([m_t_slice]):
     m_t = scatter_update(m, indices, m_t_slice)
   # u_t = max(beta2 * u, abs(g_t))
   v = self.get_slot(var, "v")
   v_slice = array_ops.gather(v, indices)
   v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad))
   with ops.control_dependencies([v_t_slice]):
     v_t = scatter_update(v, indices, v_t_slice)
   # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t
   var_slice = -lr_t / (1 - beta1_power) * (m_t_slice /
                                            (v_t_slice + epsilon_t))
   with ops.control_dependencies([var_slice]):
     var_update = scatter_add(var, indices, var_slice)
   return control_flow_ops.group(*[var_update, m_t, v_t])
Example #25
  def _operator_and_matrix(
      self, build_info, dtype, use_placeholder,
      ensure_self_adjoint_and_pd=False):
    shape = build_info.shape
    # For this test class, we are creating real spectrums.
    # We also want the spectrum to have eigenvalues bounded away from zero.
    #
    # spectrum is bounded away from zero.
    spectrum = linear_operator_test_util.random_sign_uniform(
        shape=self._shape_to_spectrum_shape(shape), minval=1., maxval=2.)
    if ensure_self_adjoint_and_pd:
      spectrum = math_ops.abs(spectrum)
    # If dtype is complex, cast spectrum to complex.  The imaginary part will be
    # zero, so the operator will still be self-adjoint.
    spectrum = math_ops.cast(spectrum, dtype)

    lin_op_spectrum = spectrum

    if use_placeholder:
      lin_op_spectrum = array_ops.placeholder_with_default(spectrum, shape=None)

    operator = linalg.LinearOperatorCirculant(
        lin_op_spectrum,
        is_self_adjoint=True,
        is_positive_definite=True if ensure_self_adjoint_and_pd else None,
        input_output_dtype=dtype)

    mat = self._spectrum_to_circulant_1d(spectrum, shape, dtype=dtype)

    return operator, mat
Example #26
def _sliced_wasserstein(a, b, random_sampling_count, random_projection_dim):
  """Compute the approximate sliced Wasserstein distance.

  Args:
      a: (matrix) Distribution "a" of samples (row, col).
      b: (matrix) Distribution "b" of samples (row, col).
      random_sampling_count: (int) Number of random projections to average.
      random_projection_dim: (int) Dimension of the random projection space.
  Returns:
      Float containing the approximate distance between "a" and "b".
  """
  s = array_ops.shape(a)
  means = []
  for _ in range(random_sampling_count):
    # Random projection matrix.
    proj = random_ops.random_normal(
        [array_ops.shape(a)[1], random_projection_dim])
    proj *= math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(proj), 0, keepdims=True))
    # Project both distributions and sort them.
    proj_a = math_ops.matmul(a, proj)
    proj_b = math_ops.matmul(b, proj)
    proj_a = _sort_rows(proj_a, s[0])
    proj_b = _sort_rows(proj_b, s[0])
    # Pairwise Wasserstein distance.
    wdist = math_ops.reduce_mean(math_ops.abs(proj_a - proj_b))
    means.append(wdist)
  return math_ops.reduce_mean(means)
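A compact NumPy sketch of the same estimator: project both sample sets onto random unit directions, sort the projections, and average the element-wise absolute differences (sliced_wasserstein_numpy is a hypothetical helper, not the TF-GAN API):

import numpy as np

def sliced_wasserstein_numpy(a, b, n_proj=64, proj_dim=8, seed=0):
  rng = np.random.default_rng(seed)
  dists = []
  for _ in range(n_proj):
    proj = rng.normal(size=(a.shape[1], proj_dim))
    proj /= np.sqrt((proj**2).sum(axis=0, keepdims=True))  # unit columns
    pa = np.sort(a @ proj, axis=0)  # sort each projected coordinate
    pb = np.sort(b @ proj, axis=0)
    dists.append(np.abs(pa - pb).mean())
  return float(np.mean(dists))

rng = np.random.default_rng(1)
a = rng.normal(size=(128, 16))
b = rng.normal(size=(128, 16)) + 2.0  # shifted distribution
print(sliced_wasserstein_numpy(a, a))  # 0.0
print(sliced_wasserstein_numpy(a, b))  # noticeably larger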
Example #27
    def inner_loss(y_true, y_pred):
        delta = math_ops.abs(math_ops.subtract(y_pred, y_true))
        losses = math_ops.square(delta)
        if clip > 0.0:
            losses = array_ops.where(delta < clip, 0.5 * losses, delta - 0.5)

        return losses
Example #28
 def __call__(self, x):
   regularization = 0.
   if self.l1:
     regularization += math_ops.reduce_sum(self.l1 * math_ops.abs(x))
   if self.l2:
     regularization += math_ops.reduce_sum(self.l2 * math_ops.square(x))
   return regularization
Example #29
  def __init__(self,
               num_rows,
               multiplier,
               is_non_singular=None,
               is_self_adjoint=None,
               is_positive_definite=None,
               assert_proper_shapes=False,
               name="LinearOperatorScaledIdentity"):
    """Initialize a `LinearOperatorScaledIdentity`.

    The `LinearOperatorScaledIdentity` is initialized with `num_rows`, which
    determines the size of each identity matrix, and a `multiplier`,
    which defines `dtype`, batch shape, and scale of each matrix.

    This operator is able to broadcast the leading (batch) dimensions.

    Args:
      num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in the
        corresponding identity matrix.
      multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
      is_non_singular:  Expect that this operator is non-singular.
      is_self_adjoint:  Expect that this operator is equal to its hermitian
        transpose.
      is_positive_definite:  Expect that this operator is positive definite.
      assert_proper_shapes:  Python `bool`.  If `False`, only perform static
        checks that initialization and method arguments have proper shape.
        If `True`, and static checks are inconclusive, add asserts to the graph.
      name: A name for this `LinearOperator`

    Raises:
      ValueError:  If `num_rows` is determined statically to be non-scalar, or
        negative.
    """
    self._assert_proper_shapes = assert_proper_shapes

    with ops.name_scope(name, values=[multiplier, num_rows]):
      self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

      super(LinearOperatorScaledIdentity, self).__init__(
          dtype=self._multiplier.dtype,
          is_non_singular=is_non_singular,
          is_self_adjoint=is_self_adjoint,
          is_positive_definite=is_positive_definite,
          name=name)

      # Shape [B1,...Bb, 1, 1]
      self._multiplier_matrix = array_ops.expand_dims(
          array_ops.expand_dims(self.multiplier, -1), -1)
      self._multiplier_matrix_conj = math_ops.conj(
          self._multiplier_matrix)
      self._abs_multiplier = math_ops.abs(self.multiplier)

      self._num_rows = linear_operator_util.shape_tensor(
          num_rows, name="num_rows")
      self._num_rows_static = tensor_util.constant_value(self._num_rows)
      self._check_num_rows_possibly_add_asserts()
      self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
      self._num_rows_cast_to_real_dtype = math_ops.cast(
          self._num_rows, self.dtype.real_dtype)
Example #30
 def __init__(self, df, validate_args=False, allow_nan_stats=True,
              name="Chi2WithAbsDf"):
   with ops.name_scope(name, values=[df]) as ns:
     super(Chi2WithAbsDf, self).__init__(
         df=math_ops.floor(math_ops.abs(df)),
         validate_args=validate_args,
         allow_nan_stats=allow_nan_stats,
         name=ns)
Example #31
def _pairwise_comparison(sorted_labels,
                         sorted_logits,
                         sorted_weights,
                         lambda_weight=None):
  r"""Returns pairwise comparison `Tensor`s.

  Given a list of n items, the labels of graded relevance l_i and the logits
  s_i, we sort the items in a list based on s_i and obtain ranks r_i. We form
  n^2 pairs of items. For each pair, we have the following:

                        /
                        | 1   if l_i > l_j
  * `pairwise_labels` = |
                        | 0   if l_i <= l_j
                        \
  * `pairwise_logits` = s_i - s_j
                         /
                         | 0              if l_i <= l_j,
  * `pairwise_weights` = | |l_i - l_j|    if lambda_weight is None,
                         | lambda_weight  otherwise.
                         \

  The `sorted_weights` is item-wise and is applied non-symmetrically to update
  pairwise_weights as
    pairwise_weights(i, j) = w_i * pairwise_weights(i, j).
  This effectively applies to all pairs with l_i > l_j. Note that it is actually
  symmetric when `sorted_weights` are constant per list, i.e., listwise weights.

  Args:
    sorted_labels: A `Tensor` with shape [batch_size, list_size] of labels
      sorted.
    sorted_logits: A `Tensor` with shape [batch_size, list_size] of logits
      sorted.
    sorted_weights: A `Tensor` with shape [batch_size, list_size] of item-wise
      weights sorted.
    lambda_weight: A `_LambdaWeight` object.

  Returns:
    A tuple of (pairwise_labels, pairwise_logits, pairwise_weights) with each
    having the shape [batch_size, list_size, list_size].
  """
  # Compute the difference for all pairs in a list. The output is a Tensor with
  # shape [batch_size, list_size, list_size] where the entry [-1, i, j] stores
  # the information for pair (i, j).
  pairwise_label_diff = array_ops.expand_dims(
      sorted_labels, 2) - array_ops.expand_dims(sorted_labels, 1)
  pairwise_logits = array_ops.expand_dims(
      sorted_logits, 2) - array_ops.expand_dims(sorted_logits, 1)
  pairwise_labels = math_ops.to_float(math_ops.greater(pairwise_label_diff, 0))
  is_label_valid = utils.is_label_valid(sorted_labels)
  valid_pair = math_ops.logical_and(
      array_ops.expand_dims(is_label_valid, 2),
      array_ops.expand_dims(is_label_valid, 1))
  # Only keep the case when l_i > l_j.
  pairwise_weights = pairwise_labels * math_ops.to_float(valid_pair)
  # Apply the item-wise weights along l_i.
  pairwise_weights *= array_ops.expand_dims(sorted_weights, 2)
  if lambda_weight is not None:
    pairwise_weights *= lambda_weight.pair_weights(sorted_labels)
  else:
    pairwise_weights *= math_ops.abs(pairwise_label_diff)
  pairwise_weights = array_ops.stop_gradient(
      pairwise_weights, name='weights_stop_gradient')
  return pairwise_labels, pairwise_logits, pairwise_weights
Example #32
 def _loss(logits):
   """The loss of pairwise logits with l_i > l_j."""
   # The following is the same as log(1 + exp(-pairwise_logits)).
   return nn_ops.relu(-logits) + math_ops.log1p(
       math_ops.exp(-math_ops.abs(logits)))
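The identity used here, log(1 + exp(-x)) = relu(-x) + log1p(exp(-|x|)), stays finite even when exp(-x) would overflow; a quick NumPy check:

import numpy as np

x = np.array([-40.0, -1.0, 0.0, 1.0, 40.0])
stable = np.maximum(-x, 0.0) + np.log1p(np.exp(-np.abs(x)))
naive = np.log1p(np.exp(-x))
np.testing.assert_allclose(stable, naive)
# For x = -1000 the naive form overflows (exp(1000) = inf) while the
# stable form simply returns 1000.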
Example #33
def _tf_abs(x):
    return math_ops.abs(x)
Example #34
 def _batch_sqrt_log_det(self):
   # Here we compute the Cholesky factor of "C", then pass the result on.
   abs_diag_chol_c = math_ops.abs(array_ops.matrix_diag_part(
       self._chol_capacitance(batch_mode=True)))
   return self._sqrt_log_det_core(abs_diag_chol_c)
Example #35
 def mapemae(y_true, y_pred):
     mae = math_ops.abs(y_true - y_pred)
     mape = mae / K.clip(math_ops.abs(y_true), K.epsilon(), None)
     return scale * K.mean(mape + mae / mae_div, axis=-1)
Example #36
def _my_mae(y_true, y_pred):
  return keras.backend.mean(math_ops.abs(y_pred - y_true), axis=-1)
Example #37
def norm(tensor,
         ord='euclidean',
         axis=None,
         keepdims=None,
         name=None,
         keep_dims=None):
    r"""Computes the norm of vectors, matrices, and tensors.

  This function can compute several different vector norms (the 1-norm, the
  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).

  Args:
    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
    ord: Order of the norm. Supported values are 'fro', 'euclidean',
      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
      p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if
      `tensor` is a matrix and equivalent to 2-norm for vectors.
      Some restrictions apply:
        a) The Frobenius norm `fro` is not defined for vectors,
        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
           `2`, `np.inf` are supported.
      See the description of `axis` on how to compute norms for a batch of
      vectors or matrices stored in a tensor.
    axis: If `axis` is `None` (the default), the input is considered a vector
      and a single vector norm is computed over the entire set of values in the
      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
      `norm(reshape(tensor, [-1]), ord=ord)`.
      If `axis` is a Python integer, the input is considered a batch of vectors,
      and `axis` determines the axis in `tensor` over which to compute vector
      norms.
      If `axis` is a 2-tuple of Python integers it is considered a batch of
      matrices and `axis` determines the axes in `tensor` over which to compute
      a matrix norm.
      Negative indices are supported. Example: If you are passing a tensor that
      can be either a matrix or a batch of matrices at runtime, pass
      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
      computed.
    keepdims: If True, the axes indicated in `axis` are kept with size 1.
      Otherwise, the dimensions in `axis` are removed from the output shape.
    name: The name of the op.
    keep_dims: Deprecated alias for `keepdims`.

  Returns:
    output: A `Tensor` of the same type as tensor, containing the vector or
      matrix norms. If `keepdims` is True then the rank of output is equal to
      the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
      if `axis` is an integer, the rank of `output` is one less than the rank
      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
      than the rank of `tensor`.

  Raises:
    ValueError: If `ord` or `axis` is invalid.

  @compatibility(numpy)
  Mostly equivalent to numpy.linalg.norm.
  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
  Other differences:
    a) If axis is `None`, treats the flattened `tensor` as a vector
     regardless of rank.
    b) Explicitly supports 'euclidean' norm as the default, including for
     higher order tensors.
  @end_compatibility
  """
    keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
                                                      'keep_dims', keep_dims)
    if keepdims is None:
        keepdims = False

    is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list))
                      and len(axis) == 2)
    if is_matrix_norm:
        axis = tuple(axis)
        if (not isinstance(axis[0], int) or not isinstance(axis[1], int)
                or axis[0] == axis[1]):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )
        supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
        if ord not in supported_matrix_norms:
            raise ValueError(
                "'ord' must be a supported matrix norm in %s, got %s" %
                (supported_matrix_norms, ord))
    else:
        if not (isinstance(axis, int) or axis is None):
            raise ValueError(
                "'axis' must be None, an integer, or a tuple of 2 unique integers"
            )

        supported_vector_norms = ['euclidean', 1, 2, np.inf]
        if (not np.isreal(ord)
                or ord <= 0) and ord not in supported_vector_norms:
            raise ValueError("'ord' must be a supported vector norm, got %s" %
                             ord)
        if axis is not None:
            axis = (axis, )

    with ops.name_scope(name, 'norm', [tensor]):
        tensor = ops.convert_to_tensor(tensor)

        if ord in ['fro', 'euclidean', 2, 2.0]:
            if is_matrix_norm and ord in [2, 2.0]:
                rank = array_ops.rank(tensor)
                positive_axis = functional_ops.map_fn(
                    lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda:
                                                    i + rank),
                    ops.convert_to_tensor(axis))
                axes = math_ops.range(rank)
                perm_before = array_ops.concat([
                    array_ops.setdiff1d(axes, positive_axis)[0], positive_axis
                ],
                                               axis=0)
                perm_after = functional_ops.map_fn(
                    lambda i: math_ops.cast(array_ops.squeeze(
                        array_ops.where(math_ops.equal(perm_before, i))),
                                            dtype=dtypes.int32), axes)
                permed = array_ops.transpose(tensor, perm=perm_before)
                matrix_2_norm = array_ops.expand_dims(math_ops.reduce_max(
                    math_ops.abs(
                        gen_linalg_ops.svd(permed, compute_uv=False)[0]),
                    axis=-1,
                    keepdims=True),
                                                      axis=-1)
                result = array_ops.transpose(matrix_2_norm, perm=perm_after)
            else:
                result = math_ops.sqrt(
                    math_ops.reduce_sum(tensor * math_ops.conj(tensor),
                                        axis,
                                        keepdims=True))
        else:
            result = math_ops.abs(tensor)
            if ord == 1:
                sum_axis = None if axis is None else axis[0]
                result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
                if is_matrix_norm:
                    result = math_ops.reduce_max(result,
                                                 axis[-1],
                                                 keepdims=True)
            elif ord == np.inf:
                if is_matrix_norm:
                    result = math_ops.reduce_sum(result,
                                                 axis[1],
                                                 keepdims=True)
                max_axis = None if axis is None else axis[0]
                result = math_ops.reduce_max(result, max_axis, keepdims=True)
            else:
                # General p-norms (positive p only)
                result = math_ops.pow(
                    math_ops.reduce_sum(math_ops.pow(result, ord),
                                        axis,
                                        keepdims=True), 1.0 / ord)
        if not keepdims:
            result = array_ops.squeeze(result, axis)
        return result
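For reference, the matrix 1-norm and inf-norm branches above reduce |tensor| with a sum then a max (max column sum and max row sum, respectively); a NumPy check of those reductions only, not of this function's full API:

import numpy as np

m = np.array([[1.0, -2.0], [3.0, -4.0]])
one_norm = np.abs(m).sum(axis=0).max()  # max column sum -> 6.0
inf_norm = np.abs(m).sum(axis=1).max()  # max row sum    -> 7.0
np.testing.assert_allclose(one_norm, np.linalg.norm(m, 1))
np.testing.assert_allclose(inf_norm, np.linalg.norm(m, np.inf))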
Example #38
def mean_absolute_percentage_error(y_true, y_pred):
    diff = math_ops.abs(
        (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
    return 100. * K.mean(diff, axis=-1)
Example #39
 def _log_normalization(self):
     return (math_ops.log(math_ops.abs(self.scale)) +
             0.5 * math_ops.log(self.df) + 0.5 * np.log(np.pi) +
             math_ops.lgamma(0.5 * self.df) -
             math_ops.lgamma(0.5 * (self.df + 1.)))
Example #40
 def _prob(self, x):
     return math_ops.cast(math_ops.abs(x - self.loc) <= self._slack,
                          dtype=self.dtype)
Example #41
 def f(x):
     # __pow__ can't handle negative base, so we use `abs` here.
     rt = math_ops.abs(x)**(1.0 / 3)
     return array_ops.where_v2(x < 0, -rt, rt)
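The abs trick above sidesteps the fact that a negative float cannot be raised to a fractional power; a NumPy check against np.cbrt:

import numpy as np

x = np.array([-8.0, -1.0, 0.0, 1.0, 27.0])
rt = np.abs(x) ** (1.0 / 3)
np.testing.assert_allclose(np.where(x < 0, -rt, rt), np.cbrt(x))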
Example #42
def weighted_cel(
    _sentinel=None,
    labels=None,
    logits=None,
    bound=2.0,
    name=None):
  """
  Inspired strongly by TensorFlow's sigmoid_cross_entropy_with_logits:
  https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/ops/nn_impl.py#L196-L244

  Version with weighted CEL (Cross-Entropy Loss)
  https://arxiv.org/pdf/1705.02315

  Starting from the CEL in TF:
  For brevity, let `x = logits`, `z = labels`.  The logistic loss is
        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))              (4)
      = (1 - z) * x + log(1 + exp(-x))
      = x - x * z + log(1 + exp(-x))
  For x < 0, to avoid overflow in exp(-x), we reformulate the above
        x - x * z + log(1 + exp(-x))
      = log(exp(x)) - x * z + log(1 + exp(-x))
      = - x * z + log(1 + exp(x))
  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation
      max(x, 0) - x * z + log(1 + exp(-abs(x)))

  weighted CEL:
  For x > 0 (from (4)):
   = B_p * [z * log(1 + exp(-x))] + B_n * [(1 - z) * (x + log(1 + exp(-x)))]
  For x < 0 (from (4)), using log(1 + exp(-x)) = log(1 + exp(x)) - x:
   = B_p * [z * (log(1 + exp(x)) - x)] + B_n * [(1 - z) * log(1 + exp(x))]
  Hence, to ensure stability and avoid overflow, the implementation uses this
  equivalent formulation
   = B_p * [z * (log(1 + exp(-|x|)) - min(x, 0))]
     + B_n * [(1 - z) * (max(x, 0) + log(1 + exp(-|x|)))]

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: A `Tensor` of the same type and shape as `logits`.
    logits: A `Tensor` of type `float32` or `float64`.
    bound: `float`, upper bound applied to the class-balancing weights
      `beta_p` and `beta_n`.
    name: A name for the operation (optional).
  Returns:
    A `Tensor` of the same shape as `logits` with the componentwise
    logistic losses.
  Raises:
    ValueError: If `logits` and `labels` do not have the same shape.
  """
  nn_ops._ensure_xent_args("sigmoid_cross_entropy_with_logits", _sentinel,
                           labels, logits)

  with ops.name_scope(name, "weighted_logistic_loss", [logits, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    labels = ops.convert_to_tensor(labels, name="labels")
    try:
      labels.get_shape().merge_with(logits.get_shape())
    except ValueError:
      raise ValueError("logits and labels must have the same shape (%s vs %s)" %
                       (logits.get_shape(), labels.get_shape()))

    cnt_one = math_ops.cast(math_ops.reduce_sum(labels), dtypes.float32)
    cnt_zero = math_ops.cast(array_ops.size(logits), dtypes.float32) - cnt_one
    beta_p = (cnt_one + cnt_zero) / cnt_one
    beta_n = (cnt_one + cnt_zero) / cnt_zero
    beta_n = math_ops.minimum(bound, beta_n)
    beta_p = math_ops.minimum(bound, beta_p)
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    cond = (logits >= zeros)
    relu_logits = array_ops.where(cond, logits, zeros)
    not_relu_logits = array_ops.where(cond, zeros, logits)
    abs_logits = math_ops.abs(logits)
    A = beta_p * (labels * (math_ops.log1p(math_ops.exp(-abs_logits)) - not_relu_logits))
    B = beta_n * ((1.0-labels) * (relu_logits + math_ops.log1p(math_ops.exp(-abs_logits))))
    return math_ops.add(A, B, name=name)
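With beta_p = beta_n = 1 this reduces to the standard stable formulation max(x, 0) - x * z + log(1 + exp(-|x|)); a NumPy check of that base identity (illustrative):

import numpy as np

x = np.array([-5.0, -0.1, 0.0, 0.1, 5.0])  # logits
z = np.array([0.0, 1.0, 1.0, 0.0, 1.0])    # labels

stable = np.maximum(x, 0.0) - x * z + np.log1p(np.exp(-np.abs(x)))
sigmoid = 1.0 / (1.0 + np.exp(-x))
naive = z * -np.log(sigmoid) + (1.0 - z) * -np.log(1.0 - sigmoid)
np.testing.assert_allclose(stable, naive)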
Example #43
 def _cdf(self, x):
     # Take Abs(scale) to make subsequent where work correctly.
     y = (x - self.loc) / math_ops.abs(self.scale)
     x_t = self.df / (y**2. + self.df)
     neg_cdf = 0.5 * math_ops.betainc(0.5 * self.df, 0.5, x_t)
     return array_ops.where(math_ops.less(y, 0.), neg_cdf, 1. - neg_cdf)
Example #44
  def pair_weights(self, sorted_labels):
    """See `_LambdaWeight`."""
    with ops.name_scope(None, 'dcg_lambda_weight', (sorted_labels,)):
      valid_pair, sorted_labels = self._get_valid_pairs_and_clean_labels(
          sorted_labels)
      gain = self._gain_fn(sorted_labels)
      if self._normalized:
        gain *= self._inverse_max_dcg(sorted_labels)
      pair_gain = array_ops.expand_dims(gain, 2) - array_ops.expand_dims(
          gain, 1)
      pair_gain *= math_ops.to_float(valid_pair)

      list_size = array_ops.shape(sorted_labels)[1]
      topn = self._topn or list_size
      rank = math_ops.range(list_size) + 1

      def _discount_for_relative_rank_diff():
        """Rank-based discount in the LambdaLoss paper."""
        # The LambdaLoss is not well defined when topn is active and topn <
        # list_size. We cap the rank of examples to topn + 1 so that the rank
        # difference is capped to topn. This is just a convenient upper bound
        # when topn is active. We need to revisit this.
        capped_rank = array_ops.where(
            math_ops.greater(rank, topn),
            array_ops.ones_like(rank) * (topn + 1), rank)
        rank_diff = math_ops.to_float(
            math_ops.abs(
                array_ops.expand_dims(capped_rank, 1) -
                array_ops.expand_dims(capped_rank, 0)))
        pair_discount = array_ops.where(
            math_ops.greater(rank_diff, 0),
            math_ops.abs(
                self._rank_discount_fn(rank_diff) -
                self._rank_discount_fn(rank_diff + 1)),
            array_ops.zeros_like(rank_diff))
        return pair_discount

      def _discount_for_absolute_rank():
        """Standard discount in the LambdaMART paper."""
        # When the rank discount is (1 / rank) for example, the discount is
        # |1 / r_i - 1 / r_j|. When i or j > topn, the discount becomes 0.
        rank_discount = array_ops.where(
            math_ops.greater(rank, topn),
            array_ops.zeros_like(math_ops.to_float(rank)),
            self._rank_discount_fn(math_ops.to_float(rank)))
        pair_discount = math_ops.abs(
            array_ops.expand_dims(rank_discount, 1) -
            array_ops.expand_dims(rank_discount, 0))
        return pair_discount

      u = _discount_for_relative_rank_diff()
      v = _discount_for_absolute_rank()
      pair_discount = (
          1. - self._smooth_fraction) * u + self._smooth_fraction * v
      pair_weight = math_ops.abs(pair_gain) * pair_discount
      if self._topn is None:
        return pair_weight
      pair_mask = math_ops.logical_or(
          array_ops.expand_dims(math_ops.less_equal(rank, self._topn), 1),
          array_ops.expand_dims(math_ops.less_equal(rank, self._topn), 0))
      return pair_weight * math_ops.to_float(pair_mask)
Example #45
 def testChi2WithAbsDf(self):
     with self.test_session():
         df_v = np.array([-1.3, -3.2, 5], dtype=np.float64)
         chi2 = chi2_lib.Chi2WithAbsDf(df=df_v)
         self.assertAllClose(
             math_ops.floor(math_ops.abs(df_v)).eval(), chi2.df.eval())
Example #46
 def _log_abs_determinant(self):
   return math_ops.reduce_sum(
       math_ops.log(math_ops.abs(self._diag)), reduction_indices=[-1])
Example #47
 def _assert_non_singular(self):
   return check_ops.assert_positive(
       math_ops.abs(self.multiplier), message="LinearOperator was singular")
Example #48
 def log_huber(x, m):
   if math_ops.abs(x) <= m:
     return x**2
   else:
     return m**2 * (1 - 2 * math_ops.log(m) + math_ops.log(x**2))
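At |x| = m the two branches agree, since m**2 * (1 - 2*log(m) + log(m**2)) = m**2, so the loss is continuous at the crossover; a quick check:

import numpy as np

m = 2.0
assert np.isclose(m**2, m**2 * (1 - 2 * np.log(m) + np.log(m**2)))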
Example #49
 def __call__(self, x):
     return self.l1 * math_ops.reduce_sum(math_ops.abs(x))
Example #50
 def _partial_cycle_consistency_loss(data, reconstructed_data):
   # Following the original implementation
   # https://github.com/junyanz/CycleGAN/blob/master/models/cycle_gan_model.lua
   # use L1-norm of pixel-wise error normalized by data size so that
   # `cycle_loss_weight` can be specified independent of image size.
   return math_ops.reduce_mean(math_ops.abs(data - reconstructed_data))
Example #51
def add_image_comparison_summaries(gan_model,
                                   num_comparisons=2,
                                   display_diffs=False):
    """Adds image summaries to compare triplets of images.

  The first image is the generator input, the second is the generator output,
  and the third is the real data. This style of comparison is useful for
  image translation problems, where the generator input is a corrupted image,
  the generator output is the reconstruction, and the real data is the target.

  Args:
    gan_model: A GANModel tuple.
    num_comparisons: The number of image triplets to display.
    display_diffs: Also display the difference between generated and target.

  Raises:
    ValueError: If real data, generated data, and generator inputs aren't
      images.
    ValueError: If the generator input, real, and generated data aren't all the
      same size.
  """
    if isinstance(gan_model, namedtuples.CycleGANModel):
        saved_params = locals()
        saved_params.pop('gan_model', None)
        with ops.name_scope('cyclegan_x2y_image_comparison_summaries'):
            add_image_comparison_summaries(gan_model.model_x2y, **saved_params)
        with ops.name_scope('cyclegan_y2x_image_comparison_summaries'):
            add_image_comparison_summaries(gan_model.model_y2x, **saved_params)
        return

    _assert_is_image(gan_model.generator_inputs)
    _assert_is_image(gan_model.generated_data)
    _assert_is_image(gan_model.real_data)

    gan_model.generated_data.shape.assert_is_compatible_with(
        gan_model.generator_inputs.shape)
    gan_model.real_data.shape.assert_is_compatible_with(
        gan_model.generated_data.shape)

    image_list = []
    image_list.extend(
        array_ops.unstack(gan_model.generator_inputs[:num_comparisons]))
    image_list.extend(
        array_ops.unstack(gan_model.generated_data[:num_comparisons]))
    image_list.extend(array_ops.unstack(gan_model.real_data[:num_comparisons]))
    if display_diffs:
        generated_list = array_ops.unstack(
            gan_model.generated_data[:num_comparisons])
        real_list = array_ops.unstack(gan_model.real_data[:num_comparisons])
        diffs = [
            math_ops.abs(
                math_ops.to_float(generated) - math_ops.to_float(real))
            for generated, real in zip(generated_list, real_list)
        ]
        image_list.extend(diffs)

    # Reshape image and display.
    summary.image('image_comparison',
                  eval_utils.image_reshaper(image_list,
                                            num_cols=num_comparisons),
                  max_outputs=1)
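
# Hypothetical usage, assuming `gan_model` is a TF-GAN GANModel (or
# CycleGANModel) whose generator inputs, generated data, and real data are
# image batches of identical shape:
#
#   add_image_comparison_summaries(gan_model, num_comparisons=3,
#                                  display_diffs=True)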
  def __init__(self,
               num_rows,
               multiplier,
               is_non_singular=None,
               is_self_adjoint=None,
               is_positive_definite=None,
               is_square=True,
               assert_proper_shapes=False,
               name="LinearOperatorScaledIdentity"):
    r"""Initialize a `LinearOperatorScaledIdentity`.

    The `LinearOperatorScaledIdentity` is initialized with `num_rows`, which
    determines the size of each identity matrix, and a `multiplier`,
    which defines `dtype`, batch shape, and scale of each matrix.

    This operator is able to broadcast the leading (batch) dimensions.

    Args:
      num_rows:  Scalar non-negative integer `Tensor`.  Number of rows in the
        corresponding identity matrix.
      multiplier:  `Tensor` of shape `[B1,...,Bb]`, or `[]` (a scalar).
      is_non_singular:  Expect that this operator is non-singular.
      is_self_adjoint:  Expect that this operator is equal to its hermitian
        transpose.
      is_positive_definite:  Expect that this operator is positive definite,
        meaning the quadratic form `x^H A x` has positive real part for all
        nonzero `x`.  Note that we do not require the operator to be
        self-adjoint to be positive-definite.  See:
        https://en.wikipedia.org/wiki/Positive-definite_matrix\
            #Extension_for_non_symmetric_matrices
      is_square:  Expect that this operator acts like square [batch] matrices.
      assert_proper_shapes:  Python `bool`.  If `False`, only perform static
        checks that initialization and method arguments have proper shape.
        If `True`, and static checks are inconclusive, add asserts to the graph.
      name: A name for this `LinearOperator`

    Raises:
      ValueError:  If `num_rows` is determined statically to be non-scalar, or
        negative.
    """
    self._assert_proper_shapes = assert_proper_shapes

    if not is_square:
      raise ValueError("A ScaledIdentity operator is always square.")

    with ops.name_scope(name, values=[multiplier, num_rows]):
      self._multiplier = ops.convert_to_tensor(multiplier, name="multiplier")

      super(LinearOperatorScaledIdentity, self).__init__(
          dtype=self._multiplier.dtype,
          is_non_singular=is_non_singular,
          is_self_adjoint=is_self_adjoint,
          is_positive_definite=is_positive_definite,
          is_square=is_square,
          name=name)

      # Shape [B1,...Bb, 1, 1]
      self._multiplier_matrix = array_ops.expand_dims(
          array_ops.expand_dims(self.multiplier, -1), -1)
      self._multiplier_matrix_conj = math_ops.conj(self._multiplier_matrix)
      self._abs_multiplier = math_ops.abs(self.multiplier)

      self._num_rows = linear_operator_util.shape_tensor(
          num_rows, name="num_rows")
      self._num_rows_static = tensor_util.constant_value(self._num_rows)
      self._check_num_rows_possibly_add_asserts()
      self._num_rows_cast_to_dtype = math_ops.cast(self._num_rows, self.dtype)
      self._num_rows_cast_to_real_dtype = math_ops.cast(self._num_rows,
                                                        self.dtype.real_dtype)
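
# A minimal usage sketch via the public alias
# tf.linalg.LinearOperatorScaledIdentity: the operator behaves like
# `multiplier * I`, so matmul simply scales its argument.
import tensorflow as tf

operator = tf.linalg.LinearOperatorScaledIdentity(num_rows=2, multiplier=3.0)
x = tf.constant([[1.0], [2.0]])
y = operator.matmul(x)  # == 3.0 * x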
 def _log_abs_determinant(self):
     axis = [-(i + 1) for i in range(self.block_depth)]
     lad = math_ops.reduce_sum(math_ops.log(math_ops.abs(self.spectrum)),
                               axis=axis)
     return math_ops.cast(lad, self.dtype)
def matrix_exponential(input, name=None):  # pylint: disable=redefined-builtin
    r"""Computes the matrix exponential of one or more square matrices.

    exp(A) = \sum_{n=0}^\infty A^n/n!

    The exponential is computed using a combination of the scaling and squaring
    method and the Pade approximation. Details can be found in:
    Nicholas J. Higham, "The scaling and squaring method for the matrix
    exponential revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005.

    The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
    form square matrices. The output is a tensor of the same shape as the input
    containing the exponential for all input submatrices `[..., :, :]`.

    Args:
      input: A `Tensor`. Must be `float16`, `float32`, `float64`, `complex64`,
        or `complex128` with shape `[..., M, M]`.
      name:  A name to give this `Op` (optional).

    Returns:
      the matrix exponential of the input.

    Raises:
      ValueError: An unsupported type is provided as input.

    @compatibility(scipy)
    Equivalent to scipy.linalg.expm
    @end_compatibility
    """
    with ops.name_scope(name, 'matrix_exponential', [input]):
        matrix = ops.convert_to_tensor(input, name='input')
        if matrix.shape[-2:] == [0, 0]:
            return matrix
        batch_shape = matrix.shape[:-2]
        if not batch_shape.is_fully_defined():
            batch_shape = array_ops.shape(matrix)[:-2]

        # reshaping the batch makes the where statements work better
        matrix = array_ops.reshape(
            matrix,
            array_ops.concat(([-1], array_ops.shape(matrix)[-2:]), axis=0))
        l1_norm = math_ops.reduce_max(math_ops.reduce_sum(
            math_ops.abs(matrix),
            axis=array_ops.size(array_ops.shape(matrix)) - 2),
                                      axis=-1)
        const = lambda x: constant_op.constant(x, l1_norm.dtype)

        def _nest_where(vals, cases):
            assert len(vals) == len(cases) - 1
            if len(vals) == 1:
                return array_ops.where(math_ops.less(l1_norm, const(vals[0])),
                                       cases[0], cases[1])
            else:
                return array_ops.where(math_ops.less(l1_norm, const(vals[0])),
                                       cases[0],
                                       _nest_where(vals[1:], cases[1:]))

        if matrix.dtype in [dtypes.float16, dtypes.float32, dtypes.complex64]:
            maxnorm = const(3.925724783138660)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix / math_ops.pow(
                constant_op.constant(2.0, dtype=matrix.dtype),
                math_ops.cast(squarings, matrix.dtype))[..., array_ops.newaxis,
                                                        array_ops.newaxis])
            conds = (4.258730016922831e-001, 1.880152677804762e+000)
            u = _nest_where(conds, (u3, u5, u7))
            v = _nest_where(conds, (v3, v5, v7))
        elif matrix.dtype in [dtypes.float64, dtypes.complex128]:
            maxnorm = const(5.371920351148152)
            squarings = math_ops.maximum(
                math_ops.floor(
                    math_ops.log(l1_norm / maxnorm) /
                    math_ops.log(const(2.0))), 0)
            u3, v3 = _matrix_exp_pade3(matrix)
            u5, v5 = _matrix_exp_pade5(matrix)
            u7, v7 = _matrix_exp_pade7(matrix)
            u9, v9 = _matrix_exp_pade9(matrix)
            u13, v13 = _matrix_exp_pade13(matrix / math_ops.pow(
                constant_op.constant(2.0, dtype=matrix.dtype),
                math_ops.cast(squarings, matrix.dtype))[..., array_ops.newaxis,
                                                        array_ops.newaxis])
            conds = (1.495585217958292e-002, 2.539398330063230e-001,
                     9.504178996162932e-001, 2.097847961257068e+000)
            u = _nest_where(conds, (u3, u5, u7, u9, u13))
            v = _nest_where(conds, (v3, v5, v7, v9, v13))
        else:
            raise ValueError(
                'tf.linalg.expm does not support matrices of type %s' %
                matrix.dtype)
        numer = u + v
        denom = -u + v
        result = linalg_ops.matrix_solve(denom, numer)
        max_squarings = math_ops.reduce_max(squarings)

        i = const(0.0)
        c = lambda i, r: math_ops.less(i, max_squarings)

        def b(i, r):
            return i + 1, array_ops.where(math_ops.less(i, squarings),
                                          math_ops.matmul(r, r), r)

        _, result = control_flow_ops.while_loop(c, b, [i, result])
        if not matrix.shape.is_fully_defined():
            return array_ops.reshape(
                result,
                array_ops.concat((batch_shape, array_ops.shape(result)[-2:]),
                                 axis=0))
        return array_ops.reshape(result,
                                 batch_shape.concatenate(result.shape[-2:]))
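
# A quick sanity-check sketch via the public alias tf.linalg.expm: for a
# diagonal matrix the result is the elementwise exponential of the diagonal.
import numpy as np
import tensorflow as tf

a = tf.constant(np.diag([0.0, 1.0]), dtype=tf.float64)
e = tf.linalg.expm(a)  # == diag(exp(0), exp(1)) == diag(1.0, 2.71828...)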
Example #55
0
def mean_absolute_percentage_error(y_true, y_pred):  # pylint: disable=missing-docstring
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  diff = math_ops.abs(
      (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
  return 100. * K.mean(diff, axis=-1)
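
# An equivalent NumPy computation of the formula above (ignoring the epsilon
# clipping), as a worked example:
import numpy as np

y_true = np.array([2.0, 4.0])
y_pred = np.array([1.0, 5.0])
mape = 100.0 * np.mean(np.abs((y_true - y_pred) / np.abs(y_true)))
# |(2-1)/2| = 0.5 and |(4-5)/4| = 0.25, so mape == 100 * 0.375 == 37.5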
Example #56
0
 def _log_abs_determinant(self):
     return self._num_rows_cast_to_real_dtype * math_ops.log(
         math_ops.abs(self.multiplier))
Example #57
0
 def _iter_condition(i, mat_m, _):
     return math_ops.logical_and(
         i < iter_count,
         math_ops.reduce_max(math_ops.abs(mat_m - identity)) > epsilon)
Example #58
0
def mean_absolute_error(y_true, y_pred):
    return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
Example #59
0
def mean_absolute_error(y_true, y_pred):
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
    def __init__(self,
                 loc,
                 atol=None,
                 rtol=None,
                 is_vector=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="_BaseDeterministic"):
        """Initialize a batch of `_BaseDeterministic` distributions.

        The `atol` and `rtol` parameters allow for some slack in `pmf`, `cdf`
        computations, e.g. due to floating-point error.

        ```
        pmf(x; loc)
          = 1, if Abs(x - loc) <= atol + rtol * Abs(loc),
          = 0, otherwise.
        ```

        Args:
          loc: Numeric `Tensor`.  The point (or batch of points) on which this
            distribution is supported.
          atol:  Non-negative `Tensor` of same `dtype` as `loc` and
            broadcastable shape.  The absolute tolerance for comparing
            closeness to `loc`.  Default is `0`.
          rtol:  Non-negative `Tensor` of same `dtype` as `loc` and
            broadcastable shape.  The relative tolerance for comparing
            closeness to `loc`.  Default is `0`.
          is_vector:  Python `bool`.  If `True`, this is for
            `VectorDeterministic`, else `Deterministic`.
          validate_args: Python `bool`, default `False`. When `True`
            distribution parameters are checked for validity despite possibly
            degrading runtime performance. When `False` invalid inputs may
            silently render incorrect outputs.
          allow_nan_stats: Python `bool`, default `True`. When `True`,
            statistics (e.g., mean, mode, variance) use the value "`NaN`" to
            indicate the result is undefined. When `False`, an exception is
            raised if one or more of the statistic's batch members are
            undefined.
          name: Python `str` name prefixed to Ops created by this class.

        Raises:
          ValueError:  If `loc` is a scalar.
        """
        parameters = dict(locals())
        with ops.name_scope(name, values=[loc, atol, rtol]) as name:
            loc = ops.convert_to_tensor(loc, name="loc")
            if is_vector and validate_args:
                msg = "Argument loc must be at least rank 1."
                if loc.get_shape().ndims is not None:
                    if loc.get_shape().ndims < 1:
                        raise ValueError(msg)
                else:
                    loc = control_flow_ops.with_dependencies(
                        [check_ops.assert_rank_at_least(loc, 1, message=msg)],
                        loc)
            self._loc = loc

            super(_BaseDeterministic, self).__init__(
                dtype=self._loc.dtype,
                reparameterization_type=distribution.NOT_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=[self._loc],
                name=name)

            self._atol = self._get_tol(atol)
            self._rtol = self._get_tol(rtol)
            # Avoid using the large broadcast with self.loc if possible.
            if rtol is None:
                self._slack = self.atol
            else:
                self._slack = self.atol + self.rtol * math_ops.abs(self.loc)
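
# A hedged NumPy sketch of the tolerance rule the docstring describes: a
# point x counts as equal to loc when |x - loc| <= atol + rtol * |loc|,
# which is exactly the `_slack` computed above.
import numpy as np

def is_close_to_loc(x, loc, atol=0.0, rtol=0.0):
  slack = atol + rtol * np.abs(loc)
  return np.abs(x - loc) <= slack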