Example #1
  def _apply_sparse(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)

    m = self.get_slot(var, 'm')
    m_t = state_ops.assign(
        m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking)

    sign_g = ops.IndexedSlices(
        math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape)
    sign_gm = ops.IndexedSlices(
        array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values,
        sign_g.indices,
        dense_shape=sign_g.dense_shape)

    sign_decayed = math_ops.cast(
        self._sign_decay_t, var.dtype.base_dtype)
    multiplier_values = alpha_t + sign_decayed * sign_gm.values
    multiplier = ops.IndexedSlices(
        multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape)

    final_update = ops.IndexedSlices(
        lr_t * multiplier.values * grad.values,
        multiplier.indices,
        dense_shape=multiplier.dense_shape)

    var_update = state_ops.scatter_sub(
        var,
        final_update.indices,
        final_update.values,
        use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t])
Example #2
def random_sign_uniform(shape,
                        minval=None,
                        maxval=None,
                        dtype=dtypes.float32,
                        seed=None):
  """Tensor with (possibly complex) random entries from a "sign Uniform".

  Letting `Z` be a random variable equal to `-1` and `1` with equal probability,
  samples from this `Op` are distributed like

  ```
  Z * X, where X ~ Uniform[minval, maxval], if dtype is real,
  Z * (X + iY),  where X, Y ~ Uniform[minval, maxval], if dtype is complex.
  ```

  Args:
    shape:  `TensorShape` or Python list.  Shape of the returned tensor.
    minval:  `0-D` `Tensor` giving the minimum values.
    maxval:  `0-D` `Tensor` giving the maximum values.
    dtype:  `TensorFlow` `dtype` or Python dtype
    seed:  Python integer seed for the RNG.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  dtype = dtypes.as_dtype(dtype)

  with ops.name_scope("random_sign_uniform"):
    unsigned_samples = random_uniform(
        shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed)
    if seed is not None:
      seed += 12
    signs = math_ops.sign(
        random_ops.random_uniform(shape, minval=-1., maxval=1., seed=seed))
    return unsigned_samples * math_ops.cast(signs, unsigned_samples.dtype)
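For comparison, here is a minimal NumPy sketch of the same "sign Uniform" idea outside TensorFlow; the helper name `numpy_random_sign_uniform` and the use of `numpy.random.default_rng` are illustrative assumptions, not part of the snippet above.

```python
import numpy as np

def numpy_random_sign_uniform(shape, minval=0.0, maxval=1.0, seed=None):
    rng = np.random.default_rng(seed)
    # Draw the unsigned magnitudes X ~ Uniform[minval, maxval].
    unsigned = rng.uniform(minval, maxval, size=shape)
    # Draw Z in {-1, +1} with equal probability and attach it as the sign.
    signs = np.sign(rng.uniform(-1.0, 1.0, size=shape))
    return unsigned * signs

# Roughly half the samples should be negative, and all magnitudes in [1, 2].
samples = numpy_random_sign_uniform((100000,), minval=1.0, maxval=2.0, seed=0)
print((samples < 0).mean())                           # ~0.5
print(np.abs(samples).min(), np.abs(samples).max())   # within [1, 2]
```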
Example #3
  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 3 or len(shape) > 5:
      raise ValueError("The tensor to initialize must be at least "
                       "three-dimensional and at most five-dimensional")

    if shape[-2] > shape[-1]:
      raise ValueError("In_filters cannot be greater than out_filters.")

    # Generate a random matrix
    a = random_ops.random_normal([shape[-1], shape[-1]],
                                 dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    q = q[:shape[-2], :]
    q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype))
    if len(shape) == 3:
      weight = array_ops.scatter_nd([[(shape[0]-1)//2]],
                                    array_ops.expand_dims(q, 0), shape)
    elif len(shape) == 4:
      weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2]],
                                    array_ops.expand_dims(q, 0), shape)
    else:
      weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2,
                                      (shape[2]-1)//2]],
                                    array_ops.expand_dims(q, 0), shape)
    return weight
Example #4
def _Solve(a, b, c):
    """Return the solution of a quadratic minimization.

    The optimization equation is:
        f(a, b, c) = argmin_w{1/2 * a * w^2 + b * w + c * |w|}
    and the optimal solution w* is:
        w* = -(b - sign(b)*c)/a if |b| > c else w* = 0

    REQUIRES: The dimensionality of a and b must be the same.

    Args:
      a: A Tensor.
      b: A Tensor.
      c: A Tensor with one element.

    Returns:
      A Tensor w, the solution of the minimization.
    """
    with ops.name_scope("solve_" + b.op.name):
        c = ops.convert_to_tensor(c)
        k = array_ops.fill(array_ops.shape(b), c)
        zero_t = array_ops.zeros(array_ops.shape(b), dtype=b.dtype)
        w = (c * math_ops.sign(b) - b) / a
        w = math_ops.select(math_ops.less(math_ops.abs(b), k), zero_t, w)
        return w
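The closed form implemented above is soft thresholding of an L1-regularized quadratic. Below is a small NumPy sketch (the helper name `solve_quadratic_l1` is made up for illustration) that checks the formula against a brute-force grid minimization of `1/2 * a * w^2 + b * w + c * |w|`.

```python
import numpy as np

def solve_quadratic_l1(a, b, c):
    # w* = -(b - sign(b) * c) / a when |b| > c, else 0 (soft thresholding).
    return np.where(np.abs(b) > c, -(b - np.sign(b) * c) / a, 0.0)

a, b, c = 2.0, np.array([-3.0, 0.5, 4.0]), 1.0
w_closed = solve_quadratic_l1(a, b, c)

# Brute-force check: minimize f(w) = 1/2 * a * w^2 + b * w + c * |w| on a grid.
grid = np.linspace(-5, 5, 200001)
f = (0.5 * a * grid[None, :] ** 2 + b[:, None] * grid[None, :]
     + c * np.abs(grid[None, :]))
w_brute = grid[np.argmin(f, axis=1)]
print(w_closed)   # [ 1.   0.  -1.5]
print(w_brute)    # approximately the same values
```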
Example #5
  def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._loc, self._scale, n]):
        n = ops.convert_to_tensor(n)
        n_val = tensor_util.constant_value(n)
        shape = array_ops.concat(0, ([n], self.batch_shape()))
        # Sample uniformly-at-random from the open-interval (-1, 1).
        uniform_samples = random_ops.random_uniform(
            shape=shape,
            minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                self.dtype.as_numpy_dtype(0.)),
            maxval=self.dtype.as_numpy_dtype(1.),
            dtype=self.dtype,
            seed=seed)

        # Provide some hints to shape inference
        inferred_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        uniform_samples.set_shape(inferred_shape)

        return (self._loc - self._scale * math_ops.sign(uniform_samples) *
                math_ops.log(1. - math_ops.abs(uniform_samples)))
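The transformation above is inverse-CDF sampling of the Laplace distribution: for `U` uniform on the open interval (-1, 1), `loc - scale * sign(U) * log(1 - |U|)` is Laplace(loc, scale). A hedged NumPy sketch of the same trick (names are illustrative):

```python
import numpy as np

def sample_laplace(loc, scale, n, seed=None):
    rng = np.random.default_rng(seed)
    # Uniform on the open interval (-1, 1).
    u = rng.uniform(np.nextafter(-1.0, 0.0), 1.0, size=n)
    # Inverse-CDF transform: sign(u) picks the side, log(1 - |u|) the magnitude.
    return loc - scale * np.sign(u) * np.log1p(-np.abs(u))

samples = sample_laplace(loc=2.0, scale=0.5, n=200000, seed=0)
print(samples.mean())                  # ~2.0 (the Laplace mean is loc)
print(np.abs(samples - 2.0).mean())    # ~0.5 (E|X - loc| equals scale)
```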
Example #6
  def __call__(self, shape, dtype=None, partition_info=None):
    if dtype is None:
      dtype = self.dtype
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (num_cols, num_rows) if num_rows < num_cols else (num_rows,
                                                                   num_cols)

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)
Example #7
  def __call__(self, shape, dtype=dtypes.float32):
    """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
       supported.

    Raises:
      ValueError: If the dtype is not floating point or the input shape is not
       valid.
    """
    dtype = _assert_float_dtype(dtype)
    # Check the shape
    if len(shape) < 2:
      raise ValueError("The tensor to initialize must be "
                       "at least two-dimensional")
    # Flatten the input shape with the last dimension remaining
    # its original shape so it works for conv2d
    num_rows = 1
    for dim in shape[:-1]:
      num_rows *= dim
    num_cols = shape[-1]
    flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

    # Generate a random matrix
    a = random_ops.random_normal(flat_shape, dtype=dtype, seed=self.seed)
    # Compute the qr factorization
    q, r = gen_linalg_ops.qr(a, full_matrices=False)
    # Make Q uniform
    d = array_ops.diag_part(r)
    q *= math_ops.sign(d)
    if num_rows < num_cols:
      q = array_ops.matrix_transpose(q)
    return self.gain * array_ops.reshape(q, shape)
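In both initializers above, multiplying `q` by the sign of `diag(r)` removes the sign ambiguity of the QR factorization so that `Q` is uniformly (Haar) distributed over orthogonal matrices. A small NumPy illustration of that correction (the helper name is made up):

```python
import numpy as np

def haar_orthogonal(n, seed=None):
    rng = np.random.default_rng(seed)
    a = rng.standard_normal((n, n))
    q, r = np.linalg.qr(a)
    # QR is only unique up to the signs of diag(r); fixing them makes Q
    # a draw from the Haar distribution over orthogonal matrices.
    q *= np.sign(np.diag(r))
    return q

q = haar_orthogonal(4, seed=0)
print(np.allclose(q @ q.T, np.eye(4)))  # True: columns are orthonormal
```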
Example #8
    def _resource_apply_dense(self, grad, var):
        step, beta1_power, beta2_power = self._get_beta_accumulators()
        beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
        beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)

        if self._initial_total_steps > 0:
            total_steps = math_ops.cast(self._total_steps_t, var.dtype.base_dtype)
            warmup_proportion = math_ops.cast(self._warmup_proportion_t, var.dtype.base_dtype)
            min_lr = math_ops.cast(self._min_lr_t, var.dtype.base_dtype)
            warmup_steps = total_steps * warmup_proportion
            decay_steps = math_ops.maximum(total_steps - warmup_steps, 1)
            decay_rate = (min_lr - lr_t) / decay_steps
            lr_t = tf.where(
                step <= warmup_steps,
                lr_t * (step / warmup_steps),
                lr_t + decay_rate * math_ops.minimum(step - warmup_steps, decay_steps),
            )

        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)

        v = self.get_slot(var, "v")

        if self.clip_gradients:
            clipVal = math_ops.sqrt(
                tf.reduce_sum(v) / (1.0 - beta2_power)) * self.clip_multiplier_t + self.clip_epsilon_t
            grad = clip_ops.clip_by_norm(grad, clipVal)

        sma_inf = 2.0 / (1.0 - beta2_t) - 1.0
        sma_t = sma_inf - 2.0 * step * beta2_power / (1.0 - beta2_power)

        m = self.get_slot(var, "m")

        v_t = state_ops.assign(v, beta2_t * v + (1.0 - beta2_t) * math_ops.square(grad), use_locking=self._use_locking)
        v_corr_t = math_ops.sqrt(v_t / (1.0 - beta2_power)) + epsilon_t
        grad_corr = grad / v_corr_t

        m_t = state_ops.assign(m, beta1_t * m + (1.0 - beta1_t) * grad_corr, use_locking=self._use_locking)
        m_corr_t = m_t / (1.0 - beta1_power)

        r_t = math_ops.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                            (sma_t - 2.0) / (sma_inf - 2.0) *
                            sma_inf / sma_t)

        var_t = tf.where(sma_t >= 5.0, r_t * m_corr_t, m_corr_t)

        if var in self.reg_vars:
            if self._initial_weight_decay > 0.0:
                var_t += math_ops.cast(self._weight_decay_t, var.dtype.base_dtype) * var
            if self._L1_decay > 0.0:
                var_t += math_ops.cast(self._L1_decay, var.dtype.base_dtype) * math_ops.sign(var)

        with tf.control_dependencies([var_t]):
            var_update = state_ops.assign_sub(var, lr_t * var_t, use_locking=self._use_locking)

        updates = [var_update, m_t, v_t]
        return control_flow_ops.group(*updates)
Example #9
 def test_complex_sign_gradient(self):
     with context.eager_mode():
         x = math_ops.complex(1., 1.)
         with backprop.GradientTape() as t:
             t.watch(x)
             y = math_ops.sign(x)
         self.assertAllClose(t.gradient(y, x),
                             math_ops.complex(0.353553, -0.353553))
Example #10
 def call(self, y_true, y_pred):
     y_pred = ops.convert_to_tensor_v2(y_pred)
     y_true = math_ops.cast(y_true, y_pred.dtype)
     quotient = math_ops.divide(y_pred, y_true)
     sign = math_ops.sign(quotient)
     quabs = math_ops.minimum(math_ops.abs(quotient),
                              100000 * math_ops.abs(y_pred))
     quotient = math_ops.exp(10 - 10 * sign) * quabs + 0.000000001
     return 100 * K.mean(math_ops.abs(math_ops.log(quotient)), axis=-1)
Example #11
 def Test(self):
     np.random.seed(1)
     n = shape_[-1]
     batch_shape = shape_[:-2]
     np_dtype = dtype_.as_numpy_dtype
     a = np.random.uniform(low=-1.0, high=1.0,
                           size=n * n).reshape([n, n]).astype(np_dtype)
     if dtype_.is_complex:
         a += 1j * np.random.uniform(low=-1.0, high=1.0, size=n *
                                     n).reshape([n, n]).astype(np_dtype)
     a += np.conj(a.T)
     a = np.tile(a, batch_shape + (1, 1))
     # Optimal stepsize for central difference is O(epsilon^{1/3}).
     epsilon = np.finfo(np_dtype).eps
     delta = 0.1 * epsilon**(1.0 / 3.0)
     # tolerance obtained by looking at actual differences using
     # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
     if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
         tol = 1e-2
     else:
         tol = 1e-7
     with self.session(use_gpu=True):
         tf_a = constant_op.constant(a)
         if compute_v_:
             tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
             # (complex) Eigenvectors are only unique up to an arbitrary phase
             # We normalize the vectors such that the first component has phase 0.
             top_rows = tf_v[..., 0:1, :]
             if tf_a.dtype.is_complex:
                 angle = -math_ops.angle(top_rows)
                 phase = math_ops.complex(math_ops.cos(angle),
                                          math_ops.sin(angle))
             else:
                 phase = math_ops.sign(top_rows)
             tf_v *= phase
             outputs = [tf_e, tf_v]
         else:
             tf_e = linalg_ops.self_adjoint_eigvals(tf_a)
             outputs = [tf_e]
         for b in outputs:
             x_init = np.random.uniform(low=-1.0, high=1.0, size=n *
                                        n).reshape([n, n]).astype(np_dtype)
             if dtype_.is_complex:
                 x_init += 1j * np.random.uniform(
                     low=-1.0, high=1.0, size=n * n).reshape(
                         [n, n]).astype(np_dtype)
             x_init += np.conj(x_init.T)
             x_init = np.tile(x_init, batch_shape + (1, 1))
             theoretical, numerical = gradient_checker.compute_gradient(
                 tf_a,
                 tf_a.get_shape().as_list(),
                 b,
                 b.get_shape().as_list(),
                 x_init_value=x_init,
                 delta=delta)
             self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
Example #12
 def Test(self):
   np.random.seed(1)
   n = shape_[-1]
   batch_shape = shape_[:-2]
   np_dtype = dtype_.as_numpy_dtype
   a = np.random.uniform(
       low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
   if dtype_.is_complex:
     a += 1j * np.random.uniform(
         low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
   a += np.conj(a.T)
   a = np.tile(a, batch_shape + (1, 1))
   # Optimal stepsize for central difference is O(epsilon^{1/3}).
   epsilon = np.finfo(np_dtype).eps
   delta = 0.1 * epsilon**(1.0 / 3.0)
   # tolerance obtained by looking at actual differences using
   # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
   if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
     tol = 1e-2
   else:
     tol = 1e-7
   with self.session(use_gpu=True):
     tf_a = constant_op.constant(a)
     if compute_v_:
       tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
       # (complex) Eigenvectors are only unique up to an arbitrary phase
       # We normalize the vectors such that the first component has phase 0.
       top_rows = tf_v[..., 0:1, :]
       if tf_a.dtype.is_complex:
         angle = -math_ops.angle(top_rows)
         phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
       else:
         phase = math_ops.sign(top_rows)
       tf_v *= phase
       outputs = [tf_e, tf_v]
     else:
       tf_e = linalg_ops.self_adjoint_eigvals(tf_a)
       outputs = [tf_e]
     for b in outputs:
       x_init = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       if dtype_.is_complex:
         x_init += 1j * np.random.uniform(
             low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       x_init += np.conj(x_init.T)
       x_init = np.tile(x_init, batch_shape + (1, 1))
       theoretical, numerical = gradient_checker.compute_gradient(
           tf_a,
           tf_a.get_shape().as_list(),
           b,
           b.get_shape().as_list(),
           x_init_value=x_init,
           delta=delta)
       self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
Example #13
 def _apply_dense(self, grad, var):
     lr = math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)
     iter_ = math_ops.cast(self._iter, var.dtype.base_dtype)
     first_iter = math_ops.cast(self._first_iter, var.dtype.base_dtype)
     l1 = math_ops.cast(self._l1_accum, var.dtype.base_dtype)

     v = self.get_slot(var, "accumulator")
     v_t = state_ops.assign(v, v + first_iter * var - lr * grad,
                            use_locking=self._use_locking)
     # GRDA update
     var_update = state_ops.assign(
         var,
         math_ops.sign(v_t) * math_ops.maximum(math_ops.abs(v_t) - l1, 0),
         use_locking=self._use_locking)
     return control_flow_ops.group(*[v_t, var_update])
Example #14
def modrelu(z, b, comp):
    if comp:
        z_norm = math_ops.sqrt(math_ops.square(math_ops.real(z)) + math_ops.square(math_ops.imag(z))) + 0.00001
        step1 = nn_ops.bias_add(z_norm, b)
        step2 = math_ops.complex(nn_ops.relu(step1), array_ops.zeros_like(z_norm))
        step3 = z/math_ops.complex(z_norm, array_ops.zeros_like(z_norm))
    else:
        z_norm = math_ops.abs(z) + 0.00001
        step1 = nn_ops.bias_add(z_norm, b)
        step2 = nn_ops.relu(step1)
        step3 = math_ops.sign(z)
    return math_ops.multiply(step3, step2)
Example #15
 def _sample_n(self, n, seed=None):
   shape = array_ops.concat(0, ([n], self.batch_shape()))
   # Sample uniformly-at-random from the open-interval (-1, 1).
   uniform_samples = random_ops.random_uniform(
       shape=shape,
       minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                           self.dtype.as_numpy_dtype(0.)),
       maxval=1.,
       dtype=self.dtype,
       seed=seed)
   return (self.loc - self.scale * math_ops.sign(uniform_samples) *
           math_ops.log(1. - math_ops.abs(uniform_samples)))
Example #16
 def Compute(x):
   e, v = linalg_ops.self_adjoint_eig(x)
   # (complex) Eigenvectors are only unique up to an arbitrary phase
   # We normalize the vectors such that the first component has phase 0.
   top_rows = v[..., 0:1, :]
   if dtype_.is_complex:
     angle = -math_ops.angle(top_rows)
     phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
   else:
     phase = math_ops.sign(top_rows)
   v *= phase
   return e, v
Example #17
 def _sample_n(self, n, seed=None):
     shape = array_ops.concat(([n], self.batch_shape()), 0)
     # Sample uniformly-at-random from the open-interval (-1, 1).
     uniform_samples = random_ops.random_uniform(
         shape=shape,
         minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                             self.dtype.as_numpy_dtype(0.)),
         maxval=1.,
         dtype=self.dtype,
         seed=seed)
     return (self.loc - self.scale * math_ops.sign(uniform_samples) *
             math_ops.log(1. - math_ops.abs(uniform_samples)))
Example #18
 def get_grow_tensor(self, weight, method):
     if method.startswith('grad_scale'):
         masked_grad = self._weight2masked_grads[weight.name]
         divisor = extract_number(method)
         grow_tensor = masked_grad / divisor
     elif method.startswith('grad_sign'):
         masked_grad_sign = math_ops.sign(
             self._weight2masked_grads[weight.name])
         divisor = extract_number(method)
         grow_tensor = masked_grad_sign / divisor
     else:
         grow_tensor = super(SparseRigLOptimizer,
                             self).get_grow_tensor(weight, method)
     return grow_tensor
Example #19
    def _apply_sparse(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
        logbase_t = math_ops.cast(self._logbase_t, var.dtype.base_dtype)
        e_t = math_ops.cast(math.e, var.dtype.base_dtype)

        m = self.get_slot(var, 'm')
        m_t = state_ops.assign(m, (m * beta_t) + (grad * (1 - beta_t)),
                               use_locking=self._use_locking)

        sign_g = ops.IndexedSlices(math_ops.sign(grad.values),
                                   grad.indices,
                                   dense_shape=grad.dense_shape)
        sign_gm = ops.IndexedSlices(
            array_ops.gather(math_ops.sign(m_t), sign_g.indices) *
            sign_g.values,
            sign_g.indices,
            dense_shape=sign_g.dense_shape)

        sign_decayed = math_ops.cast(self._sign_decay_t, var.dtype.base_dtype)
        multiplier_values = math_ops.pow(
            e_t, logbase_t * sign_decayed * sign_gm.values)
        multiplier = ops.IndexedSlices(multiplier_values,
                                       sign_gm.indices,
                                       dense_shape=sign_gm.dense_shape)

        final_update = ops.IndexedSlices(lr_t * multiplier.values *
                                         grad.values,
                                         multiplier.indices,
                                         dense_shape=multiplier.dense_shape)

        var_update = state_ops.scatter_sub(var,
                                           final_update.indices,
                                           final_update.values,
                                           use_locking=self._use_locking)

        return control_flow_ops.group(*[var_update, m_t])
Example #20
    def build(self, inputs_shape):
        """Construct the IndRNN cell."""
        if inputs_shape[1].value is None:
            raise ValueError("Expected inputs_shape[1] to be known")

        input_depth = inputs_shape[1]
        if self._input_kernel_initializer is None:
            self._input_kernel_initializer = init_ops.random_normal_initializer(
                mean=0, stddev=1e-3)
        # matrix W
        self._input_kernel = self.add_variable(
            "input_kernel",
            shape=[input_depth, self._num_units],
            initializer=self._input_kernel_initializer,
        )

        if self._recurrent_recurrent_kernel_initializer is None:
            self._recurrent_recurrent_kernel_initializer = init_ops.constant_initializer(
                1.)

        # matrix U
        self._recurrent_kernel = self.add_variable(
            "recurrent_kernel",
            shape=[self._num_units],
            initializer=self._recurrent_recurrent_kernel_initializer,
        )

        # Clip the U to min - max
        if self._recurrent_min_abs:
            abs_kernel = math_ops.abs(self._recurrent_kernel)
            min_abs_kernel = math_ops.maximum(abs_kernel,
                                              self._recurrent_min_abs)
            self._recurrent_kernel = math_ops.multiply(
                math_ops.sign(self._recurrent_kernel), min_abs_kernel)
        if self._recurrent_max_abs:
            self._recurrent_kernel = clip_ops.clip_by_value(
                self._recurrent_kernel,
                -self._recurrent_max_abs,
                self._recurrent_max_abs,
            )

        self._bias = self.add_variable(
            "bias",
            shape=[self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype),
        )
        # built finished
        self.built = True
Example #21
    def _apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype

        lr = math_ops.cast(self._lr_t, var_dtype)
        beta = self._beta
        epsilon = self._epsilon
        t = math_ops.cast(self.iterations + 1, var_dtype)

        ops = []

        # Update running sum
        s = self.get_slot(var, 'sum')
        grad_sq = math_ops.square(grad)
        s_new = s + grad_sq
        ops.append(state_ops.assign(s, s_new, use_locking=self._use_locking))

        # Update running counter
        if self._sparse_counter:
            n = self.get_slot(var, 'counter')
            n_new = n + math_ops.sign(grad_sq)
            ops.append(
                state_ops.assign(n, n_new, use_locking=self._use_locking))
        else:
            # Counter is not sparse; just use the current timestep instead
            n_new = t

        # Compute step size
        average = math_ops.div_no_nan(s_new, n_new)
        step = grad / (epsilon + math_ops.sqrt(average))

        # Update momentum
        if self._use_momentum:
            m = self.get_slot(var, 'momentum')
            m_new = beta * m + (1.0 - beta) * step
            ops.append(
                state_ops.assign(m, m_new, use_locking=self._use_locking))
            # Bias correction
            lr = lr / (1.0 - pow(beta, t))
        else:
            # No momentum; just use the current step instead
            m_new = step

        # Update parameters
        ops.append(
            state_ops.assign_sub(var,
                                 lr * m_new,
                                 use_locking=self._use_locking))
        return control_flow_ops.group(*ops)
Example #22
    def _apply_dense(self, grad, var):
        lr = math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)

        v = self.get_slot(var, "accumulator")
        v_t = state_ops.assign(v, v - lr * grad, use_locking=self._use_locking)

        iter_ = self._get_iter_variable()
        iter_ = math_ops.cast(iter_, var.dtype.base_dtype)

        c = math_ops.cast(self._c, var.dtype.base_dtype)
        mu = math_ops.cast(self._mu, var.dtype.base_dtype)
        l1 = math_ops.cast(c * math_ops.pow(lr, (0.5 + mu)) * math_ops.pow(iter_, mu), var.dtype.base_dtype)

        # GRDA update
        var_update = state_ops.assign(var, math_ops.sign(v_t) * math_ops.maximum(math_ops.abs(v_t) - l1, 0), use_locking=self._use_locking)
        return control_flow_ops.group(*[var_update, v_t])
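The GRDA step above keeps a dual accumulator `v` and soft-thresholds it with a growing level `c * lr^(0.5 + mu) * iter^mu`. A minimal NumPy sketch of a single such step (all names and constants here are illustrative, not taken from the optimizer above):

```python
import numpy as np

def grda_step(v, grad, lr, iteration, c=0.005, mu=0.7):
    # Dual averaging: accumulate the (negative) gradient steps.
    v = v - lr * grad
    # Growing soft-threshold level, as in the snippet above.
    l1 = c * lr ** (0.5 + mu) * iteration ** mu
    # Soft-threshold the accumulator to get the new (sparse) iterate.
    var = np.sign(v) * np.maximum(np.abs(v) - l1, 0.0)
    return var, v

v = np.array([0.001, -0.5, 1.0])
var, v = grda_step(v, grad=np.array([0.0, -0.2, 0.3]), lr=0.1, iteration=10)
print(var)  # tiny entries of v are zeroed, larger ones shrunk by l1
```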
Example #23
    def _orthogonal_matrix(self, n):
        """Construct an n x n orthogonal matrix.

    Args:
      n: Dimension.
    Returns:
      A n x n orthogonal matrix.
    """
        a = random_ops.random_normal([n, n], dtype=self.dtype, seed=self.seed)
        if self.seed:
            self.seed += 1
        q, r = gen_linalg_ops.qr(a)
        d = array_ops.diag_part(r)
        # make q uniform
        q *= math_ops.sign(d)
        return q
Example #24
  def _orthogonal_matrix(self, n):
    """Construct an n x n orthogonal matrix.

    Args:
      n: dimension.
    Returns:
      a n x n orthogonal matrix.
    """
    a = random_ops.random_normal([n, n], dtype=self.dtype, seed=self.seed)
    if self.seed:
      self.seed += 1
    q, r = linalg_ops.qr(a)
    d = array_ops.diag_part(r)
    # make q uniform
    q *= math_ops.sign(d)
    return q
Example #25
  def build(self, inputs_shape):
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    self._input_kernel = self.add_variable(
        "input_kernel",
        shape=[input_depth, self._num_units])

    if self._recurrent_initializer is None:
      # Initialize the recurrent weights uniformly in [-max_abs, max_abs] or
      # [-1, 1] if max_abs exceeds 1
      init_bound = 1.0
      if self._recurrent_max_abs and self._recurrent_max_abs < init_bound:
        init_bound = self._recurrent_max_abs

      self._recurrent_initializer = init_ops.random_uniform_initializer(
          minval=-init_bound,
          maxval=init_bound
      )

    self._recurrent_kernel = self.add_variable(
        "recurrent_kernel",
        shape=[self._num_units], initializer=self._recurrent_initializer)

    # Clip the absolute values of the recurrent weights to the specified minimum
    if self._recurrent_min_abs:
      abs_kernel = math_ops.abs(self._recurrent_kernel)
      min_abs_kernel = math_ops.maximum(abs_kernel, self._recurrent_min_abs)
      self._recurrent_kernel = math_ops.multiply(
          math_ops.sign(self._recurrent_kernel),
          min_abs_kernel
      )

    # Clip the absolute values of the recurrent weights to the specified maximum
    if self._recurrent_max_abs:
      self._recurrent_kernel = clip_ops.clip_by_value(self._recurrent_kernel,
                                                      -self._recurrent_max_abs,
                                                      self._recurrent_max_abs)

    self._bias = self.add_variable(
        "bias",
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True
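The `sign * maximum` pattern used for the minimum-magnitude clip keeps each recurrent weight's sign while raising its absolute value, which `clip_by_value` alone could not do. A short NumPy illustration with made-up values:

```python
import numpy as np

w = np.array([-0.02, -0.8, 0.01, 0.5])
recurrent_min_abs = 0.1

# Raise each |w| to at least recurrent_min_abs, then restore the original sign.
clipped = np.sign(w) * np.maximum(np.abs(w), recurrent_min_abs)
print(clipped)  # [-0.1 -0.8  0.1  0.5]
```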
Example #26
    def build(self, inputs_shape):
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)

        self._input_depth = inputs_shape[1].value
        self._filters_num = self._input_depth // self._n_nodes
        self._output_depth = self._n_nodes * self._num_units

        self._conv_kernel = self.add_variable(
            "conv_kernel", [self._filters_num, self._num_units],
            dtype=self.dtype,
            initializer=self._input_kernel_initializer)

        self._bias = self.add_variable(
            "bias",
            shape=[self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

        if self._recurrent_kernel_initializer is None:
            self._recurrent_kernel_initializer = init_ops.random_uniform_initializer(
                minval=0.,
                maxval=1,
            )

        self._recurrent_kernel = self.add_variable(
            "recurrent_kernel",
            shape=[self._num_units * self._n_nodes],
            initializer=self._recurrent_kernel_initializer)

        # Clip the absolute values of the recurrent weights to the specified minimum
        if self._recurrent_min_abs and self._recurrent_min_abs != 0:
            abs_kernel = math_ops.abs(self._recurrent_kernel)
            min_abs_kernel = math_ops.maximum(abs_kernel,
                                              self._recurrent_min_abs)
            self._recurrent_kernel = math_ops.multiply(
                math_ops.sign(self._recurrent_kernel), min_abs_kernel)

        # Clip the absolute values of the recurrent weights to the specified maximum
        self._recurrent_max_abs = self._recurrent_max_abs or 1.
        self._recurrent_kernel = clip_ops.clip_by_value(
            self._recurrent_kernel, -self._recurrent_max_abs,
            self._recurrent_max_abs)

        self.built = True
Example #27
def _BesselI1eGrad(op, grad):
  """Compute gradient of bessel_i1e(x) with respect to its argument."""
  x = op.inputs[0]
  y = op.outputs[0]
  with ops.control_dependencies([grad]):
    # For x = 0, the correct gradient is 0.5.
    # However, the main branch gives NaN because of the division by x, so
    # we impute the gradient manually.
    # An alternative solution is to express the gradient via bessel_i0e and
    # bessel_i2e, but the latter is not yet implemented in Eigen.
    eps = np.finfo(x.dtype.as_numpy_dtype).eps
    zeros = array_ops.zeros_like(x)
    x_is_not_tiny = math_ops.abs(x) > eps
    safe_x = array_ops.where(x_is_not_tiny, x, eps + zeros)
    dy_dx = math_ops.bessel_i0e(safe_x) - y * (
        math_ops.sign(safe_x) + math_ops.reciprocal(safe_x))
    return grad * array_ops.where(x_is_not_tiny, dy_dx, 0.5 + zeros)
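The analytic gradient above, `i0e(x) - i1e(x) * (sign(x) + 1/x)`, follows from `i1e(x) = exp(-|x|) * I1(x)` together with the recurrence `I1'(x) = I0(x) - I1(x)/x`. A quick cross-check with SciPy and a central finite difference (SciPy is only used here for the check, not by the snippet):

```python
import numpy as np
from scipy.special import i0e, i1e

x = np.array([-2.0, -0.5, 0.3, 1.7])
y = i1e(x)
# Same expression as dy_dx in the gradient function above.
analytic = i0e(x) - y * (np.sign(x) + 1.0 / x)

# Central finite difference as an independent reference.
h = 1e-6
numeric = (i1e(x + h) - i1e(x - h)) / (2 * h)
print(np.max(np.abs(analytic - numeric)))  # ~1e-9 or smaller: they agree
```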
Example #29
 def _sample_n(self, n, seed=None):
     shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
     # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
     # than `[-1, 1)`. In the case of `(0, 1)` we'd use
     # `np.finfo(self.dtype.as_numpy_dtype).tiny` because it is the smallest,
     # positive, "normal" number. However, the concept of subnormality exists
     # only at zero; here we need the smallest usable number larger than -1,
     # i.e., `-1 + eps/2`.
     uniform_samples = random_ops.random_uniform(
         shape=shape,
         minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                             self.dtype.as_numpy_dtype(0.)),
         maxval=1.,
         dtype=self.dtype,
         seed=seed)
     return (self.loc - self.scale * math_ops.sign(uniform_samples) *
             math_ops.log1p(-math_ops.abs(uniform_samples)))
Example #30
 def _sample_n(self, n, seed=None):
   shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
   # Uniform variates must be sampled from the open-interval `(-1, 1)` rather
   # than `[-1, 1)`. In the case of `(0, 1)` we'd use
   # `np.finfo(self.dtype.as_numpy_dtype).tiny` because it is the smallest,
   # positive, "normal" number. However, the concept of subnormality exists
   # only at zero; here we need the smallest usable number larger than -1,
   # i.e., `-1 + eps/2`.
   uniform_samples = random_ops.random_uniform(
       shape=shape,
       minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                           self.dtype.as_numpy_dtype(0.)),
       maxval=1.,
       dtype=self.dtype,
       seed=seed)
   return (self.loc - self.scale * math_ops.sign(uniform_samples) *
           math_ops.log1p(-math_ops.abs(uniform_samples)))
Example #31
    def build(self, inputs_shape):
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)

        input_depth = inputs_shape[1].value
        self._input_kernel = self.add_variable(
            "input_%s" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
            shape=[input_depth, self._num_units])

        if self._recurrent_initializer is None:
            if self._recurrent_max_abs:
                self._recurrent_initializer = init_ops.random_uniform_initializer(
                    minval=-self._recurrent_max_abs,
                    maxval=self._recurrent_max_abs)
            else:
                self._recurrent_initializer = init_ops.random_uniform_initializer(
                    minval=-1.0, maxval=1.0)

        self._recurrent_kernel = self.add_variable(
            "recurrent_%s" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
            shape=[self._num_units],
            initializer=self._recurrent_initializer)

        # Clip the absolute values of the recurrent weights
        if self._recurrent_min_abs:
            abs_kernel = math_ops.abs(self._recurrent_kernel)
            min_abs_kernel = math_ops.maximum(abs_kernel,
                                              self._recurrent_min_abs)
            self._recurrent_kernel = math_ops.multiply(
                math_ops.sign(self._recurrent_kernel), min_abs_kernel)

        if self._recurrent_max_abs:
            self._recurrent_kernel = clip_ops.clip_by_value(
                self._recurrent_kernel, -self._recurrent_max_abs,
                self._recurrent_max_abs)

        self._bias = self.add_variable(
            rnn_cell_impl._BIAS_VARIABLE_NAME,
            shape=[self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

        self.built = True
Example #32
    def build(self, inputs_shape):
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)

        input_depth = inputs_shape[1].value
        #input weights
        if self._input_initializer is None:
            self._input_initializer = init_ops.random_normal_initializer(
                mean=0.0, stddev=0.001)
        #input weights variable
        self._input_kernel = self.add_variable(
            "input_kernel",
            shape=[input_depth, self._num_units],
            initializer=self._input_initializer)

        if self._recurrent_initializer is None:
            self._recurrent_initializer = init_ops.constant_initializer(1.)
        self._recurrent_kernel = self.add_variable(
            "recurrent_kernel",
            shape=[self._num_units],
            initializer=self._recurrent_initializer)

        # Clip the absolute values of the recurrent weights to the specified minimum
        if self._recurrent_min_abs:
            abs_kernel = math_ops.abs(self._recurrent_kernel)
            min_abs_kernel = math_ops.maximum(abs_kernel,
                                              self._recurrent_min_abs)
            self._recurrent_kernel = math_ops.multiply(
                math_ops.sign(self._recurrent_kernel), min_abs_kernel)

        # Clip the absolute values of the recurrent weights to the specified maximum
        if self._recurrent_max_abs:
            self._recurrent_kernel = clip_ops.clip_by_value(
                self._recurrent_kernel, -self._recurrent_max_abs,
                self._recurrent_max_abs)

        self._bias = self.add_variable(
            "bias",
            shape=[self._num_units],
            initializer=init_ops.zeros_initializer(dtype=self.dtype))

        self.built = True
Example #33
  def cdf(self, x, name="cdf"):
    """CDF of observations in `x` under the Laplace distribution(s).

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `loc` and `scale`.
      name: The name to give this op.

    Returns:
      cdf: tensor of dtype `dtype`, the CDFs of `x`.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._loc, self._scale, x]):
        x = ops.convert_to_tensor(x)
        if x.dtype != self.dtype:
          raise TypeError("Input x dtype does not match dtype: %s vs. %s"
                          % (x.dtype, self.dtype))
        y = x - self._loc
        return 0.5 + 0.5 * math_ops.sign(y) * (
            1. - math_ops.exp(-math_ops.abs(y) / self._scale))
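The sign-based expression above is a branch-free form of the piecewise Laplace CDF. A NumPy sketch comparing the two forms on made-up values:

```python
import numpy as np

loc, scale = 1.0, 2.0
x = np.linspace(-5.0, 7.0, 9)
y = x - loc

# Branch-free form used in the snippet above.
cdf_sign = 0.5 + 0.5 * np.sign(y) * (1.0 - np.exp(-np.abs(y) / scale))

# Equivalent piecewise definition of the Laplace CDF.
cdf_piecewise = np.where(y < 0,
                         0.5 * np.exp(y / scale),
                         1.0 - 0.5 * np.exp(-y / scale))
print(np.allclose(cdf_sign, cdf_piecewise))  # True
```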
Example #34
  def cdf(self, x, name="cdf"):
    """CDF of observations in `x` under the Laplace distribution(s).

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `loc` and `scale`.
      name: The name to give this op.

    Returns:
      cdf: tensor of dtype `dtype`, the CDFs of `x`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._loc, self._scale, x], name):
        x = ops.convert_to_tensor(x)
        if x.dtype != self.dtype:
          raise TypeError("Input x dtype does not match dtype: %s vs. %s"
                          % (x.dtype, self.dtype))
        y = x - self._loc
        return 0.5 + 0.5 * math_ops.sign(y) * (
            1. - math_ops.exp(-math_ops.abs(y) / self._scale))
Example #35
 def _NormalizingSvd(tf_a):
   tf_s, tf_u, tf_v = linalg_ops.svd(tf_a, compute_uv=True, full_matrices=True)
   # Singular vectors are only unique up to an arbitrary phase. We normalize
   # the vectors such that the first component of u (if m >=n) or v (if n > m)
   # have phase 0.
   m = tf_a.shape[-2]
   n = tf_a.shape[-1]
   if m >= n:
     top_rows = tf_u[..., 0:1, :]
   else:
     top_rows = tf_v[..., 0:1, :]
   if tf_u.dtype.is_complex:
     angle = -math_ops.angle(top_rows)
     phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
   else:
     phase = math_ops.sign(top_rows)
   tf_u *= phase[..., :m]
   tf_v *= phase[..., :n]
   return tf_s, tf_u, tf_v
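The same phase fix can be applied to a NumPy SVD when comparing factorizations across libraries: each singular pair is only defined up to a sign (or complex phase), so `u` and `vh` are rescaled consistently. A hedged sketch for the real-valued case (the helper name is illustrative):

```python
import numpy as np

def sign_normalized_svd(a):
    # Economy SVD; NumPy returns v already transposed (vh).
    u, s, vh = np.linalg.svd(a, full_matrices=False)
    # Each singular pair (u[:, i], vh[i, :]) is only unique up to a sign;
    # fix it so the first row of u is non-negative, flipping vh to match.
    phase = np.sign(u[0, :])
    phase[phase == 0] = 1.0
    return u * phase, s, vh * phase[:, None]

a = np.random.default_rng(0).standard_normal((5, 3))
u, s, vh = sign_normalized_svd(a)
print(np.allclose((u * s) @ vh, a))  # reconstruction still holds
print(np.all(u[0, :] >= 0))          # sign ambiguity is resolved
```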
Example #36
    def update_weights(self, train_op):
        """Updates the model weights.

    This function must be called on at least one worker after `minimize`.
    In distributed training this call can be omitted on non-chief workers to
    speed up training.

    Args:
      train_op: The operation returned by the `minimize` call.

    Returns:
      An Operation that updates the model weights.
    """
        with ops.control_dependencies([train_op]):
            update_ops = []
            # Copy over unshrunk weights to user provided variables.
            for name in ['sparse_features_weights', 'dense_features_weights']:
                for var, slot_var in zip(self._variables[name],
                                         self._slots['unshrunk_' + name]):
                    for v, sv in zip(self._var_to_list(var),
                                     self._var_to_list(slot_var)):
                        update_ops.append(v.assign(sv))

        # Apply proximal step.
        if self._symmetric_l1_regularization() > 0:
            shrinkage = (self._symmetric_l1_regularization() /
                         self._symmetric_l2_regularization())
            with ops.control_dependencies(update_ops):
                update_ops = []
                for name in [
                        'sparse_features_weights', 'dense_features_weights'
                ]:
                    for var in self._variables[name]:
                        for v in self._var_to_list(var):
                            with ops.device(v.device):
                                v_shrunk = math_ops.sign(v) * math_ops.maximum(
                                    0.0,
                                    math_ops.abs(v) - shrinkage)
                                update_ops.append(v.assign(v_shrunk))
                return control_flow_ops.group(*update_ops)
        else:
            return control_flow_ops.group(*update_ops)
Example #37
  def build(self, inputs_shape):
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    if self._input_initializer is None:
      self._input_initializer = init_ops.random_normal_initializer(mean=0.0,
                                                                   stddev=0.001)
    self._input_kernel = self.add_variable(
        "input_kernel",
        shape=[input_depth, self._num_units],
        initializer=self._input_initializer)

    if self._recurrent_initializer is None:
      self._recurrent_initializer = init_ops.constant_initializer(1.)
    self._recurrent_kernel = self.add_variable(
        "recurrent_kernel",
        shape=[self._num_units],
        initializer=self._recurrent_initializer)

    # Clip the absolute values of the recurrent weights to the specified minimum
    if self._recurrent_min_abs:
      abs_kernel = math_ops.abs(self._recurrent_kernel)
      min_abs_kernel = math_ops.maximum(abs_kernel, self._recurrent_min_abs)
      self._recurrent_kernel = math_ops.multiply(
          math_ops.sign(self._recurrent_kernel),
          min_abs_kernel
      )

    # Clip the absolute values of the recurrent weights to the specified maximum
    if self._recurrent_max_abs:
      self._recurrent_kernel = clip_ops.clip_by_value(self._recurrent_kernel,
                                                      -self._recurrent_max_abs,
                                                      self._recurrent_max_abs)

    self._bias = self.add_variable(
        "bias",
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True
Example #38
 def _NormalizingSvd(tf_a):
   tf_s, tf_u, tf_v = linalg_ops.svd(
       tf_a, compute_uv=True, full_matrices=full_matrices_)
   # Singular vectors are only unique up to an arbitrary phase. We normalize
   # the vectors such that the first component of u (if m >=n) or v (if n > m)
   # have phase 0.
   m = tf_a.shape[-2]
   n = tf_a.shape[-1]
   if m >= n:
     top_rows = tf_u[..., 0:1, :]
   else:
     top_rows = tf_v[..., 0:1, :]
   if tf_u.dtype.is_complex:
     angle = -math_ops.angle(top_rows)
     phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
   else:
     phase = math_ops.sign(top_rows)
   tf_u *= phase[..., :m]
   tf_v *= phase[..., :n]
   return tf_s, tf_u, tf_v
Example #39
    def __call__(self, shape, dtype=None, partition_info=None):
        if dtype is None:
            dtype = self.dtype
        # Check the shape
        if len(shape) < 3 or len(shape) > 5:
            raise ValueError("The tensor to initialize must be at least "
                             "three-dimensional and at most five-dimensional")

        if shape[-2] > shape[-1]:
            raise ValueError("In_filters cannot be greater than out_filters.")

        # Generate a random matrix
        a = random_ops.random_normal([shape[-1], shape[-1]],
                                     dtype=dtype,
                                     seed=self.seed)
        # Compute the qr factorization
        if context.executing_eagerly():
            with ops.device("cpu:0"):  # TODO(b/73102536)
                q, r = gen_linalg_ops.qr(a, full_matrices=False)
        else:
            q, r = gen_linalg_ops.qr(a, full_matrices=False)
        # Make Q uniform
        d = array_ops.diag_part(r)
        q *= math_ops.sign(d)
        q = q[:shape[-2], :]
        q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype))
        if len(shape) == 3:
            weight = array_ops.scatter_nd([[(shape[0] - 1) // 2]],
                                          array_ops.expand_dims(q, 0), shape)
        elif len(shape) == 4:
            weight = array_ops.scatter_nd([[(shape[0] - 1) // 2,
                                            (shape[1] - 1) // 2]],
                                          array_ops.expand_dims(q, 0), shape)
        else:
            weight = array_ops.scatter_nd([[(shape[0] - 1) // 2,
                                            (shape[1] - 1) // 2,
                                            (shape[2] - 1) // 2]],
                                          array_ops.expand_dims(q, 0), shape)
        return weight
Example #40
    def __call__(self, shape, dtype=dtypes.float32, **kwargs):
        """Returns a tensor object initialized as specified by the initializer.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported.
      **kwargs: Additional keyword arguments.

    Raises:
      ValueError: If the dtype is not floating point or the input shape is not
       valid.
    """
        self._validate_kwargs(kwargs, support_partition=False)
        dtype = _assert_float_dtype(dtype)
        # Check the shape
        if len(shape) < 2:
            raise ValueError(
                "The tensor to initialize, specified by argument `shape`"
                " must be at least two-dimensional. Received shape="
                f"{shape}")
        # Flatten the input shape with the last dimension remaining
        # its original shape so it works for conv2d
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

        # Generate a random matrix
        a = self._random_generator.random_normal(flat_shape, dtype=dtype)
        # Compute the qr factorization
        q, r = gen_linalg_ops.qr(a, full_matrices=False)
        # Make Q uniform
        d = array_ops.diag_part(r)
        q *= math_ops.sign(d)
        if num_rows < num_cols:
            q = array_ops.matrix_transpose(q)
        return self.gain * array_ops.reshape(q, shape)
Example #41
    def testDispatchForUnaryElementwiseAPIs(self):
        @dispatch.dispatch_for_unary_elementwise_apis(MaskedTensor)
        def unary_elementwise_api_handler(api_func, x):
            return MaskedTensor(api_func(x.values), x.mask)

        try:
            x = MaskedTensor([1, -2, -3], [True, True, False])
            # Test calls with positional & keyword argument (& combinations)
            abs_x = math_ops.abs(x)
            sign_x = math_ops.sign(x=x)
            neg_x = math_ops.negative(x, "neg_x")
            invert_x = bitwise_ops.invert(x, name="invert_x")
            ones_like_x = array_ops.ones_like(x, name="ones_like_x")
            ones_like_x_float = array_ops.ones_like(x,
                                                    dtypes.float32,
                                                    name="ones_like_x_float")
            self.assertAllEqual(abs_x.values, [1, 2, 3])
            self.assertAllEqual(sign_x.values, [1, -1, -1])
            self.assertAllEqual(neg_x.values, [-1, 2, 3])
            self.assertAllEqual(invert_x.values, [-2, 1, 2])
            self.assertAllEqual(ones_like_x.values, [1, 1, 1])
            self.assertAllEqual(ones_like_x_float.values, [1., 1., 1.])
            for result in [
                    abs_x, sign_x, neg_x, invert_x, ones_like_x,
                    ones_like_x_float
            ]:
                self.assertAllEqual(result.mask, [True, True, False])
            if not context.executing_eagerly(
            ):  # names not defined in eager mode.
                self.assertRegex(neg_x.values.name, r"^neg_x/Neg:.*")
                self.assertRegex(invert_x.values.name, r"^invert_x/.*")
                self.assertRegex(ones_like_x.values.name, r"^ones_like_x/.*")
                self.assertRegex(ones_like_x_float.values.name,
                                 r"^ones_like_x_float/.*")

        finally:
            dispatch.unregister_elementwise_api_handler(
                unary_elementwise_api_handler)
Example #42
def random_sign_uniform(shape,
                        minval=None,
                        maxval=None,
                        dtype=dtypes.float32,
                        seed=None):
    """Tensor with (possibly complex) random entries from a "sign Uniform".

  Letting `Z` be a random variable equal to `-1` and `1` with equal probability,
  samples from this `Op` are distributed like

  ```
  Z * X, where X ~ Uniform[minval, maxval], if dtype is real,
  Z * (X + iY),  where X, Y ~ Uniform[minval, maxval], if dtype is complex.
  ```

  Args:
    shape:  `TensorShape` or Python list.  Shape of the returned tensor.
    minval:  `0-D` `Tensor` giving the minimum values.
    maxval:  `0-D` `Tensor` giving the maximum values.
    dtype:  `TensorFlow` `dtype` or Python dtype
    seed:  Python integer seed for the RNG.

  Returns:
    `Tensor` with desired shape and dtype.
  """
    dtype = dtypes.as_dtype(dtype)

    with ops.name_scope("random_sign_uniform"):
        unsigned_samples = random_uniform(shape,
                                          minval=minval,
                                          maxval=maxval,
                                          dtype=dtype,
                                          seed=seed)
        if seed is not None:
            seed += 12
        signs = math_ops.sign(
            random_ops.random_uniform(shape, minval=-1., maxval=1., seed=seed))
        return unsigned_samples * math_ops.cast(signs, unsigned_samples.dtype)
Example #43
    def __call__(self, shape, dtype=None, **kwargs):
        """Returns a tensor object initialized to an orthogonal matrix.

    Args:
      shape: Shape of the tensor.
      dtype: Optional dtype of the tensor. Only floating point types are
        supported. If not specified, `tf.keras.backend.floatx()` is used,
        which defaults to `float32` unless you configured it otherwise
        (via `tf.keras.backend.set_floatx(float_dtype)`).
      **kwargs: Additional keyword arguments.
    """
        _validate_kwargs(self.__class__.__name__,
                         kwargs,
                         support_partition=False)
        dtype = _assert_float_dtype(_get_dtype(dtype))
        # Check the shape
        if len(shape) < 2:
            raise ValueError('The tensor to initialize must be '
                             'at least two-dimensional')
        # Flatten the input shape with the last dimension remaining
        # its original shape so it works for conv2d
        num_rows = 1
        for dim in shape[:-1]:
            num_rows *= dim
        num_cols = shape[-1]
        flat_shape = (max(num_cols, num_rows), min(num_cols, num_rows))

        # Generate a random matrix
        a = self._random_generator.random_normal(flat_shape, dtype=dtype)
        # Compute the qr factorization
        q, r = gen_linalg_ops.qr(a, full_matrices=False)
        # Make Q uniform
        d = array_ops.tensor_diag_part(r)
        q *= math_ops.sign(d)
        if num_rows < num_cols:
            q = array_ops.matrix_transpose(q)
        return self.gain * array_ops.reshape(q, shape)
Example #44
    def sample(self, n, seed=None, name="sample"):
        """Sample `n` observations from the Laplace Distributions.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
        with ops.name_scope(self.name):
            with ops.op_scope([self._loc, self._scale, n], name):
                n = ops.convert_to_tensor(n)
                n_val = tensor_util.constant_value(n)
                shape = array_ops.concat(
                    0, [array_ops.pack([n]),
                        self.batch_shape()])
                # Sample uniformly-at-random from the open-interval (-1, 1).
                uniform_samples = random_ops.random_uniform(
                    shape=shape,
                    minval=np.nextafter(self.dtype.as_numpy_dtype(-1.),
                                        self.dtype.as_numpy_dtype(0.)),
                    maxval=self.dtype.as_numpy_dtype(1.),
                    dtype=self.dtype,
                    seed=seed)

                # Provide some hints to shape inference
                inferred_shape = tensor_shape.vector(n_val).concatenate(
                    self.get_batch_shape())
                uniform_samples.set_shape(inferred_shape)

                return (self._loc -
                        self._scale * math_ops.sign(uniform_samples) *
                        math_ops.log(1. - math_ops.abs(uniform_samples)))
Example #45
def _ComplexAbsGrad(op, grad):
  """Returns the gradient of ComplexAbs."""
  # TODO(b/27786104): The cast to complex could be removed once arithmetic
  # supports mixtures of complex64 and real values.
  return (math_ops.complex(grad, array_ops.zeros_like(grad)) * math_ops.sign(
      op.inputs[0]))
Example #46
def _ComplexAbsGrad(op, grad):
    """Returns the gradient of ComplexAbs."""
    # TODO(b/27786104): The cast to complex could be removed once arithmetic
    # supports mixtures of complex64 and real values.
    return (math_ops.complex(grad, array_ops.zeros_like(grad)) *
            math_ops.sign(op.inputs[0]))
Example #47
def _BesselI0eGrad(op, grad):
  """Compute gradient of bessel_i0e(x) with respect to its argument."""
  x = op.inputs[0]
  y = op.outputs[0]
  with ops.control_dependencies([grad]):
    return grad * (math_ops.bessel_i1e(x) - math_ops.sign(x) * y)
Example #48
 def indicator(x):
   x1_times_x2 = math_ops.reduce_prod(x, reduction_indices=[-1])
   return 0.5 * (math_ops.sign(x1_times_x2) + 1.0)
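For reference, `0.5 * (sign(x1 * x2) + 1)` maps a positive last-axis product to 1.0 and a negative one to 0.0, i.e. it indicates whether the pair has matching signs. A tiny NumPy equivalent with made-up inputs:

```python
import numpy as np

x = np.array([[1.0, 2.0], [-3.0, 4.0], [-1.0, -2.0]])
indicator = 0.5 * (np.sign(np.prod(x, axis=-1)) + 1.0)
print(indicator)  # [1. 0. 1.]
```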
Example #49
def reduce_weighted_logsumexp(
    logx,
    w=None,
    axis=None,
    keep_dims=False,
    return_sign=False,
    name=None):
  """Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

  If all weights `w` are known to be positive, it is more efficient to directly
  use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.log(w))` is more
  efficient than `du.reduce_weighted_logsumexp(logx, w)`.

  Reduces `input_tensor` along the dimensions given in `axis`.
  Unless `keep_dims` is true, the rank of the tensor is reduced by 1 for each
  entry in `axis`. If `keep_dims` is true, the reduced dimensions
  are retained with length 1.

  If `axis` has no entries, all dimensions are reduced, and a
  tensor with a single element is returned.

  This function is more numerically stable than log(sum(w * exp(input))). It
  avoids overflows caused by taking the exp of large inputs and underflows
  caused by taking the log of small inputs.

  For example:

  ```python
  x = tf.constant([[0., 0, 0],
                   [0, 0, 0]])

  w = tf.constant([[-1., 1, 1],
                   [1, 1, 1]])

  du.reduce_weighted_logsumexp(x, w)
  # ==> log(-1*1 + 1*1 + 1*1 + 1*1 + 1*1 + 1*1) = log(4)

  du.reduce_weighted_logsumexp(x, w, axis=0)
  # ==> [log(-1+1), log(1+1), log(1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1)
  # ==> [log(-1+1+1), log(1+1+1)]

  du.reduce_weighted_logsumexp(x, w, axis=1, keep_dims=True)
  # ==> [[log(-1+1+1)], [log(1+1+1)]]

  du.reduce_weighted_logsumexp(x, w, axis=[0, 1])
  # ==> log(-1+5)
  ```

  Args:
    logx: The tensor to reduce. Should have numeric type.
    w: The weight tensor. Should have numeric type identical to `logx`.
    axis: The dimensions to reduce. If `None` (the default),
      reduces all dimensions. Must be in the range
      `[-rank(input_tensor), rank(input_tensor))`.
    keep_dims: If true, retains reduced dimensions with length 1.
    return_sign: If `True`, returns the sign of the result.
    name: A name for the operation (optional).

  Returns:
    lswe: The `log(abs(sum(weight * exp(x))))` reduced tensor.
    sign: (Optional) The sign of `sum(weight * exp(x))`.
  """
  with ops.name_scope(name, "reduce_weighted_logsumexp", [logx, w]):
    logx = ops.convert_to_tensor(logx, name="logx")
    if w is None:
      lswe = math_ops.reduce_logsumexp(logx, axis=axis, keep_dims=keep_dims)
      if return_sign:
        sgn = array_ops.ones_like(lswe)
        return lswe, sgn
      return lswe
    w = ops.convert_to_tensor(w, dtype=logx.dtype, name="w")
    log_absw_x = logx + math_ops.log(math_ops.abs(w))
    max_log_absw_x = math_ops.reduce_max(log_absw_x, axis=axis, keep_dims=True)
    # If the largest element is `-inf` or `inf` then we don't bother subtracting
    # off the max. We do this because otherwise we'd get `inf - inf = NaN`. That
    # this is ok follows from the fact that we're actually free to subtract any
    # value we like, so long as we add it back after taking the `log(sum(...))`.
    max_log_absw_x = array_ops.where(
        math_ops.is_inf(max_log_absw_x),
        array_ops.zeros_like(max_log_absw_x),
        max_log_absw_x)
    wx_over_max_absw_x = (
        math_ops.sign(w) * math_ops.exp(log_absw_x - max_log_absw_x))
    sum_wx_over_max_absw_x = math_ops.reduce_sum(
        wx_over_max_absw_x,
        axis=axis,
        keep_dims=keep_dims)
    if not keep_dims:
      max_log_absw_x = array_ops.squeeze(max_log_absw_x, axis)
    sgn = math_ops.sign(sum_wx_over_max_absw_x)
    lswe = max_log_absw_x + math_ops.log(sgn * sum_wx_over_max_absw_x)
    if return_sign:
      return lswe, sgn
    return lswe
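A compact NumPy sketch of the same max-subtraction trick for signed weights (an illustration of the algorithm above, not the library routine):

import numpy as np

def weighted_logsumexp(logx, w, axis=None, keepdims=False):
  # log|w| + logx, with the max subtracted before exponentiating so that
  # exp never overflows; the signs of w are re-applied afterwards.
  log_absw_x = logx + np.log(np.abs(w))
  m = np.max(log_absw_x, axis=axis, keepdims=True)
  m = np.where(np.isinf(m), 0.0, m)   # avoid inf - inf
  s = np.sum(np.sign(w) * np.exp(log_absw_x - m), axis=axis, keepdims=keepdims)
  if not keepdims:
    m = np.squeeze(m, axis=axis)
  sgn = np.sign(s)
  return m + np.log(sgn * s), sgn

logx = np.zeros((2, 3))
w = np.array([[-1.0, 1, 1], [1, 1, 1]])
print(weighted_logsumexp(logx, w))          # (log(4), 1.0), as in the docstring
print(weighted_logsumexp(logx, w, axis=1))  # ([log(1), log(3)], [1., 1.])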
Example #50
0
  def _cdf(self, x):
    z = self._z(x)
    return (0.5 + 0.5 * math_ops.sign(z) *
            (1. - math_ops.exp(-math_ops.abs(z))))
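The branchless expression above is the standard Laplace CDF written with the sign function; for a standardized variable z it is equivalent to the usual piecewise form:

\[
F(z) \;=\; \tfrac{1}{2} + \tfrac{1}{2}\,\operatorname{sgn}(z)\bigl(1 - e^{-\lvert z\rvert}\bigr)
\;=\;
\begin{cases}
  \tfrac{1}{2}\,e^{z}, & z < 0,\\
  1 - \tfrac{1}{2}\,e^{-z}, & z \ge 0.
\end{cases}
\]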
Example #51
0
def _AbsGrad(op, grad):
  x = op.inputs[0]
  return grad * math_ops.sign(x)
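A quick numeric sanity check of the rule used by `_AbsGrad` (NumPy only, illustrative): away from zero the derivative of |x| is sign(x); at exactly zero this gradient propagates sign(0) = 0.

import numpy as np

x = np.array([-2.0, -0.5, 0.5, 2.0])
eps = 1e-6
numeric = (np.abs(x + eps) - np.abs(x - eps)) / (2 * eps)
print(numeric)      # [-1. -1.  1.  1.]
print(np.sign(x))   # [-1. -1.  1.  1.]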
Example #52
0
  def build(self, input_shape):
    """Builds the layer.

    Creates the variables for the network modeling the densities, creates the
    auxiliary loss estimating the median and tail quantiles of the densities,
    and then uses that to create the probability mass functions and the update
    op that produces the discrete cumulative density functions used by the range
    coder.

    Args:
      input_shape: Shape of the input tensor, used to get the number of
        channels.

    Raises:
      ValueError: if `input_shape` doesn't specify the length of the channel
        dimension.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    channel_axis = self._channel_axis(input_shape.ndims)
    channels = input_shape[channel_axis].value
    if channels is None:
      raise ValueError("The channel dimension of the inputs must be defined.")
    self.input_spec = base_layer.InputSpec(
        ndim=input_shape.ndims, axes={channel_axis: channels})
    filters = (1,) + self.filters + (1,)
    scale = self.init_scale ** (1 / (len(self.filters) + 1))

    # Create variables.
    self._matrices = []
    self._biases = []
    self._factors = []
    for i in range(len(self.filters) + 1):
      init = np.log(np.expm1(1 / scale / filters[i + 1]))
      matrix = self.add_variable(
          "matrix_{}".format(i), dtype=self.dtype,
          shape=(channels, filters[i + 1], filters[i]),
          initializer=init_ops.Constant(init))
      matrix = nn.softplus(matrix)
      self._matrices.append(matrix)

      bias = self.add_variable(
          "bias_{}".format(i), dtype=self.dtype,
          shape=(channels, filters[i + 1], 1),
          initializer=init_ops.RandomUniform(-.5, .5))
      self._biases.append(bias)

      if i < len(self.filters):
        factor = self.add_variable(
            "factor_{}".format(i), dtype=self.dtype,
            shape=(channels, filters[i + 1], 1),
            initializer=init_ops.Zeros())
        factor = math_ops.tanh(factor)
        self._factors.append(factor)

    # To figure out what range of the densities to sample, we need to compute
    # the quantiles given by `tail_mass / 2` and `1 - tail_mass / 2`. Since we
    # can't take inverses of the cumulative directly, we make it an optimization
    # problem:
    # `quantiles = argmin(|logit(cumulative) - target|)`
    # where `target` is `logit(tail_mass / 2)` or `logit(1 - tail_mass / 2)`.
    # Taking the logit (inverse of sigmoid) of the cumulative makes the
    # representation of the right target more numerically stable.

    # Numerically stable way of computing logits of `tail_mass / 2`
    # and `1 - tail_mass / 2`.
    target = np.log(2 / self.tail_mass - 1)
    # Compute lower and upper tail quantile as well as median.
    target = constant_op.constant([-target, 0, target], dtype=self.dtype)

    def quantiles_initializer(shape, dtype=None, partition_info=None):
      del partition_info  # unused
      assert tuple(shape[1:]) == (1, 3)
      init = constant_op.constant(
          [[[-self.init_scale, 0, self.init_scale]]], dtype=dtype)
      return array_ops.tile(init, (shape[0], 1, 1))

    quantiles = self.add_variable(
        "quantiles", shape=(channels, 1, 3), dtype=self.dtype,
        initializer=quantiles_initializer)
    logits = self._logits_cumulative(quantiles, stop_gradient=True)
    loss = math_ops.reduce_sum(abs(logits - target))
    self.add_loss(loss, inputs=None)

    # Save medians for `call`, `compress`, and `decompress`.
    self._medians = quantiles[:, :, 1:2]
    if not self.optimize_integer_offset:
      self._medians = math_ops.round(self._medians)

    # Largest distance observed between lower tail quantile and median,
    # or between median and upper tail quantile.
    minima = math_ops.reduce_max(self._medians - quantiles[:, :, 0:1])
    maxima = math_ops.reduce_max(quantiles[:, :, 2:3] - self._medians)
    minmax = math_ops.maximum(minima, maxima)
    minmax = math_ops.ceil(minmax)
    minmax = math_ops.maximum(minmax, 1)

    # Sample the density up to `minmax` around the median.
    samples = math_ops.range(-minmax, minmax + 1, dtype=self.dtype)
    samples += self._medians

    half = constant_op.constant(.5, dtype=self.dtype)
    # We strip the sigmoid from the end here, so we can use the special rule
    # below to only compute differences in the left tail of the sigmoid.
    # This increases numerical stability (see explanation in `call`).
    lower = self._logits_cumulative(samples - half, stop_gradient=True)
    upper = self._logits_cumulative(samples + half, stop_gradient=True)
    # Flip signs if we can move more towards the left tail of the sigmoid.
    sign = -math_ops.sign(math_ops.add_n([lower, upper]))
    pmf = abs(math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower))
    # Add tail masses to first and last bin of pmf, as we clip values for
    # compression, meaning that out-of-range values get mapped to these bins.
    pmf = array_ops.concat([
        math_ops.add_n([pmf[:, 0, :1], math_ops.sigmoid(lower[:, 0, :1])]),
        pmf[:, 0, 1:-1],
        math_ops.add_n([pmf[:, 0, -1:], math_ops.sigmoid(-upper[:, 0, -1:])]),
        ], axis=-1)
    self._pmf = pmf

    cdf = coder_ops.pmf_to_quantized_cdf(
        pmf, precision=self.range_coder_precision)
    def cdf_getter(*args, **kwargs):
      del args, kwargs  # ignored
      return variable_scope.get_variable(
          "quantized_cdf", dtype=dtypes.int32, initializer=cdf,
          trainable=False, validate_shape=False, collections=())
    # Need to provide a fake shape here since add_variable insists on it.
    self._quantized_cdf = self.add_variable(
        "quantized_cdf", shape=(channels, 1), dtype=dtypes.int32,
        getter=cdf_getter, trainable=False)

    update_op = state_ops.assign(
        self._quantized_cdf, cdf, validate_shape=False)
    self.add_update(update_op, inputs=None)

    super(EntropyBottleneck, self).build(input_shape)
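The sign flip above (`sign = -math_ops.sign(math_ops.add_n([lower, upper]))`) exploits the identity sigmoid(x) = 1 - sigmoid(-x): the difference of two sigmoids can be computed from whichever pair of arguments lies further into the left tail, where values are near 0 instead of near 1, avoiding catastrophic cancellation. A hedged NumPy sketch of the effect:

import numpy as np

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

lower, upper = np.float32(20.0), np.float32(21.0)
naive = sigmoid(upper) - sigmoid(lower)   # both inputs saturate to 1.0 in float32
sign = -np.sign(lower + upper)
stable = abs(sigmoid(sign * upper) - sigmoid(sign * lower))
print(naive, stable)                      # 0.0 versus ~1.3e-09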
Example #53
0
  def _cdf(self, x):
    y = x - self.loc
    return (0.5 + 0.5 * math_ops.sign(y) *
            (1. - math_ops.exp(-math_ops.abs(y) / self.scale)))
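A small check (assuming SciPy is available; illustrative only) that this closed form matches scipy.stats.laplace for a generic loc and scale:

import numpy as np
from scipy import stats

loc, scale = 1.5, 2.0
x = np.linspace(-10.0, 10.0, 7)
y = x - loc
formula = 0.5 + 0.5 * np.sign(y) * (1.0 - np.exp(-np.abs(y) / scale))
print(np.max(np.abs(formula - stats.laplace.cdf(x, loc=loc, scale=scale))))  # ~0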
Example #54
0
  def call(self, inputs, training):
    """Pass a tensor through the bottleneck.

    Args:
      inputs: The tensor to be passed through the bottleneck.
      training: Boolean. If `True`, returns a differentiable approximation of
        the inputs, and their likelihoods under the modeled probability
        densities. If `False`, returns the quantized inputs and their
        likelihoods under the corresponding probability mass function. These
        quantities can't be used for training, as they are not differentiable,
        but represent actual compression more closely.

    Returns:
      values: `Tensor` with the same shape as `inputs` containing the perturbed
        or quantized input values.
      likelihood: `Tensor` with the same shape as `inputs` containing the
        likelihood of `values` under the modeled probability distributions.

    Raises:
      ValueError: if `inputs` has different `dtype` or number of channels than
        a previous set of inputs the model was invoked with earlier.
    """
    inputs = ops.convert_to_tensor(inputs)
    ndim = self.input_spec.ndim
    channel_axis = self._channel_axis(ndim)
    half = constant_op.constant(.5, dtype=self.dtype)

    # Convert to (channels, 1, batch) format by commuting channels to front
    # and then collapsing.
    order = list(range(ndim))
    order.pop(channel_axis)
    order.insert(0, channel_axis)
    values = array_ops.transpose(inputs, order)
    shape = array_ops.shape(values)
    values = array_ops.reshape(values, (shape[0], 1, -1))

    # Add noise or quantize.
    if training:
      noise = random_ops.random_uniform(array_ops.shape(values), -half, half)
      values = math_ops.add_n([values, noise])
    elif self.optimize_integer_offset:
      values = math_ops.round(values - self._medians) + self._medians
    else:
      values = math_ops.round(values)

    # Evaluate densities.
    # We can use the special rule below to only compute differences in the left
    # tail of the sigmoid. This increases numerical stability: sigmoid(x) is 1
    # for large x, 0 for small x. Subtracting two numbers close to 0 can be done
    # with much higher precision than subtracting two numbers close to 1.
    lower = self._logits_cumulative(values - half, stop_gradient=False)
    upper = self._logits_cumulative(values + half, stop_gradient=False)
    # Flip signs if we can move more towards the left tail of the sigmoid.
    sign = -math_ops.sign(math_ops.add_n([lower, upper]))
    sign = array_ops.stop_gradient(sign)
    likelihood = abs(
        math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower))
    if self.likelihood_bound > 0:
      likelihood_bound = constant_op.constant(
          self.likelihood_bound, dtype=self.dtype)
      # TODO(jballe): Override gradients.
      likelihood = math_ops.maximum(likelihood, likelihood_bound)

    # Convert back to input tensor shape.
    order = list(range(1, ndim))
    order.insert(channel_axis, 0)
    values = array_ops.reshape(values, shape)
    values = array_ops.transpose(values, order)
    likelihood = array_ops.reshape(likelihood, shape)
    likelihood = array_ops.transpose(likelihood, order)

    if not context.executing_eagerly():
      values_shape, likelihood_shape = self.compute_output_shape(inputs.shape)
      values.set_shape(values_shape)
      likelihood.set_shape(likelihood_shape)

    return values, likelihood
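A hedged NumPy sketch (not the layer itself) of the two branches in `call`: during training the values receive additive uniform noise in [-0.5, 0.5) as a differentiable stand-in for quantization; otherwise they are rounded onto the integer grid offset by the per-channel medians.

import numpy as np

def bottleneck_values(values, medians, training, seed=None):
  if training:
    rng = np.random.default_rng(seed)
    return values + rng.uniform(-0.5, 0.5, size=values.shape)
  return np.round(values - medians) + medians

v = np.array([0.2, 1.7, -3.4])
m = np.array([0.25, 0.25, 0.25])
print(bottleneck_values(v, m, training=False))  # [ 0.25  1.25 -3.75]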