Example #1
 def _show_max_abs(tensor):
     output_tensor = math_ops.cast(
         math_ops.reduce_max(math_ops.abs(tensor)), dtypes.float64)
     zero = constant_op.constant(0, dtypes.float64)
     output_tensor = gen_math_ops.maximum(zero, output_tensor)
     return _print_tensor(op_name, output_idx, -1, tensor,
                          output_tensor)
Example #2
 def GraphFn(self, x):
     dtype = x.dtype
     # scale
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r1 = x / a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r2 = a / x
     a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
     r3 = a + x
     a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
     r4 = x * a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r5 = x - a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r6 = a - x
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r7 = x - a
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r8 = a - x
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r9 = gen_math_ops.maximum(x, a)
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r10 = gen_math_ops.minimum(a, x)
     a = constant_op.constant(np.random.randn(3), dtype=dtype)
     r11 = x * a
     a = constant_op.constant(np.random.randn(1), dtype=dtype)
     r12 = a * x
     concat1 = array_ops.concat([r1, r2, r3, r4, r5, r6], axis=-1)
     concat2 = array_ops.concat([r7, r8, r9, r10, r11, r12], axis=3)
     x = array_ops.concat([concat1, concat2], axis=-1)
     return gen_array_ops.reshape(x, [2, -1], name="output_0")
Example #3
 def decrease_loss_scale():
     new_loss_scale_value = gen_math_ops.maximum(
         1., self._loss_scale * self._decr_ratio)
     update_loss_scale = state_ops.assign(self._loss_scale,
                                          new_loss_scale_value)
     return control_flow_ops.group(update_loss_scale,
                                   self._reset_stats())
Example #4
def saturate_cast(value, dtype, name=None):
  """Performs a safe saturating cast of `value` to `dtype`.

  This function casts the input to `dtype` without applying any scaling.  If
  there is a danger that values would over or underflow in the cast, this op
  applies the appropriate clamping before the cast.

  Args:
    value: A `Tensor`.
    dtype: The desired output `DType`.
    name: A name for the operation (optional).

  Returns:
    `value` safely cast to `dtype`.
  """
  # When casting to a type with smaller representable range, clamp.
  # Note that this covers casting to unsigned types as well.
  with ops.op_scope([value], name, "saturate_cast") as name:
    value = ops.convert_to_tensor(value, name="value")
    dtype = dtypes.as_dtype(dtype).base_dtype
    if value.dtype.min < dtype.min:
      value = gen_math_ops.maximum(value, ops.convert_to_tensor(
          dtype.min, dtype=value.dtype, name="min"))
    if value.dtype.max > dtype.max:
      value = gen_math_ops.minimum(value, ops.convert_to_tensor(
          dtype.max, dtype=value.dtype, name="max"))
    return cast(value, dtype, name=name)
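
The pattern above clamps to the target dtype's representable range before casting. A minimal NumPy sketch of the same clamp-then-cast idea (saturate_cast_np is a hypothetical helper, not part of TensorFlow):

import numpy as np

def saturate_cast_np(value, dtype):
    # Clamp to the representable range of the target dtype, then cast,
    # mirroring the maximum/minimum guards in the example above.
    info = np.iinfo(dtype) if np.issubdtype(dtype, np.integer) else np.finfo(dtype)
    return np.clip(value, info.min, info.max).astype(dtype)

print(saturate_cast_np(np.array([300.0, -300.0, 12.0]), np.int8))  # [127 -128 12]
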
Example #5
 def decr_loss_scale():
     update_op = state_ops.assign(
         self._loss_scale,
         gen_math_ops.maximum(1.,
                              self._loss_scale * self._decr_ratio))
     # When loss_scale is updated, both good and bad steps are reset.
     return control_flow_ops.group(update_op, self._reset_stats())
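
A toy sketch of the loss-scale decrease rule shown in the loss-scale examples above (values are illustrative): the scale shrinks by the decrease ratio on each bad step but is floored at 1.

def decrease_loss_scale(loss_scale, decr_ratio=0.5):
    # Mirror of gen_math_ops.maximum(1., loss_scale * decr_ratio) above.
    return max(1.0, loss_scale * decr_ratio)

scale = 8.0
for _ in range(4):
    scale = decrease_loss_scale(scale)
    print(scale)  # 4.0, 2.0, 1.0, 1.0
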
Example #6
 def _show_max_abs(tensor):
     tensor = math_ops.cast(tensor, dtypes.float32)
     output_tensor = math_ops.reduce_max(math_ops.abs(tensor))
     zero = constant_op.constant(0, dtypes.float32)
     output_tensor = gen_math_ops.maximum(zero, output_tensor)
     # The shape has to be 1. Set it if it does not have the information.
     output_tensor = array_ops.reshape(output_tensor, [1])
     return output_tensor
Example #7
def posdef_inv_eig(tensor, identity, damping):
    """Computes inverse(tensor + damping * identity) with eigendecomposition."""
    eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(
        tensor + damping * identity)
    # TODO(GD): it's a little hacky
    eigenvalues = gen_math_ops.maximum(eigenvalues, damping)
    return math_ops.matmul(
        eigenvectors / eigenvalues, eigenvectors, transpose_b=True)
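
A NumPy sketch of the same damped inverse via eigendecomposition (illustrative; assumes a single symmetric matrix rather than a batch):

import numpy as np

def posdef_inv_eig_np(tensor, damping):
    # Eigendecompose (A + damping * I), floor the eigenvalues at `damping`,
    # and rebuild V diag(1/lambda) V^T, as in the matmul above.
    eigenvalues, eigenvectors = np.linalg.eigh(tensor + damping * np.eye(len(tensor)))
    eigenvalues = np.maximum(eigenvalues, damping)
    return (eigenvectors / eigenvalues) @ eigenvectors.T

a = np.array([[2.0, 0.3], [0.3, 1.0]])
print(posdef_inv_eig_np(a, damping=1e-2) @ (a + 1e-2 * np.eye(2)))  # ~ identity
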
Example #8
 def _show_max_abs(tensor):
   tensor = math_ops.cast(tensor, dtypes.float32)
   output_tensor = math_ops.reduce_max(math_ops.abs(tensor))
   zero = constant_op.constant(0, dtypes.float32)
   output_tensor = gen_math_ops.maximum(zero, output_tensor)
   # The shape has to be 1. Set it if it does not have the information.
   output_tensor = array_ops.reshape(output_tensor, [1])
   return output_tensor
Example #9
def gen_non_linearity(A, non_linearity):
    '''
    Returns required activation for a tensor based on the inputs
    '''
    if non_linearity == "tanh":
        return math_ops.tanh(A)
    elif non_linearity == "sigmoid":
        return math_ops.sigmoid(A)
    elif non_linearity == "relu":
        return gen_math_ops.maximum(A, 0.0)
    elif non_linearity == "quantTanh":
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), -1.0)
    elif non_linearity == "quantSigm":
        A = (A + 1.0) / 2.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    else:
        return math_ops.tanh(A)
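
For reference, a NumPy sketch of the two quantized activations used above (the _np helpers are illustrative, not from the original code):

import numpy as np

def quant_tanh_np(a):
    # Piecewise-linear stand-in for tanh: clip to [-1, 1].
    return np.maximum(np.minimum(a, 1.0), -1.0)

def quant_sigm_np(a):
    # Piecewise-linear stand-in for sigmoid: shift, scale, clip to [0, 1].
    return np.maximum(np.minimum((a + 1.0) / 2.0, 1.0), 0.0)

x = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
print(quant_tanh_np(x))  # [-1.  -0.5  0.   0.5  1. ]
print(quant_sigm_np(x))  # [0.   0.25 0.5  0.75 1.  ]
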
Example #10
def get_range_len(start, limit, delta):
  dist = ops.convert_to_tensor(limit - start)
  unadjusted_len = dist // delta
  adjustment = math_ops.cast(
      gen_math_ops.not_equal(dist % delta,
                             array_ops.zeros_like(unadjusted_len)), dist.dtype)
  final_len = unadjusted_len + adjustment
  return gen_math_ops.maximum(final_len, array_ops.zeros_like(final_len))
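
The helper computes ceil((limit - start) / delta) with a floor of zero for empty ranges; a plain-Python sketch of the same arithmetic for positive deltas (illustrative):

def get_range_len_py(start, limit, delta):
    # Ceiling division of the distance by the step, clamped at 0,
    # matching the maximum(final_len, zeros_like(final_len)) guard above.
    dist = limit - start
    final_len = dist // delta + int(dist % delta != 0)
    return max(final_len, 0)

print(get_range_len_py(0, 7, 2))  # 4 == len(range(0, 7, 2))
print(get_range_len_py(5, 2, 1))  # 0 == len(range(5, 2, 1))
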
Example #11
def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None):
    """Returns a mask tensor representing the first N positions of each cell.
	If `lengths` has shape `[d_1, d_2, ..., d_n]` the resulting tensor `mask` has
	dtype `dtype` and shape `[d_1, d_2, ..., d_n, maxlen]`, with
	```
	mask[i_1, i_2, ..., i_n, j] = (j < lengths[i_1, i_2, ..., i_n])
	```
	Examples:
	```python
	tf.sequence_mask([1, 3, 2], 5)  # [[True, False, False, False, False],
																	#  [True, True, True, False, False],
																	#  [True, True, False, False, False]]
	tf.sequence_mask([[1, 3],[2,0]])  # [[[True, False, False],
																		#   [True, True, True]],
																		#  [[True, True, False],
																		#   [False, False, False]]]
	```
	Args:
		lengths: integer tensor, all its values <= maxlen.
		maxlen: scalar integer tensor, size of last dimension of returned tensor.
			Default is the maximum value in `lengths`.
		dtype: output type of the resulting tensor.
		name: name of the op.
	Returns:
		A mask tensor of shape `lengths.shape + (maxlen,)`, cast to specified dtype.
	Raises:
		ValueError: if `maxlen` is not a scalar.
	"""
    with ops.name_scope(name, "SequenceMask", [lengths, maxlen]):
        lengths = ops.convert_to_tensor(lengths)

        if maxlen is None:
            maxlen = gen_math_ops._max(lengths, _all_dimensions(lengths))
            maxlen = gen_math_ops.maximum(constant(0, maxlen.dtype), maxlen)
        else:
            maxlen = ops.convert_to_tensor(maxlen)
        if maxlen.get_shape(
        ).ndims is not None and maxlen.get_shape().ndims != 0:
            raise ValueError("maxlen must be scalar for sequence_mask")

        # The basic idea is to compare a range row vector of size maxlen:
        # [0, 1, 2, 3, 4]
        # to length as a matrix with 1 column: [[1], [3], [2]].
        # Because of broadcasting on both arguments this comparison results
        # in a matrix of size (len(lengths), maxlen)
        row_vector = gen_math_ops._range(constant(0, maxlen.dtype), maxlen,
                                         constant(1, maxlen.dtype))
        # Since maxlen >= max(lengths), it is safe to use maxlen as a cast
        # authoritative type. Whenever maxlen fits into tf.int32, so do the lengths.
        matrix = gen_math_ops.cast(expand_dims(lengths, -1), maxlen.dtype)
        result = row_vector < matrix

        if dtype is None or result.dtype.base_dtype == dtype.base_dtype:
            return result
        else:
            return gen_math_ops.cast(result, dtype)
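
The core of the implementation is a broadcast comparison between a [0, maxlen) row vector and the lengths expanded with a trailing dimension; a NumPy sketch (illustrative):

import numpy as np

def sequence_mask_np(lengths, maxlen=None):
    lengths = np.asarray(lengths)
    if maxlen is None:
        # Same role as maximum(0, max(lengths)) above: guard against empty
        # or all-negative lengths.
        maxlen = max(int(lengths.max()), 0) if lengths.size else 0
    row_vector = np.arange(maxlen)
    return row_vector < np.expand_dims(lengths, -1)

print(sequence_mask_np([1, 3, 2], 5))
# [[ True False False False False]
#  [ True  True  True False False]
#  [ True  True False False False]]
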
Example #12
 def loss_op(self, targets, prediction_ops):
   """Create loss_op."""
   prediction = prediction_ops["mean"]
   covariance = prediction_ops["covariance"]
   # Normal data log probability.
   sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
   log_prob1 = math_utils.normal_log_prob(targets, sigma, prediction)
   log_prob1 += math_ops.log(1 - self._anomaly_prior_probability)
   # Anomaly log probability.
   log_prob2 = self._anomaly_log_prob(targets, prediction_ops)
   log_prob2 += math_ops.log(self._anomaly_prior_probability)
   # We need to compute log(exp(log_prob1) + exp(log_prob2)). For numerical
   # stability, we rewrite the expression as below.
   p1 = gen_math_ops.minimum(log_prob1, log_prob2)
   p2 = gen_math_ops.maximum(log_prob1, log_prob2)
   mixed_log_prob = p2 + math_ops.log(1 + gen_math_ops.exp(p1 - p2))
   loss_op = -math_ops.reduce_sum(mixed_log_prob)
   loss_op /= math_ops.cast(
       math_ops.reduce_prod(array_ops.shape(targets)), self.dtype)
   return loss_op
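
The mixed_log_prob lines use the standard numerically stable identity log(exp(a) + exp(b)) = max(a, b) + log(1 + exp(min(a, b) - max(a, b))); a quick NumPy illustration with made-up values:

import numpy as np

a, b = -1000.0, -1001.0                    # naive exp() underflows to 0 here
p1, p2 = min(a, b), max(a, b)
stable = p2 + np.log(1 + np.exp(p1 - p2))  # approx. -999.6867
naive = np.log(np.exp(a) + np.exp(b))      # -inf, plus a divide-by-zero warning
print(stable, naive)
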
Example #13
 def loss_op(self, targets, prediction_ops):
   """Create loss_op."""
   prediction = prediction_ops["mean"]
   covariance = prediction_ops["covariance"]
   # Normal data log probability.
   sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
   log_prob1 = math_utils.normal_log_prob(targets, sigma, prediction)
   log_prob1 += math_ops.log(1 - self._anomaly_prior_probability)
   # Anomaly log probability.
   log_prob2 = self._anomaly_log_prob(targets, prediction_ops)
   log_prob2 += math_ops.log(self._anomaly_prior_probability)
   # We need to compute log(exp(log_prob1) + exp(log_prob2)). For numerical
   # stability, we rewrite the expression as below.
   p1 = gen_math_ops.minimum(log_prob1, log_prob2)
   p2 = gen_math_ops.maximum(log_prob1, log_prob2)
   mixed_log_prob = p2 + math_ops.log(1 + gen_math_ops.exp(p1 - p2))
   loss_op = -math_ops.reduce_sum(mixed_log_prob)
   loss_op /= math_ops.cast(
       math_ops.reduce_prod(array_ops.shape(targets)), self.dtype)
   return loss_op
Example #14
 def _anomaly_log_prob(self, targets, prediction_ops):
   prediction = prediction_ops["mean"]
   if self._anomaly_distribution == AnomalyMixtureARModel.GAUSSIAN_ANOMALY:
     anomaly_variance = prediction_ops["anomaly_params"]
     anomaly_sigma = math_ops.sqrt(
         gen_math_ops.maximum(anomaly_variance, 1e-5))
     log_prob = math_utils.normal_log_prob(targets, anomaly_sigma, prediction)
   else:
     assert self._anomaly_distribution == AnomalyMixtureARModel.CAUCHY_ANOMALY
     anomaly_scale = prediction_ops["anomaly_params"]
     log_prob = math_utils.cauchy_log_prob(targets, anomaly_scale, prediction)
   return log_prob
Example #15
 def _anomaly_log_prob(self, targets, prediction_ops):
   prediction = prediction_ops["mean"]
   if self._anomaly_distribution == AnomalyMixtureARModel.GAUSSIAN_ANOMALY:
     anomaly_variance = prediction_ops["anomaly_params"]
     anomaly_sigma = math_ops.sqrt(
         gen_math_ops.maximum(anomaly_variance, 1e-5))
     log_prob = math_utils.normal_log_prob(targets, anomaly_sigma, prediction)
   else:
     assert self._anomaly_distribution == AnomalyMixtureARModel.CAUCHY_ANOMALY
     anomaly_scale = prediction_ops["anomaly_params"]
     log_prob = math_utils.cauchy_log_prob(targets, anomaly_scale, prediction)
   return log_prob
Example #16
def gen_non_linearity(A, non_linearity):
    if non_linearity == "tanh":
        return math_ops.tanh(A)
    elif non_linearity == "sigmoid":
        return math_ops.sigmoid(A)
    elif non_linearity == "relu":
        return gen_math_ops.maximum(A, 0.0)
    elif non_linearity == "quantTanh":
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), -1.0)
    elif non_linearity == "quantSigm":
        A = (A + 1.0) / 2.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    elif non_linearity == "quantSigm4":
        A = (A + 2.0) / 4.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    else:
        # non_linearity is a user specified function
        if not callable(non_linearity):
            raise ValueError("non_linearity is either a callable or a value " +
                             + "['tanh', 'sigmoid', 'relu', 'quantTanh', " +
                             "'quantSigm'")
        return non_linearity(A)
Example #17
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) )

        with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
            t = math_ops.cast(self.iterations, K.floatx())
        lr_t = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t))

        lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t))
        if self.sgdcorr:
            m_rate = 1. - self.beta_1 / (self.gamma * t + 1.)
        else:
            m_rate = 1. - self.beta_1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + m_rate * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(vhat_t) + self.epsilon)
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(v_t) + self.epsilon)

            lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_t * lr_v, lower_bound), upper_bound)
            p_t = p - lr * lr_bound * m_t

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #18
 def loss_op(self, targets, prediction_ops):
   """Create loss_op."""
   prediction = prediction_ops["mean"]
   if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
     covariance = prediction_ops["covariance"]
     sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
     normal = distributions.Normal(loc=targets, scale=sigma)
     loss_op = -math_ops.reduce_sum(normal.log_prob(prediction))
   else:
     assert self.loss == ARModel.SQUARED_LOSS, self.loss
     loss_op = math_ops.reduce_sum(math_ops.square(prediction - targets))
   loss_op /= math_ops.cast(
       math_ops.reduce_prod(array_ops.shape(targets)), loss_op.dtype)
   return loss_op
Example #19
def _unsorted_segment_N(data, segment_ids, num_segments):
    """ Helper function for unsorted_segment_mean/_sqrtN. Computes the number
      of segment entries with 0-entries set to 1 to allow division by N.
  """
    # bincount doesn't support negative indices so we use unsorted_segment_sum
    segment_ids_shape = array_ops.shape_internal(segment_ids)
    ones_tensor = array_ops.ones(segment_ids_shape, dtype=data.dtype)
    N = gen_math_ops.unsorted_segment_sum(ones_tensor, segment_ids,
                                          num_segments)
    # add dimensions for all non-reduced axes
    ndims_output = data.shape.ndims - segment_ids.shape.ndims
    broadcast_shape = [num_segments] + [1] * ndims_output
    N = array_ops.reshape(N, broadcast_shape)
    return gen_math_ops.maximum(N, 1)
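
A NumPy sketch of the same segment-count computation, including the maximum(N, 1) floor that keeps a later mean from dividing by zero (illustrative; unlike the original it uses bincount, so it assumes non-negative segment ids):

import numpy as np

def segment_counts_np(segment_ids, num_segments):
    counts = np.bincount(np.ravel(segment_ids), minlength=num_segments)
    # Empty segments get a count of 1 so division by N is always safe.
    return np.maximum(counts, 1)

print(segment_counts_np([0, 0, 2, 2, 2], num_segments=4))  # [2 1 3 1]
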
Example #20
 def loss_op(self, targets, prediction_ops):
   """Create loss_op."""
   prediction = prediction_ops["mean"]
   if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
     covariance = prediction_ops["covariance"]
     sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
     normal = distributions.normal.Normal(loc=targets, scale=sigma)
     loss_op = -math_ops.reduce_sum(normal.log_prob(prediction))
   else:
     assert self.loss == ARModel.SQUARED_LOSS, self.loss
     loss_op = math_ops.reduce_sum(math_ops.square(prediction - targets))
   loss_op /= math_ops.cast(
       math_ops.reduce_prod(array_ops.shape(targets)), loss_op.dtype)
   return loss_op
Example #21
    def __sample_w_rej(self, n, seed):
        c = math_ops.sqrt((4 * (self.scale**2)) + (self.__mf - 1)**2)
        b_true = (-2 * self.scale + c) / (self.__mf - 1)

        # using Taylor approximation with a smooth shift from 10 < scale < 11
        # to avoid numerical errors for large scale
        b_app = (self.__mf - 1) / (4 * self.scale)
        s = gen_math_ops.minimum(gen_math_ops.maximum(0., self.scale - 10), 1.)
        b = b_app * s + b_true * (1 - s)

        a = (self.__mf - 1 + 2 * self.scale + c) / 4
        d = (4 * a * b) / (1 + b) - (self.__mf - 1) * math_ops.log(self.__mf -
                                                                   1)

        self.__b, (self.__e, self.__w) = b, self.__while_loop(b, a, d, n, seed)
        return self.__w
Example #22
 def GetParams(self):
     """Testing Concatenation in TF-TRT conversion."""
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [2, 3, 3, 1]
     output_name = "output"
     g = ops.Graph()
     with g.as_default():
         x = array_ops.placeholder(dtype=dtype,
                                   shape=input_dims,
                                   name=input_name)
         # scale
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r1 = x / a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r2 = a / x
         a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
         r3 = a + x
         a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
         r4 = x * a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r5 = x - a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r6 = a - x
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r7 = x - a
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r8 = a - x
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r9 = gen_math_ops.maximum(x, a)
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r10 = gen_math_ops.minimum(a, x)
         a = constant_op.constant(np.random.randn(3), dtype=dtype)
         r11 = x * a
         a = constant_op.constant(np.random.randn(1), dtype=dtype)
         r12 = a * x
         concat1 = array_ops.concat([r1, r2, r3, r4, r5, r6], axis=-1)
         concat2 = array_ops.concat([r7, r8, r9, r10, r11, r12], axis=3)
         x = array_ops.concat([concat1, concat2], axis=-1)
         gen_array_ops.reshape(x, [2, -1], name=output_name)
     return trt_test.TfTrtIntegrationTestParams(gdef=g.as_graph_def(),
                                                input_names=[input_name],
                                                input_dims=[input_dims],
                                                output_names=[output_name],
                                                expected_output_dims=[
                                                    (2, 126)
                                                ])
Example #23
 def _anomaly_log_prob(self, targets, prediction_ops):
     prediction = prediction_ops["mean"]
     if self._anomaly_distribution == AnomalyMixtureARModel.GAUSSIAN_ANOMALY:
         anomaly_variance = prediction_ops["anomaly_params"]
         anomaly_sigma = math_ops.sqrt(
             gen_math_ops.maximum(anomaly_variance, 1e-5))
         normal = distributions.Normal(loc=targets, scale=anomaly_sigma)
         log_prob = normal.log_prob(prediction)
     else:
         assert self._anomaly_distribution == AnomalyMixtureARModel.CAUCHY_ANOMALY
         anomaly_scale = prediction_ops["anomaly_params"]
         cauchy = distributions.StudentT(df=array_ops.ones(
             [], dtype=anomaly_scale.dtype),
                                         loc=targets,
                                         scale=anomaly_scale)
         log_prob = cauchy.log_prob(prediction)
     return log_prob
Example #24
 def _anomaly_log_prob(self, targets, prediction_ops):
   prediction = prediction_ops["mean"]
   if self._anomaly_distribution == AnomalyMixtureARModel.GAUSSIAN_ANOMALY:
     anomaly_variance = prediction_ops["anomaly_params"]
     anomaly_sigma = math_ops.sqrt(
         gen_math_ops.maximum(anomaly_variance, 1e-5))
     normal = distributions.Normal(loc=targets, scale=anomaly_sigma)
     log_prob = normal.log_prob(prediction)
   else:
     assert self._anomaly_distribution == AnomalyMixtureARModel.CAUCHY_ANOMALY
     anomaly_scale = prediction_ops["anomaly_params"]
     cauchy = distributions.StudentT(
         df=array_ops.ones([], dtype=anomaly_scale.dtype),
         loc=targets,
         scale=anomaly_scale)
     log_prob = cauchy.log_prob(prediction)
   return log_prob
Example #25
 def GetParams(self):
   """Testing Concatenation in TF-TRT conversion."""
   dtype = dtypes.float32
   input_name = "input"
   input_dims = [2, 3, 3, 1]
   g = ops.Graph()
   with g.as_default():
     x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
     # scale
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r1 = x / a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r2 = a / x
     a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
     r3 = a + x
     a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
     r4 = x * a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r5 = x - a
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r6 = a - x
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r7 = x - a
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r8 = a - x
     a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
     r9 = gen_math_ops.maximum(x, a)
     a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
     r10 = gen_math_ops.minimum(a, x)
     a = constant_op.constant(np.random.randn(3), dtype=dtype)
     r11 = x * a
     a = constant_op.constant(np.random.randn(1), dtype=dtype)
     r12 = a * x
     concat1 = array_ops.concat([r1, r2, r3, r4, r5, r6], axis=-1)
     concat2 = array_ops.concat([r7, r8, r9, r10, r11, r12], axis=3)
     x = array_ops.concat([concat1, concat2], axis=-1)
     gen_array_ops.reshape(x, [2, -1], name=self.output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       expected_engines=["my_trt_op_0"],
       expected_output_dims=(2, 126),
       allclose_atol=1.e-03,
       allclose_rtol=1.e-03)
Example #26
 def _apply_dense(self, grad, var):
   # bias-corrected learning rate
   lr = self._lr_t * math_ops.sqrt(1. - self._beta2_power) / (1. - self._beta1_power)
   first_mom = self.get_slot(var, "first_mom")
   second_mom = self.get_slot(var, "second_mom")
   second_mom_max = self.get_slot(var, "second_mom_max")
   first_update = first_mom.assign(self._beta1_t * first_mom +
                                   self._one_minus_beta1 * grad,
                                   use_locking=self._use_locking)
   second_update = second_mom.assign(self._beta2_t * second_mom +
                                     self._one_minus_beta2 * math_ops.square(grad),
                                     use_locking=self._use_locking)
   # AMSGrad compared to ADAM
   second_max_update = second_mom_max.assign(gen_math_ops.maximum(second_mom_max,
                                                                  second_update))
   var_update = var.assign_sub(lr * first_update / (math_ops.sqrt(second_max_update) +
                                                    self._epsilon_t),
                               use_locking=self._use_locking)
   return control_flow_ops.group(*[var_update, first_update,
                                   second_update, second_max_update])
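
The second_mom_max slot is the AMSGrad twist on Adam: the denominator uses a running elementwise maximum of the second-moment estimates, so the effective per-parameter step size never increases. A NumPy sketch of one update with illustrative hyperparameters (the bias-corrected learning rate used above is omitted):

import numpy as np

def amsgrad_step(param, grad, m, v, v_max, lr=1e-3,
                 beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    v_max = np.maximum(v_max, v)          # AMSGrad: monotone second moment
    param = param - lr * m / (np.sqrt(v_max) + eps)
    return param, m, v, v_max

p, m, v, vmax = amsgrad_step(np.ones(3), np.array([0.1, -0.2, 0.3]),
                             np.zeros(3), np.zeros(3), np.zeros(3))
print(p)
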
Example #27
def clip_covariance(covariance_matrix, maximum_variance_ratio,
                    minimum_variance):
    """Enforce constraints on a covariance matrix to improve numerical stability.

  Args:
    covariance_matrix: A [..., N, N] batch of covariance matrices.
    maximum_variance_ratio: The maximum allowed ratio of two diagonal
      entries. Any entries lower than the maximum entry divided by this ratio
      will be set to that value.
    minimum_variance: A floor for diagonal entries in the returned matrix.
  Returns:
    A new covariance matrix with the requested constraints enforced. If the
    input was positive definite, the output will be too.
  """
    # TODO(allenl): Smarter scaling here so that correlations are preserved when
    # fiddling with diagonal elements.
    diagonal = array_ops.matrix_diag_part(covariance_matrix)
    maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True)
    new_diagonal = gen_math_ops.maximum(diagonal,
                                        maximum / maximum_variance_ratio)
    return array_ops.matrix_set_diag(
        covariance_matrix, math_ops.maximum(new_diagonal, minimum_variance))
Example #28
def posdef_inv_eig(tensor, identity, damping):
    """Computes inverse(tensor + damping * identity) with eigendecomposition."""
    # # this works
    # with tf.device('/cpu:0'):
    #     eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(
    #         tensor + damping * identity)

    # # this doesn't work
    # eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(
    #     tensor + damping * identity)

    # this works
    eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(
        tf.to_double(tensor + damping * identity))
    eigenvalues, eigenvectors = tf.to_float(eigenvalues), tf.to_float(
        eigenvectors)

    # TODO(GD): it's a little hacky
    eigenvalues = gen_math_ops.maximum(eigenvalues, damping)
    return math_ops.matmul(eigenvectors / eigenvalues,
                           eigenvectors,
                           transpose_b=True)
Example #29
def clip_covariance(
    covariance_matrix, maximum_variance_ratio, minimum_variance):
  """Enforce constraints on a covariance matrix to improve numerical stability.

  Args:
    covariance_matrix: A [..., N, N] batch of covariance matrices.
    maximum_variance_ratio: The maximum allowed ratio of two diagonal
      entries. Any entries lower than the maximum entry divided by this ratio
      will be set to that value.
    minimum_variance: A floor for diagonal entries in the returned matrix.
  Returns:
    A new covariance matrix with the requested constraints enforced. If the
    input was positive definite, the output will be too.
  """
  # TODO(allenl): Smarter scaling here so that correlations are preserved when
  # fiddling with diagonal elements.
  diagonal = array_ops.matrix_diag_part(covariance_matrix)
  maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True)
  new_diagonal = gen_math_ops.maximum(
      diagonal, maximum / maximum_variance_ratio)
  return array_ops.matrix_set_diag(
      covariance_matrix, math_ops.maximum(new_diagonal, minimum_variance))
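
A NumPy sketch of the diagonal clipping above for a single (non-batched) covariance matrix; names and values are illustrative:

import numpy as np

def clip_covariance_np(cov, maximum_variance_ratio, minimum_variance):
    diagonal = np.diag(cov)
    floor = diagonal.max() / maximum_variance_ratio
    new_diagonal = np.maximum(np.maximum(diagonal, floor), minimum_variance)
    out = cov.copy()
    np.fill_diagonal(out, new_diagonal)
    return out

cov = np.array([[4.0, 0.1], [0.1, 1e-8]])
print(clip_covariance_np(cov, maximum_variance_ratio=100.0, minimum_variance=1e-4))
# The tiny diagonal entry 1e-8 is raised to 4.0 / 100 = 0.04.
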
Example #30
def repeat_with_axis(data, repeats, axis, name=None):
  """Repeats elements of `data`.
  Args:
    data: An `N`-dimensional tensor.
    repeats: A 1-D integer tensor specifying how many times each element in
      `axis` should be repeated.  `len(repeats)` must equal `data.shape[axis]`.
      Supports broadcasting from a scalar value.
    axis: `int`.  The axis along which to repeat values.  Must be less than
      `max(N, 1)`.
    name: A name for the operation.
  Returns:
    A tensor with `max(N, 1)` dimensions.  Has the same shape as `data`,
    except that dimension `axis` has size `sum(repeats)`.
  #### Examples:
    ```python
    >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0)
    ['a', 'a', 'a', 'c', 'c']
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)
    [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]
    >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)
    [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]]
    ```
  """
  if not isinstance(axis, int):
    raise TypeError("axis must be an int; got %s" % type(axis).__name__)

  with ops.name_scope(name, "Repeat", [data, repeats]):
    data = ops.convert_to_tensor(data, name="data")
    repeats = convert_to_int_tensor(repeats, name="repeats")
    repeats.shape.with_rank_at_most(1)

    # If `data` is a scalar, then upgrade it to a vector.
    data = _with_nonzero_rank(data)
    data_shape = shape(data)

    # If `axis` is negative, then convert it to a positive value.
    axis = get_positive_axis(axis, data.shape.ndims)

    # Check data Tensor shapes.
    if repeats.shape.ndims == 1:
      data.shape.dims[axis].assert_is_compatible_with(repeats.shape[0])

    # If we know that `repeats` is a scalar, then we can just tile & reshape.
    if repeats.shape.ndims == 0:
      expanded = expand_dims(data, axis + 1)
      tiled = tile_one_dimension(expanded, axis + 1, repeats)
      result_shape = concat([data_shape[:axis], [-1], data_shape[axis + 1:]],
                            axis=0)
      return tf.reshape(tiled, result_shape)

    # Broadcast the `repeats` tensor so rank(repeats) == axis + 1.
    if repeats.shape.ndims != axis + 1:
      repeats_shape = shape(repeats)
      repeats_ndims = rank(repeats)
      broadcast_shape = concat(
          [data_shape[:axis + 1 - repeats_ndims], repeats_shape], axis=0)
      repeats = broadcast_to(repeats, broadcast_shape)
      repeats.set_shape([None] * (axis + 1))

    # Create a "sequence mask" based on `repeats`, where slices across `axis`
    # contain one `True` value for each repetition.  E.g., if
    # `repeats = [3, 1, 2]`, then `mask = [[1, 1, 1], [1, 0, 0], [1, 1, 0]]`.
    max_repeat = gen_math_ops.maximum(
        0, gen_math_ops._max(repeats, _all_dimensions(repeats)))
    mask = tf.sequence_mask(repeats, max_repeat)

    # Add a new dimension around each value that needs to be repeated, and
    # then tile that new dimension to match the maximum number of repetitions.
    expanded = expand_dims(data, axis + 1)
    tiled = tile_one_dimension(expanded, axis + 1, max_repeat)

    # Use `boolean_mask` to discard the extra repeated values.  This also
    # flattens all dimensions up through `axis`.
    masked = tf.boolean_mask(tiled, mask)

    # Reshape the output tensor to add the outer dimensions back.
    if axis == 0:
      result = masked
    else:
      result_shape = concat([data_shape[:axis], [-1], data_shape[axis + 1:]],
                            axis=0)
      result = tf.reshape(masked, result_shape)

    # Preserve shape information.
    if data.shape.ndims is not None:
      new_axis_size = 0 if repeats.shape[0] == 0 else None
      result.set_shape(data.shape[:axis].concatenate(
          [new_axis_size]).concatenate(data.shape[axis + 1:]))

    return result
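
The sequence-mask construction is the heart of the algorithm above; a NumPy sketch of the repeat-via-mask trick for the 1-D case (repeat_1d_np is illustrative only):

import numpy as np

def repeat_1d_np(data, repeats):
    data, repeats = np.asarray(data), np.asarray(repeats)
    max_repeat = max(int(repeats.max()), 0) if repeats.size else 0
    # Sequence mask: row i has repeats[i] leading True values.
    mask = np.arange(max_repeat) < repeats[:, None]
    # Tile every element max_repeat times, then keep only the masked copies.
    tiled = np.tile(data[:, None], (1, max_repeat))
    return tiled[mask]

print(repeat_1d_np(['a', 'b', 'c'], [3, 0, 2]))  # ['a' 'a' 'a' 'c' 'c']
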
Example #31
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) )

        t = math_ops.cast(self.iterations, K.floatx()) + 1

        lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t))
        if self.sgdcorr:
            m_rate = 1. - self.beta_1 / (self.gamma * t + 1.)
        else:
            m_rate = 1. - self.beta_1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (
            1. - 0.5 *
            (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (
            1. - 0.5 *
            (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + m_rate * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                self.updates.append(state_ops.assign(vhat, vhat_t))
                v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
            else:
                v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
            m_t_bar = (m_rate / (1.-self.beta_1)) * (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
            beta_1_reduce = 1. - math_ops.pow(self.beta_1, t)
            lr_v = gen_math_ops.reciprocal((gen_math_ops.sqrt(v_t_prime) + self.epsilon) * beta_1_reduce)

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))

            lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_v, lower_bound), upper_bound)
            p_t = p - lr * lr_bound * beta_1_reduce * m_t_bar
            
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #32
 def decr_loss_scale():
   update_op = state_ops.assign(
       self._loss_scale,
       gen_math_ops.maximum(1., self._loss_scale * self._decr_ratio))
   # When loss_scale is updated, both good and bad steps are reset.
   return control_flow_ops.group(update_op, self._reset_stats())
Example #33
 def _update_statistics_from_mini_batch(
     self, statistics, auxiliary_variables, times, values):
   """Given mini-batch input, update `statistics` and `auxiliary_variables`."""
   values = math_ops.cast(values, self._dtype)
   # The density (measured in times per observation) that we see in each part
   # of the mini-batch.
   batch_inter_observation_duration = (math_ops.cast(
       math_ops.reduce_max(times, axis=1) - math_ops.reduce_min(times, axis=1),
       self._dtype) / math_ops.cast(
           array_ops.shape(times)[1] - 1, self._dtype))
   # Co-locate updates with their variables to minimize race conditions when
   # updating statistics.
   with ops.colocate_with(auxiliary_variables.max_time_seen):
     # There is a race condition if this value is being updated from multiple
     # workers. However, it should eventually reach the correct value if the
     # last chunk is presented enough times.
     max_time_seen_assign = state_ops.assign(
         auxiliary_variables.max_time_seen,
         gen_math_ops.maximum(auxiliary_variables.max_time_seen,
                              math_ops.reduce_max(times)))
   with ops.colocate_with(auxiliary_variables.chunk_count):
     chunk_count_assign = state_ops.assign_add(auxiliary_variables.chunk_count,
                                               array_ops.shape(
                                                   times,
                                                   out_type=dtypes.int64)[0])
   with ops.colocate_with(auxiliary_variables.inter_observation_duration_sum):
     inter_observation_duration_assign = state_ops.assign_add(
         auxiliary_variables.inter_observation_duration_sum,
         math_ops.reduce_sum(batch_inter_observation_duration))
   with ops.colocate_with(auxiliary_variables.example_count):
     example_count_assign = state_ops.assign_add(
         auxiliary_variables.example_count,
         array_ops.size(times, out_type=dtypes.int64))
   # Note: These mean/variance updates assume that all points are equally
   # likely, which is not true if _chunks_ are sampled uniformly from the space
   # of all possible contiguous chunks, since points at the start and end of
   # the series are then members of fewer chunks. For series which are much
   # longer than the chunk size (the usual/expected case), this effect becomes
   # irrelevant.
   with ops.colocate_with(auxiliary_variables.overall_feature_sum):
     overall_feature_sum_assign = state_ops.assign_add(
         auxiliary_variables.overall_feature_sum,
         math_ops.reduce_sum(values, axis=[0, 1]))
   with ops.colocate_with(auxiliary_variables.overall_feature_sum_of_squares):
     overall_feature_sum_of_squares_assign = state_ops.assign_add(
         auxiliary_variables.overall_feature_sum_of_squares,
         math_ops.reduce_sum(values**2, axis=[0, 1]))
   per_chunk_aux_updates = control_flow_ops.group(
       max_time_seen_assign, chunk_count_assign,
       inter_observation_duration_assign, example_count_assign,
       overall_feature_sum_assign, overall_feature_sum_of_squares_assign)
   with ops.control_dependencies([per_chunk_aux_updates]):
     example_count_float = math_ops.cast(auxiliary_variables.example_count,
                                         self._dtype)
     new_feature_mean = (auxiliary_variables.overall_feature_sum /
                         example_count_float)
     overall_feature_mean_update = state_ops.assign(
         statistics.overall_feature_moments.mean, new_feature_mean)
     overall_feature_var_update = state_ops.assign(
         statistics.overall_feature_moments.variance,
         # De-biased n / (n - 1) variance correction
         example_count_float / (example_count_float - 1.) *
         (auxiliary_variables.overall_feature_sum_of_squares /
          example_count_float - new_feature_mean**2))
     # TODO(b/35675805): Remove this cast
     min_time_batch = math_ops.cast(math_ops.argmin(times[:, 0]), dtypes.int32)
     def series_start_updates():
       # If this is the lowest-time chunk that we have seen so far, update
       # series start moments to reflect that. Note that these statistics are
       # "best effort", as there are race conditions in the update (however,
       # they should eventually converge if the start of the series is
       # presented enough times).
       mean, variance = nn.moments(
           values[min_time_batch, :self._starting_variance_window_size],
           axes=[0])
       return control_flow_ops.group(
           state_ops.assign(statistics.series_start_moments.mean, mean),
           state_ops.assign(statistics.series_start_moments.variance,
                            variance))
     with ops.colocate_with(statistics.start_time):
       series_start_update = control_flow_ops.cond(
           # Update moments whenever we even match the lowest time seen so far,
           # to ensure that series start statistics are eventually updated to
           # their correct values, despite race conditions (i.e. eventually
           # statistics.start_time will reflect the global lowest time, and
           # given that we will eventually update the series start moments to
           # their correct values).
           math_ops.less_equal(times[min_time_batch, 0],
                               statistics.start_time),
           series_start_updates,
           control_flow_ops.no_op)
       with ops.control_dependencies([series_start_update]):
         # There is a race condition if this update is performed in parallel on
         # multiple workers. Since models may be sensitive to being presented
         # with times before the putative start time, the value of this
         # variable is post-processed above to guarantee that each worker is
         # presented with a start time which is at least as low as the lowest
         # time in its current mini-batch.
         start_time_update = state_ops.assign(statistics.start_time,
                                              gen_math_ops.minimum(
                                                  statistics.start_time,
                                                  math_ops.reduce_min(times)))
     inter_observation_duration_estimate = (
         auxiliary_variables.inter_observation_duration_sum / math_ops.cast(
             auxiliary_variables.chunk_count, self._dtype))
     # Estimate the total number of observations as:
     #   (end time - start time + 1) * average intra-chunk time density
     total_observation_count_update = state_ops.assign(
         statistics.total_observation_count,
         math_ops.cast(
             gen_math_ops.round(
                 math_ops.cast(auxiliary_variables.max_time_seen -
                               statistics.start_time + 1, self._dtype) /
                 inter_observation_duration_estimate), dtypes.int64))
     per_chunk_stat_updates = control_flow_ops.group(
         overall_feature_mean_update, overall_feature_var_update,
         series_start_update, start_time_update,
         total_observation_count_update)
   return per_chunk_stat_updates
Example #34
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
    """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  The maximum element in `values` is 5, so the output vector has length
  5 + 1 = 6.

  Each bin in the output counts the occurrences of its index. Here, index 1 of
  the output has value 2 because the value 1 occurs twice in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Each bin is incremented by the corresponding weight instead of 1. Here,
  index 1 of the output has value 6, the sum of the weights at the positions
  where `values` equals 1.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.

  """
    name = "bincount" if name is None else name
    with ops.name_scope(name):
        # Somehow forward compatible needs to be False.
        if not binary_output and axis is None:
            arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
            output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
                math_ops.reduce_max(arr) + 1)
            if minlength is not None:
                minlength = ops.convert_to_tensor(minlength,
                                                  name="minlength",
                                                  dtype=dtypes.int32)
                output_size = gen_math_ops.maximum(minlength, output_size)
            if maxlength is not None:
                maxlength = ops.convert_to_tensor(maxlength,
                                                  name="maxlength",
                                                  dtype=dtypes.int32)
                output_size = gen_math_ops.minimum(maxlength, output_size)
            if weights is not None:
                weights = ops.convert_to_tensor(weights, name="weights")
                return gen_math_ops.unsorted_segment_sum(
                    weights, arr, output_size)
            weights = constant_op.constant([], dtype)
            arr = array_ops.reshape(arr, [-1])
            return gen_math_ops.bincount(arr, output_size, weights)

        if not isinstance(arr, sparse_tensor.SparseTensor):
            arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr,
                                                                   name="arr")
        if weights is not None:
            if not isinstance(weights, sparse_tensor.SparseTensor):
                weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                    weights, name="weights")

        if weights is not None and binary_output:
            raise ValueError(
                "Arguments `binary_output` and `weights` are mutually "
                "exclusive. Please specify only one.")

        if not arr.dtype.is_integer:
            arr = math_ops.cast(arr, dtypes.int32)
        if axis is None:
            axis = 0

        if axis not in [0, -1]:
            raise ValueError(
                f"Unsupported value for argument axis={axis}. Only 0 and"
                " -1 are currently supported.")

        if isinstance(arr, ragged_tensor.RaggedTensor):
            array_is_nonempty = math_ops.reduce_prod(
                array_ops.shape(arr.values)) > 0
        else:
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
        if isinstance(arr, sparse_tensor.SparseTensor):
            output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
                math_ops.reduce_max(arr.values) + 1)
        else:
            output_size = math_ops.cast(
                array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1)
        if minlength is not None:
            minlength = ops.convert_to_tensor(minlength,
                                              name="minlength",
                                              dtype=arr.dtype)
            output_size = gen_math_ops.maximum(minlength, output_size)
        if maxlength is not None:
            maxlength = ops.convert_to_tensor(maxlength,
                                              name="maxlength",
                                              dtype=arr.dtype)
            output_size = gen_math_ops.minimum(maxlength, output_size)

        if axis == 0:
            if isinstance(arr, sparse_tensor.SparseTensor):
                if weights is not None:
                    weights = validate_sparse_weights(arr, weights, dtype)
                arr = arr.values
            elif isinstance(arr, ragged_tensor.RaggedTensor):
                if weights is not None:
                    weights = validate_ragged_weights(arr, weights, dtype)
                arr = arr.values
            else:
                if weights is not None:
                    weights = array_ops.reshape(weights, [-1])
                arr = array_ops.reshape(arr, [-1])

        if isinstance(arr, sparse_tensor.SparseTensor):
            weights = validate_sparse_weights(arr, weights, dtype)
            return gen_math_ops.sparse_bincount(indices=arr.indices,
                                                values=arr.values,
                                                dense_shape=arr.dense_shape,
                                                size=output_size,
                                                weights=weights,
                                                binary_output=binary_output)
        elif isinstance(arr, ragged_tensor.RaggedTensor):
            weights = validate_ragged_weights(arr, weights, dtype)
            return gen_math_ops.ragged_bincount(splits=arr.row_splits,
                                                values=arr.values,
                                                size=output_size,
                                                weights=weights,
                                                binary_output=binary_output)
        else:
            weights = validate_dense_weights(arr, weights, dtype)
            return gen_math_ops.dense_bincount(input=arr,
                                               size=output_size,
                                               weights=weights,
                                               binary_output=binary_output)
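
For the dense, unweighted path, the output-size logic above behaves like NumPy's bincount with a minlength floor, with maxlength additionally capping the size (values at or above maxlength are dropped). A hedged NumPy sketch:

import numpy as np

def bincount_np(arr, minlength=None, maxlength=None):
    arr = np.asarray(arr).ravel()
    size = int(arr.max()) + 1 if arr.size else 0
    if minlength is not None:
        size = max(minlength, size)
    if maxlength is not None:
        size = min(maxlength, size)
    # Truncating to `size` drops counts for values >= maxlength.
    return np.bincount(arr, minlength=size)[:size]

print(bincount_np([1, 1, 2, 3, 2, 4, 4, 5]))               # [0 2 2 1 2 1]
print(bincount_np([1, 1, 2, 3, 2, 4, 4, 5], maxlength=3))  # [0 2 2]
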
Example #35
    def call(self, inputs, state):
        """Long short-term memory cell (Neat).
    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, num_units]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * num_units]`.
    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
        sigmoid = math_ops.sigmoid
        zero = constant_op.constant(0, dtype=dtypes.int32)
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state,
                                   num_or_size_splits=2,
                                   axis=one,
                                   name="c_h_-_split")

        # print("c = \n{}\nh = \n{}\n".format(c.get_shape(),h.get_shape()))
        # print("i = \n{}\n".format(inputs.get_shape()))

        input_depth = int(inputs.get_shape()[1])
        shape = int(self._kernel.get_shape()[1])
        ratio = [self._num_units * 5, self._num_units * 3]

        # print("w = \n{}\n".format(self._kernel.get_shape()))
        # W_fi [5,4] W_fh [5,28]
        W_f, W_r = array_ops.split(value=self._kernel,
                                   num_or_size_splits=ratio,
                                   axis=one,
                                   name="W-f_W-r_-_split_-kernel")
        # print("w_f = \n{}\nw_r = \n{}\n".format(W_f.get_shape(),W_r.get_shape()))

        # W_fi [1,4] W_fh [4,4]
        W_fi, W_fh = array_ops.split(
            value=W_f,
            num_or_size_splits=[input_depth, self._num_units],
            axis=zero,
            name="W-fi_W-fh_-_split_W-f")
        # print("w_fi = \n{}\nw_fh = \n{}\n".format(W_fi.get_shape(),W_fh.get_shape()))

        #print("b = \n{}\n".format(self._bias.get_shape()))

        # b_f [_num_units,] b_f [_num_units*7,]
        b_f, b_r = array_ops.split(value=self._bias,
                                   num_or_size_splits=ratio,
                                   axis=zero,
                                   name="b-f_b-r_-_split_-bias")
        # print("b_f = \n{}\nb_r = \n{}\n".format(b_f.get_shape(),b_r.get_shape()))

        # a [?,_num_units]
        sw = math_ops.add(math_ops.matmul(h, W_fh),
                          math_ops.matmul(inputs, W_fi))
        # print("a = \n{}\n".format(a.get_shape()))

        sw = nn_ops.bias_add(value=sw, bias=b_f)
        # print("a = \n{}\n".format(a.get_shape()))
        s, t, u, v, w = array_ops.split(value=sw,
                                        num_or_size_splits=5,
                                        axis=one,
                                        name="s_t_v_u_w_-_split_sw")

        # W_ri [input_depth,_num_units*7] W_rh [_num_units,_num_units*7]
        W_ri, W_rh = array_ops.split(
            value=W_r,
            num_or_size_splits=[input_depth, self._num_units],
            axis=zero,
            name="W-ri_W-rh_-_split_W-r")
        # print("w_ri = \n{}\nw_rh = \n{}\n".format(W_ri.get_shape(),W_rh.get_shape()))

        # bh [?,_num_units*7]
        xz = gen_math_ops.maximum(math_ops.matmul(h, W_rh),
                                  math_ops.matmul(inputs, W_ri))
        # print("bh = \n{}\n".format(bh.get_shape()))

        xz = nn_ops.bias_add(xz, b_r)
        # print("bh = \n{}\n".format(bh.get_shape()))

        # b,...,h [?,_num_units]
        x, y, z = array_ops.split(value=xz,
                                  num_or_size_splits=3,
                                  axis=one,
                                  name="x_y_z_-_split_xz")

        add = math_ops.add
        multiply = math_ops.multiply
        tanh = math_ops.tanh
        relu = nn_ops.relu
        identity = array_ops.identity

        #Nas cell 2
        new_c = multiply(identity(add(identity(add(c, tanh(z))), identity(y))),
                         sigmoid(add(relu(v), tanh(s))))
        new_h = tanh(
            multiply(
                identity(new_c),
                sigmoid(
                    multiply(sigmoid(add(tanh(x), tanh(w))),
                             sigmoid(add(identity(u), tanh(t)))))))

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
Example #36
    def _conjugate_gradient(self,
                            loss,
                            z,
                            grads_and_vars,
                            cg_iter,
                            fix_first_step=False,
                            init_deltas=None):
        minus_gradient = [g for g, v in grads_and_vars]
        variables = [v for g, v in grads_and_vars]

        # Curvature-vector product at the starting point: zero unless warm-started
        # from init_deltas.
        H_vars = [array_ops.zeros_like(g) for g in minus_gradient]
        if init_deltas is not None:
            H_vars = self._Hv(loss, z, variables, init_deltas, self._damping)

        # CG initialization: the residual and the search direction both start at
        # b - A x0, where b is the negative gradient and A the damped curvature.
        curr_dirs = [g - b for g, b in list(zip(minus_gradient, H_vars))]
        curr_residuals = [g - b for g, b in list(zip(minus_gradient, H_vars))]
        deltas = [array_ops.zeros_like(g) for g in curr_dirs]

        deltas_history = []
        residuals_history = []
        first_alpha = 1
        for i in range(cg_iter):
            Hvs = self._Hv(loss, z, variables, curr_dirs, self._damping)

            if len(Hvs) != len(variables):
                raise ValueError("xs and Hvs must have the same length.")

            curr_residuals_flatten = [
                gen_array_ops.reshape(v, [-1]) for v in curr_residuals
            ]
            curr_dirs_flatten = [
                gen_array_ops.reshape(v, [-1]) for v in curr_dirs
            ]
            Hvs_flatten = [gen_array_ops.reshape(v, [-1]) for v in Hvs]

            curr_residuals_concat = array_ops.concat(curr_residuals_flatten, 0)
            curr_dirs_concat = array_ops.concat(curr_dirs_flatten, 0)
            Hvs_concat = array_ops.concat(Hvs_flatten, 0)
            # Step size alpha = (r . r) / (d . A d); if finite it is clamped below
            # to at least 1e-6, otherwise it falls back to 1.0.
            alpha = _dot(curr_residuals_concat, curr_residuals_concat) / _dot(
                curr_dirs_concat, Hvs_concat)
            alpha = control_flow_ops.cond(
                gen_math_ops.is_finite(alpha),
                lambda: gen_math_ops.maximum(alpha, 1e-6),
                lambda: ops.convert_to_tensor(1.0))
            if i == 0 and fix_first_step:
                first_alpha = alpha
            # With fix_first_step, each step is rescaled by the first step size, so
            # the first update has unit scale.
            curr_deltas = [d * (alpha / first_alpha) for d in curr_dirs]
            deltas = [d1 + d0 for d0, d1 in list(zip(curr_deltas, deltas))]
            deltas_history.append(curr_deltas)
            residuals_history.append(curr_residuals)
            new_residuals = [
                r - alpha * v for r, v in list(zip(curr_residuals, Hvs))
            ]
            new_residuals_flatten = [
                gen_array_ops.reshape(v, [-1]) for v in new_residuals
            ]
            new_residuals_concat = array_ops.concat(new_residuals_flatten, 0)

            # Direction-update coefficient beta = (r_new . r_new) / (r . r);
            # beta is reset to 0 below (restart from the residual) when non-finite.
            beta = _dot(new_residuals_concat, new_residuals_concat) / _dot(
                curr_residuals_concat, curr_residuals_concat)
            beta = control_flow_ops.cond(gen_math_ops.is_finite(beta),
                                         lambda: beta,
                                         lambda: ops.convert_to_tensor(0.0))
            #beta = gen_math_ops.maximum(beta, 1e-4)
            new_dirs = [
                r + beta * d for r, d in list(zip(new_residuals, curr_dirs))
            ]
            curr_dirs = new_dirs
            curr_residuals = new_residuals

        return list(zip(deltas, variables)), deltas_history, residuals_history
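The loop above is the standard conjugate-gradient recurrence applied to the damped curvature (with `_dot` assumed to be a flat inner product). A minimal NumPy sketch of the same alpha/beta updates on an explicit symmetric positive-definite matrix, purely for illustration:

import numpy as np

def conjugate_gradient(A, b, cg_iter=10):
    # Solve A x = b for symmetric positive-definite A.
    x = np.zeros_like(b)
    r = b - A @ x      # residual
    d = r.copy()       # search direction
    for _ in range(cg_iter):
        Ad = A @ d
        alpha = (r @ r) / (d @ Ad)        # step size
        x = x + alpha * d
        r_new = r - alpha * Ad
        beta = (r_new @ r_new) / (r @ r)  # direction-update coefficient
        d = r_new + beta * d
        r = r_new
    return x

A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
# conjugate_gradient(A, b) should be close to np.linalg.solve(A, b).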
Ejemplo n.º 37
0
 def lr_function():
   return gen_math_ops.maximum(
       ending_lr,
       starting_lr + ((ending_lr - starting_lr) * step_counter) /
       num_steps_float)
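The closure above linearly interpolates from starting_lr toward ending_lr over num_steps_float steps and, assuming ending_lr < starting_lr, clamps the result at ending_lr once the interpolation passes it. A plain-Python sketch of the same schedule with illustrative values:

def linear_decay_lr(step, starting_lr=0.1, ending_lr=0.01, num_steps=100.0):
    # Linear interpolation, floored at ending_lr after num_steps.
    return max(ending_lr, starting_lr + (ending_lr - starting_lr) * step / num_steps)

print(linear_decay_lr(0))    # -> 0.1
print(linear_decay_lr(50))   # -> ~0.055
print(linear_decay_lr(200))  # -> 0.01 (clamped)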
Ejemplo n.º 38
0
    def _update_statistics_from_mini_batch(self, statistics,
                                           auxiliary_variables, times, values):
        """Given mini-batch input, update `statistics` and `auxiliary_variables`."""
        values = math_ops.cast(values, self._dtype)
        # The average inter-observation duration (time units per observation) seen
        # in each element of the mini-batch.
        batch_inter_observation_duration = (
            math_ops.cast(
                math_ops.reduce_max(times, axis=1) -
                math_ops.reduce_min(times, axis=1), self._dtype) /
            math_ops.cast(array_ops.shape(times)[1] - 1, self._dtype))
        # Co-locate updates with their variables to minimize race conditions when
        # updating statistics.
        with ops.colocate_with(auxiliary_variables.max_time_seen):
            # There is a race condition if this value is being updated from multiple
            # workers. However, it should eventually reach the correct value if the
            # last chunk is presented enough times.
            max_time_seen_assign = state_ops.assign(
                auxiliary_variables.max_time_seen,
                gen_math_ops.maximum(auxiliary_variables.max_time_seen,
                                     math_ops.reduce_max(times)))
        with ops.colocate_with(auxiliary_variables.chunk_count):
            chunk_count_assign = state_ops.assign_add(
                auxiliary_variables.chunk_count,
                array_ops.shape(times, out_type=dtypes.int64)[0])
        with ops.colocate_with(
                auxiliary_variables.inter_observation_duration_sum):
            inter_observation_duration_assign = state_ops.assign_add(
                auxiliary_variables.inter_observation_duration_sum,
                math_ops.reduce_sum(batch_inter_observation_duration))
        with ops.colocate_with(auxiliary_variables.example_count):
            example_count_assign = state_ops.assign_add(
                auxiliary_variables.example_count,
                array_ops.size(times, out_type=dtypes.int64))
        # Note: These mean/variance updates assume that all points are equally
        # likely, which is not true if _chunks_ are sampled uniformly from the space
        # of all possible contiguous chunks, since points at the start and end of
        # the series are then members of fewer chunks. For series which are much
        # longer than the chunk size (the usual/expected case), this effect becomes
        # irrelevant.
        with ops.colocate_with(auxiliary_variables.overall_feature_sum):
            overall_feature_sum_assign = state_ops.assign_add(
                auxiliary_variables.overall_feature_sum,
                math_ops.reduce_sum(values, axis=[0, 1]))
        with ops.colocate_with(
                auxiliary_variables.overall_feature_sum_of_squares):
            overall_feature_sum_of_squares_assign = state_ops.assign_add(
                auxiliary_variables.overall_feature_sum_of_squares,
                math_ops.reduce_sum(values**2, axis=[0, 1]))
        per_chunk_aux_updates = control_flow_ops.group(
            max_time_seen_assign, chunk_count_assign,
            inter_observation_duration_assign, example_count_assign,
            overall_feature_sum_assign, overall_feature_sum_of_squares_assign)
        with ops.control_dependencies([per_chunk_aux_updates]):
            example_count_float = math_ops.cast(
                auxiliary_variables.example_count, self._dtype)
            new_feature_mean = (auxiliary_variables.overall_feature_sum /
                                example_count_float)
            overall_feature_mean_update = state_ops.assign(
                statistics.overall_feature_moments.mean, new_feature_mean)
            overall_feature_var_update = state_ops.assign(
                statistics.overall_feature_moments.variance,
                # De-biased n / (n - 1) variance correction
                example_count_float / (example_count_float - 1.) *
                (auxiliary_variables.overall_feature_sum_of_squares /
                 example_count_float - new_feature_mean**2))
            # TODO(b/35675805): Remove this cast
            min_time_batch = math_ops.cast(math_ops.argmin(times[:, 0]),
                                           dtypes.int32)

            def series_start_updates():
                # If this is the lowest-time chunk that we have seen so far, update
                # series start moments to reflect that. Note that these statistics are
                # "best effort", as there are race conditions in the update (however,
                # they should eventually converge if the start of the series is
                # presented enough times).
                mean, variance = nn.moments(values[
                    min_time_batch, :self._starting_variance_window_size],
                                            axes=[0])
                return control_flow_ops.group(
                    state_ops.assign(statistics.series_start_moments.mean,
                                     mean),
                    state_ops.assign(statistics.series_start_moments.variance,
                                     variance))

            with ops.colocate_with(statistics.start_time):
                series_start_update = control_flow_ops.cond(
                    # Update the moments whenever the batch merely matches the lowest
                    # time seen so far, so that despite race conditions the series
                    # start statistics eventually converge: statistics.start_time will
                    # eventually reflect the global lowest time, and once it does the
                    # series start moments will also be updated to their correct
                    # values.
                    math_ops.less_equal(times[min_time_batch, 0],
                                        statistics.start_time),
                    series_start_updates,
                    control_flow_ops.no_op)
                with ops.control_dependencies([series_start_update]):
                    # There is a race condition if this update is performed in parallel on
                    # multiple workers. Since models may be sensitive to being presented
                    # with times before the putative start time, the value of this
                    # variable is post-processed above to guarantee that each worker is
                    # presented with a start time which is at least as low as the lowest
                    # time in its current mini-batch.
                    start_time_update = state_ops.assign(
                        statistics.start_time,
                        gen_math_ops.minimum(statistics.start_time,
                                             math_ops.reduce_min(times)))
            inter_observation_duration_estimate = (
                auxiliary_variables.inter_observation_duration_sum /
                math_ops.cast(auxiliary_variables.chunk_count, self._dtype))
            # Estimate the total number of observations as:
            #   (end time - start time + 1) / average inter-observation duration
            total_observation_count_update = state_ops.assign(
                statistics.total_observation_count,
                math_ops.cast(
                    gen_math_ops.round(
                        math_ops.cast(
                            auxiliary_variables.max_time_seen -
                            statistics.start_time + 1, self._dtype) /
                        inter_observation_duration_estimate), dtypes.int64))
            per_chunk_stat_updates = control_flow_ops.group(
                overall_feature_mean_update, overall_feature_var_update,
                series_start_update, start_time_update,
                total_observation_count_update)
        return per_chunk_stat_updates
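The mean/variance update above recovers per-feature moments from a running element count, running sum, and running sum of squares, applying an n / (n - 1) correction. A minimal NumPy sketch of that bookkeeping (hypothetical names, not the estimator's API):

import numpy as np

def update_feature_moments(example_count, feature_sum, feature_sum_sq, batch_values):
    # batch_values: [batch, time, features]; accumulate over batch and time.
    example_count += batch_values.shape[0] * batch_values.shape[1]
    feature_sum += batch_values.sum(axis=(0, 1))
    feature_sum_sq += (batch_values ** 2).sum(axis=(0, 1))
    mean = feature_sum / example_count
    # De-biased n / (n - 1) variance correction, as in the update above.
    variance = example_count / (example_count - 1.0) * (
        feature_sum_sq / example_count - mean ** 2)
    return example_count, feature_sum, feature_sum_sq, mean, variance

values = np.random.randn(4, 8, 3)  # batch=4, time=8, features=3
_, _, _, mean, var = update_feature_moments(0.0, np.zeros(3), np.zeros(3), values)
# var should be close to np.var(values.reshape(-1, 3), axis=0, ddof=1)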