def testAssertDivideByZero(self):
   with self.test_session() as sess:
     epsilon = ops.convert_to_tensor(1e-20)
     x = ops.convert_to_tensor(0.0)
     y = ops.convert_to_tensor(1.0)
     z = ops.convert_to_tensor(2.0)
     # assert(epsilon < y)
     # z / y
     with sess.graph.control_dependencies([
         control_flow_ops.Assert(
             math_ops.less(epsilon, y), ["Divide-by-zero"])
     ]):
       out = math_ops.div(z, y)
     self.assertAllEqual(2.0, out.eval())
     # assert(epsilon < x)
     # z / x
     #
     # This tests printing out multiple tensors
     with sess.graph.control_dependencies([
         control_flow_ops.Assert(
             math_ops.less(epsilon, x), ["Divide-by-zero", "less than x"])
     ]):
       out = math_ops.div(z, x)
     with self.assertRaisesOpError("less than x"):
       out.eval()
Example #2
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if isinstance(denominator, float):
    if math_ops.equal(denominator, 0.0):
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  if context.in_eager_mode() and denominator._rank() == 0:  # pylint: disable=protected-access
    if math_ops.equal(denominator, 0.0):
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator, array_ops.where(
          math_ops.equal(denominator, 0),
          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
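A minimal NumPy sketch of the same masking idea (the function name and the use of plain arrays are mine, not part of the snippet above): zero-denominator entries are first divided by 1 so no inf/NaN is produced, then overwritten with 0.

import numpy as np

def safe_div_sketch(numerator, denominator):
    # Replace zeros in the denominator by ones before dividing, then zero out
    # those positions in the quotient, mirroring the array_ops.where pattern.
    safe_denominator = np.where(denominator == 0,
                                np.ones_like(denominator), denominator)
    quotient = numerator / safe_denominator
    return np.where(denominator > 0, quotient, np.zeros_like(quotient))

print(safe_div_sketch(np.array([1.0, 2.0]), np.array([0.0, 4.0])))  # [0.  0.5]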
 def compute_best_f1_score(tp, fp, fn, name):
   precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                 name='precision_' + name)
   recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
   # Compute F1 score.
   f1_at_thresholds = (
       2.0 * precision_at_t * recall_at_t /
       (precision_at_t + recall_at_t + epsilon))
   return math_ops.reduce_max(f1_at_thresholds)
  def inference_graph(self, input_data, **inference_args):
    """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or dict of string->Tensor for the input data.
                  This input_data must generate the same spec as the
                  input_data used in training_graph:  the dict must have
                  the same keys, for example, and all tensors must have
                  the same size in their first dimension.
      **inference_args: Keyword arguments to pass through to each tree.

    Returns:
      A tuple of (probabilities, tree_paths, variance), where variance
      is the variance over all the trees for regression problems only.

    Raises:
      NotImplementedError: If trying to use feature bagging with sparse
        features.
    """
    processed_dense_features, processed_sparse_features, data_spec = (
        data_ops.ParseDataTensorOrDict(input_data))

    probabilities = []
    paths = []
    for i in range(self.params.num_trees):
      with ops.device(self.variables.device_dummies[i].device):
        tree_data = processed_dense_features
        if self.params.bagged_features:
          if processed_sparse_features is not None:
            raise NotImplementedError(
                'Feature bagging not supported with sparse features.')
          tree_data = self._bag_features(i, tree_data)
        probs, path = self.trees[i].inference_graph(
            tree_data,
            data_spec,
            sparse_features=processed_sparse_features,
            **inference_args)
        probabilities.append(probs)
        paths.append(path)
    with ops.device(self.variables.device_dummies[0].device):
      # shape of all_predict should be [batch_size, num_trees, num_outputs]
      all_predict = array_ops.stack(probabilities, axis=1)
      average_values = math_ops.div(
          math_ops.reduce_sum(all_predict, 1),
          self.params.num_trees,
          name='probabilities')
      tree_paths = array_ops.stack(paths, axis=1)
      regression_variance = None
      if self.params.regression:
        expected_squares = math_ops.div(
            math_ops.reduce_sum(all_predict * all_predict, 1),
            self.params.num_trees)
        regression_variance = math_ops.maximum(
            0., expected_squares - average_values * average_values)
      return average_values, tree_paths, regression_variance
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(grad * math_ops.div(math_ops.div(-x, y), y),
                                  ry), sy))
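As a quick sanity check of the closed-form gradients above (ignoring the broadcasting reductions), the partials of z = x / y are 1/y for x and -x/y**2 for y; a small plain-Python comparison against finite differences, with arbitrarily chosen values:

x, y, upstream = 3.0, 2.0, 1.0
grad_x = upstream / y               # matches div(grad, y)
grad_y = upstream * (-x / y) / y    # matches grad * div(div(-x, y), y)

eps = 1e-6
fd_x = ((x + eps) / y - x / y) / eps
fd_y = (x / (y + eps) - x / y) / eps
print(grad_x, fd_x)  # both ~0.5
print(grad_y, fd_y)  # both ~-0.75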
Example #6
  def _update_mask(self, weights, threshold):
    """Updates the mask for a given weight tensor.

    This function first computes the cdf of the weight tensor, and estimates
    the threshold value such that 'desired_sparsity' fraction of weights
    have magnitude less than the threshold.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        sparsity at the current global_step
      new_mask: A numpy array of the same size and shape as weights containing
        0 or 1 to indicate which of the values in weights falls below
        the threshold

    Raises:
      ValueError: if sparsity is not defined
    """
    if self._sparsity is None:
      raise ValueError('Sparsity variable undefined')

    with ops.name_scope(weights.op.name + '_pruning_ops'):
      abs_weights = math_ops.abs(weights)
      max_value = math_ops.reduce_max(abs_weights)
      histogram = _histogram(
          abs_weights, [0.0, max_value],
          nbins=self._spec.nbins,
          dtype=np.float32)

      cdf = math_ops.cumsum(histogram)
      norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram))
      current_threshold = math_ops.multiply(
          math_ops.div(
              math_ops.reduce_sum(
                  math_ops.cast(
                      math_ops.less(norm_cdf, self._sparsity), np.float32)),
              float(self._spec.nbins)), max_value)

      smoothed_threshold = math_ops.add_n([
          math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay),
          math_ops.multiply(threshold, self._spec.threshold_decay)
      ])
      new_mask = math_ops.cast(
          math_ops.greater(abs_weights, smoothed_threshold), np.float32)
    return smoothed_threshold, new_mask
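The thresholding step above can be mirrored in plain NumPy; this sketch uses hypothetical names and np.histogram in place of the internal _histogram helper, assuming the bins span [0, max|w|]:

import numpy as np

def threshold_from_cdf(weights, sparsity, nbins=256):
    # Histogram of |w|, normalized cumulative sum, then the fraction of bins
    # whose CDF is still below the desired sparsity fixes the threshold.
    abs_w = np.abs(weights)
    hist, _ = np.histogram(abs_w, bins=nbins, range=(0.0, abs_w.max()))
    norm_cdf = np.cumsum(hist) / hist.sum()
    fraction = np.sum(norm_cdf < sparsity) / float(nbins)
    return fraction * abs_w.max()

w = np.random.randn(10000)
t = threshold_from_cdf(w, sparsity=0.5)
print(np.mean(np.abs(w) < t))  # roughly 0.5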
Example #7
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(math_ops.reduce_sum(
              grad * math_ops.div(-x, math_ops.square(y)), ry), sy))
Example #8
  def GetParams(self):
    """Create a graph containing multiple segment."""
    # TODO(aaroey): test graph with different dtypes.
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [100, 24, 24, 2]
    g = ops.Graph()
    with g.as_default():
      inp = array_ops.placeholder(
          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
      with g.device("/GPU:0"):
        conv_filter = constant_op.constant(
            [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]],
            name="weights",
            dtype=dtype)
        conv = nn.conv2d(
            input=inp,
            filter=conv_filter,
            strides=[1, 2, 2, 1],
            padding="SAME",
            name="conv")
        c1 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c1")
        p = math_ops.mul(conv, c1, name="mul")
        c2 = constant_op.constant(
            np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c2")
        q = math_ops.div(conv, c2, name="div")

        edge = self.trt_incompatible_op(q, name="incompatible")
        edge = math_ops.div(edge, edge, name="div1")
        r = math_ops.add(edge, edge, name="add")

        p = math_ops.sub(p, edge, name="sub")
        q = math_ops.mul(q, edge, name="mul1")
        s = math_ops.add(p, q, name="add1")
        s = math_ops.sub(s, r, name="sub1")
      array_ops.squeeze(s, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which
        # breaks the connection check, fix it.
        # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1",
        #   "add", "sub1"];
        # - my_trt_op_1 should have ["weights","conv", "div"]
        expected_engines=["my_trt_op_0", "my_trt_op_1"],
        expected_output_dims=(100, 12, 12, 6),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def accuracy(predictions, labels, weights=None):
  """Computes the percentage of times that predictions matches labels.

  Args:
    predictions: the predicted values, a `Tensor` whose dtype and shape
                 matches 'labels'.
    labels: the ground truth values, a `Tensor` of any shape and
            bool, integer, or string dtype.
    weights: None or `Tensor` of float values to reweight the accuracy.

  Returns:
    Accuracy `Tensor`.

  Raises:
    ValueError: if dtypes don't match or
                if dtype is not bool, integer, or string.
  """
  if not (labels.dtype.is_integer or
          labels.dtype in (dtypes.bool, dtypes.string)):
    raise ValueError(
        'Labels should have bool, integer, or string dtype, not %r' %
        labels.dtype)
  if not labels.dtype.is_compatible_with(predictions.dtype):
    raise ValueError('Dtypes of predictions and labels should match. '
                     'Given: predictions (%r) and labels (%r)' %
                     (predictions.dtype, labels.dtype))
  with ops.name_scope('accuracy', values=[predictions, labels]):
    is_correct = math_ops.cast(
        math_ops.equal(predictions, labels), dtypes.float32)
    if weights is not None:
      is_correct = math_ops.mul(is_correct, weights)
      num_values = math_ops.mul(weights, array_ops.ones_like(is_correct))
      return math_ops.div(math_ops.reduce_sum(is_correct),
                          math_ops.reduce_sum(num_values))
    return math_ops.reduce_mean(is_correct)
Example #10
  def _setup_sparsity(self):
    begin_step = self._spec.sparsity_function_begin_step
    end_step = self._spec.sparsity_function_end_step
    initial_sparsity = self._spec.initial_sparsity
    target_sparsity = self._spec.target_sparsity
    exponent = self._spec.sparsity_function_exponent

    if begin_step >= end_step:
      raise ValueError(
          'Pruning must begin before it can end. begin_step=%d, end_step=%d' %
          (begin_step, end_step))

    with ops.name_scope(self._spec.name):
      p = math_ops.minimum(1.0,
                           math_ops.maximum(
                               0.0,
                               math_ops.div(
                                   math_ops.cast(self._global_step - begin_step,
                                                 np.float32),
                                   end_step - begin_step)))
      sparsity = math_ops.add(
          math_ops.multiply(initial_sparsity - target_sparsity,
                            math_ops.pow(1 - p, exponent)),
          target_sparsity,
          name='sparsity')

    return sparsity
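In plain Python the schedule above reduces to a clipped polynomial interpolation between the initial and target sparsity; a small sketch (the function name is mine):

def sparsity_at(step, begin_step, end_step, initial_sparsity,
                target_sparsity, exponent):
    # Clamp the progress p to [0, 1], then ramp with (1 - p) ** exponent.
    p = min(1.0, max(0.0, (step - begin_step) / float(end_step - begin_step)))
    return (initial_sparsity - target_sparsity) * (1 - p) ** exponent + target_sparsity

for step in (0, 500, 1000):
    print(step, sparsity_at(step, 0, 1000, 0.0, 0.9, 3))
# 0 -> 0.0, 500 -> 0.7875, 1000 -> 0.9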
Example #11
def squared_loss(predicted, target, name=None):
  """Computes and returns the per-example squared loss, divided by 2.

  Computes the per-example squared difference between the target and
  predicted tensors. The tensors must have the same shape.

  Args:
    predicted: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]`
      of predicted values.
    target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of
      target values. The shape of the target tensor should match the
      `predicted` tensor.
    name: A name for the operation (optional).

  Returns:
    A `[batch_size, dim_1, ..., dim_n]` tensor of per-example squared losses.

  Raises:
    ValueError: If `predicted` and `target` shapes do not match.

  """
  with ops.op_scope([predicted, target], name, "squared_loss") as scope:
    predicted = ops.convert_to_tensor(predicted, name="predicted")
    target = ops.convert_to_tensor(target, name="target")
    _validate_predicted_and_target(predicted, target)
    return math_ops.div(math_ops.square(target - predicted), 2.0, name=scope)
Example #12
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  input = op.inputs[0]
  output = op.outputs[0]

  # Compute the number of elements which equal to output in each reduction
  # dimension. If there are multiple elements then the gradient will be
  # divided between them.
  indicators = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(output, -1), input),
      grad.dtype)

  grad = array_ops.expand_dims(grad, -1)
  num_selected = array_ops.expand_dims(
      math_ops.reduce_sum(indicators, -1), -1)

  return [math_ops.div(indicators, num_selected) * grad, None]
Example #13
  def inference_graph(self, input_data, data_spec=None, **inference_args):
    """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      **inference_args: Keyword arguments to pass through to each tree.

    Returns:
      The last op in the random forest inference graph.
    """
    data_spec = [constants.DATA_FLOAT] if data_spec is None else data_spec
    probabilities = []
    for i in range(self.params.num_trees):
      with ops.device(self.device_assigner.get_device(i)):
        tree_data = input_data
        if self.params.bagged_features:
          tree_data = self._bag_features(i, input_data)
        probabilities.append(self.trees[i].inference_graph(
            tree_data, data_spec, **inference_args))
    with ops.device(self.device_assigner.get_device(0)):
      all_predict = array_ops.pack(probabilities)
      return math_ops.div(
          math_ops.reduce_sum(all_predict, 0), self.params.num_trees,
          name='probabilities')
  def __call__(self, step):
    with ops.name_scope(
        self.name, "PolynomialDecay",
        [self.initial_learning_rate, step, self.decay_steps,
         self.end_learning_rate, self.power]
    ) as name:
      initial_learning_rate = ops.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      end_learning_rate = math_ops.cast(self.end_learning_rate, dtype)
      power = math_ops.cast(self.power, dtype)

      global_step_recomp = math_ops.cast(step, dtype)
      decay_steps_recomp = math_ops.cast(self.decay_steps, dtype)
      if self.cycle:
        # Find the first multiple of decay_steps that is bigger than
        # global_step. If global_step is zero set the multiplier to 1
        multiplier = control_flow_ops.cond(
            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
            lambda: math_ops.ceil(global_step_recomp / self.decay_steps))
        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
      else:
        # Make sure that the global_step used is not bigger than decay_steps.
        global_step_recomp = math_ops.minimum(global_step_recomp,
                                              self.decay_steps)

      p = math_ops.div(global_step_recomp, decay_steps_recomp)
      return math_ops.add(
          math_ops.multiply(initial_learning_rate - end_learning_rate,
                            math_ops.pow(1 - p, power)),
          end_learning_rate,
          name=name)
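Stripped of the graph ops, the decay rule above is a polynomial interpolation from the initial to the end learning rate; a rough plain-Python sketch (names are mine):

import math

def polynomial_decay(step, initial_lr, end_lr, decay_steps, power, cycle=False):
    # Either clamp the step to decay_steps, or (with cycle=True) stretch
    # decay_steps to the next multiple of itself past the current step.
    if cycle:
        multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps)
        decay_steps = decay_steps * multiplier
    else:
        step = min(step, decay_steps)
    p = step / decay_steps
    return (initial_lr - end_lr) * (1 - p) ** power + end_lr

print(polynomial_decay(500, 0.1, 0.01, 1000, power=1.0))  # 0.055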
Example #15
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if compat.forward_compatible(2018, 11, 1):
    return math_ops.div_no_nan(numerator, denominator, name=name)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator,
                   array_ops.where(
                       math_ops.equal(denominator, 0),
                       array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
Example #16
  def setUp(self):
    self.a = variables.VariableV1(2.0, name="a")
    self.b = variables.VariableV1(3.0, name="b")

    self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
    self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

    self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

    self.f_y = constant_op.constant(0.30, name="f_y")
    self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

    # These three nodes x, y and z form a graph with "cross-links" in it. I.e., x
    # and y are both direct inputs to z, but x is also a direct input to y.
    self.x = variables.VariableV1(2.0, name="x")  # Should be 2.0
    self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.

    self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    config = config_pb2.ConfigProto(graph_options=graph_options)
    self.sess = session.Session(config=config)
    self.sess.run(variables.global_variables_initializer())
Example #17
  def loss(self, logits, target, features):
    """Returns loss tensor for this head.

    Args:
      logits: logits, a float tensor.
      target: either a tensor for labels or in multihead case, a dict of string
        to target tensor.
      features: features dict.

    Returns:
      Loss tensor.
    """
    target = target[self.name] if isinstance(target, dict) else target
    loss_unweighted = self._loss_fn(logits, target)

    weight_tensor = self.get_weight_tensor(features)
    if weight_tensor is None:
      return math_ops.reduce_mean(loss_unweighted, name="loss")
    else:
      loss_unweighted = array_ops.reshape(loss_unweighted, shape=(-1,))
      loss_weighted = math_ops.mul(
          loss_unweighted, array_ops.reshape(weight_tensor, shape=(-1,)))
      return math_ops.div(
          math_ops.reduce_sum(loss_weighted),
          math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
          name="loss")
Example #18
  def loss(self, logits, target, features):
    """Returns loss tensor for this head.

    The loss returned is the weighted average.

      L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}

    Args:
      logits: logits, a float tensor.
      target: either a tensor for labels or in multihead case, a dict of string
        to target tensor.
      features: features dict.

    Returns:
      Loss tensor.
    """
    target = target[self.name] if isinstance(target, dict) else target
    loss_unweighted = self._loss_fn(logits, target)

    weight_tensor = self.get_weight_tensor(features)
    if weight_tensor is None:
      return math_ops.reduce_mean(loss_unweighted, name="loss")
    loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
    return math_ops.div(
        math_ops.reduce_sum(loss_weighted),
        math_ops.cast(math_ops.reduce_sum(weight_tensor), dtypes.float32),
        name="loss")
def _length_penalty(sequence_lengths, penalty_factor):
  """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.

  Returns the length penalty tensor:
  ```
  [(5+sequence_lengths)/6]**penalty_factor
  ```
  where all operations are performed element-wise.

  Args:
    sequence_lengths: `Tensor`, the sequence lengths of the hypotheses.
    penalty_factor: A scalar that weights the length penalty.

  Returns:
    If the penalty is `0`, returns the scalar `1.0`.  Otherwise returns
    the length penalty factor, a tensor with the same shape as
    `sequence_lengths`.
  """
  penalty_factor = ops.convert_to_tensor(penalty_factor, name="penalty_factor")
  penalty_factor.set_shape(())  # penalty should be a scalar.
  static_penalty = tensor_util.constant_value(penalty_factor)
  if static_penalty is not None and static_penalty == 0:
    return 1.0
  return math_ops.div((5. + math_ops.to_float(sequence_lengths))
                      **penalty_factor, (5. + 1.)**penalty_factor)
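The penalty itself is just ((5 + length) / 6) ** alpha; a tiny plain-Python sketch of the same arithmetic (the function name is mine):

def length_penalty(sequence_length, penalty_factor):
    # GNMT-style length normalization.
    if penalty_factor == 0:
        return 1.0
    return ((5.0 + sequence_length) ** penalty_factor) / (6.0 ** penalty_factor)

print(length_penalty(10, 0.6))  # ~1.73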
Example #20
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
  """Compute the weighted moving average of `value`.

  Conceptually, the weighted moving average is:
    `moving_average(value * weight) / moving_average(weight)`,
  where a moving average updates by the rule
    `new_value = decay * old_value + (1 - decay) * update`
  Internally, this Op keeps moving average variables of both `value * weight`
  and `weight`.

  Args:
    value: A numeric `Tensor`.
    decay: A float `Tensor` or float value.  The moving average decay.
    weight:  `Tensor` that keeps the current value of a weight.
      Shape should be able to multiply `value`.
    truediv:  Boolean, if `True`, dividing by `moving_average(weight)` is
      floating point division.  If `False`, use division implied by dtypes.
    collections:  List of graph collections keys to add the internal variables
      `value * weight` and `weight` to.
      Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation.
      Defaults to "WeightedMovingAvg".

  Returns:
    An Operation that updates and returns the weighted moving average.
  """
  # Unlike assign_moving_average, the weighted moving average doesn't modify
  # user-visible variables. It is the ratio of two internal variables, which are
  # moving averages of the updates.  Thus, the signature of this function is
  # quite different than assign_moving_average.
  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                     [value, weight, decay]) as scope:
    value_x_weight_var = variable_scope.get_variable(
        "value_x_weight",
        initializer=init_ops.zeros_initializer(value.get_shape(),
                                               dtype=value.dtype),
        trainable=False,
        collections=collections)
    weight_var = variable_scope.get_variable(
        "weight",
        initializer=init_ops.zeros_initializer(weight.get_shape(),
                                               dtype=weight.dtype),
        trainable=False,
        collections=collections)
    numerator = assign_moving_average(
        value_x_weight_var, value * weight, decay, zero_debias=False)
    denominator = assign_moving_average(
        weight_var, weight, decay, zero_debias=False)

    if truediv:
      return math_ops.truediv(numerator, denominator, name=scope.name)
    else:
      return math_ops.div(numerator, denominator, name=scope.name)
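Conceptually the op keeps two running averages and returns their ratio; a plain-Python sketch of that update rule (not the variable-based TF implementation above):

def weighted_moving_average_sketch(values, weights, decay):
    # One running average of value * weight, one of weight; report the ratio.
    num, den = 0.0, 0.0
    out = []
    for v, w in zip(values, weights):
        num = decay * num + (1 - decay) * v * w
        den = decay * den + (1 - decay) * w
        out.append(num / den)
    return out

print(weighted_moving_average_sketch([1.0, 2.0, 3.0], [1.0, 1.0, 2.0], decay=0.9))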
  def _sample_n(self, n, seed=None):
    sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0)
    logits = self.logits * array_ops.ones(sample_shape)
    logits_2d = array_ops.reshape(logits, [-1, self.event_size])
    np_dtype = self.dtype.as_numpy_dtype

    # Uniform variates must be sampled from the interval (0,1] rather than
    # [0,1], as they are passed through log() to compute Gumbel variates.
    # We need to use np.finfo(np_dtype).tiny because it is the smallest,
    # positive, "normal" number.  A "normal" number is such that the mantissa
    # has an implicit leading 1.  Normal, positive numbers x, y have the
    # reasonable property that: x + y >= max(x, y).
    # minval=np.nextafter(np.float32(0),1)) can cause
    # tf.random_uniform(dtype=tf.float32) to sample 0.

    uniform = random_ops.random_uniform(shape=array_ops.shape(logits_2d),
                                        minval=np.finfo(np_dtype).tiny,
                                        maxval=1,
                                        dtype=self.dtype,
                                        seed=seed)
    gumbel = -math_ops.log(-math_ops.log(uniform))
    noisy_logits = math_ops.div(gumbel + logits_2d, self._temperature_2d)
    samples = nn_ops.log_softmax(noisy_logits)
    ret = array_ops.reshape(samples, sample_shape)
    return ret
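The sampling path above is the Gumbel-softmax trick: uniforms drawn away from 0, -log(-log(u)) noise added to the logits, and a tempered log-softmax. A NumPy sketch with hypothetical names:

import numpy as np

def sample_relaxed_one_hot(logits, temperature, rng):
    # Gumbel noise g = -log(-log(u)) with u in (0, 1), then log_softmax of
    # (g + logits) / temperature, as in the op graph above.
    tiny = np.finfo(np.float64).tiny
    u = rng.uniform(low=tiny, high=1.0, size=logits.shape)
    gumbel = -np.log(-np.log(u))
    z = (gumbel + logits) / temperature
    return z - np.log(np.sum(np.exp(z)))  # log_softmax (1-D case)

rng = np.random.default_rng(0)
print(np.exp(sample_relaxed_one_hot(np.array([1.0, 2.0, 3.0]), 0.5, rng)))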
  def decayed_lr(learning_rate, global_step, decay_steps, end_learning_rate,
                 power, cycle, name):
    """Helper to recompute learning rate; most helpful in eager-mode."""
    with ops.name_scope(
        name, "PolynomialDecay",
        [learning_rate, global_step, decay_steps, end_learning_rate, power]
    ) as name:
      learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
      dtype = learning_rate.dtype
      end_learning_rate = math_ops.cast(end_learning_rate, dtype)
      power = math_ops.cast(power, dtype)

      global_step_recomp = math_ops.cast(global_step, dtype)
      decay_steps_recomp = math_ops.cast(decay_steps, dtype)
      if cycle:
        # Find the first multiple of decay_steps that is bigger than
        # global_step. If global_step is zero set the multiplier to 1
        multiplier = control_flow_ops.cond(
            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
            lambda: math_ops.ceil(global_step_recomp / decay_steps))
        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
      else:
        # Make sure that the global_step used is not bigger than decay_steps.
        global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)

      p = math_ops.div(global_step_recomp, decay_steps_recomp)
      return math_ops.add(
          math_ops.multiply(learning_rate - end_learning_rate,
                            math_ops.pow(1 - p, power)),
          end_learning_rate,
          name=name)
Example #23
def central_crop(image, central_fraction):
  """Crop the central region of the image.

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify central_fraction = 0.5, this function
  returns the region marked with "X" in the below diagram.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  Args:
    image: 3-D float Tensor of shape [height, width, depth]
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if central_fraction is not within (0, 1].

  Returns:
    3-D float Tensor
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  if central_fraction <= 0.0 or central_fraction > 1.0:
    raise ValueError('central_fraction must be within (0, 1]')
  if central_fraction == 1.0:
    return image

  img_shape = array_ops.shape(image)
  depth = image.get_shape()[2]
  fraction_offset = int(1 / ((1 - central_fraction) / 2.0))
  bbox_h_start = math_ops.div(img_shape[0], fraction_offset)
  bbox_w_start = math_ops.div(img_shape[1], fraction_offset)

  bbox_h_size = img_shape[0] - bbox_h_start * 2
  bbox_w_size = img_shape[1] - bbox_w_start * 2

  bbox_begin = array_ops.pack([bbox_h_start, bbox_w_start, 0])
  bbox_size = array_ops.pack([bbox_h_size, bbox_w_size, -1])
  image = array_ops.slice(image, bbox_begin, bbox_size)

  # The first two dimensions are dynamic and unknown.
  image.set_shape([None, None, depth])
  return image
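The offset arithmetic is easy to check by hand: with central_fraction = 0.5 the fraction_offset is int(1 / 0.25) = 4, so the crop starts a quarter of the way in on each side. A plain-Python sketch of just that bookkeeping (the function name is mine):

def central_crop_bounds(height, width, central_fraction):
    # Same integer arithmetic as above, returning (h_start, w_start, h_size, w_size).
    fraction_offset = int(1 / ((1 - central_fraction) / 2.0))
    h_start = height // fraction_offset
    w_start = width // fraction_offset
    return h_start, w_start, height - 2 * h_start, width - 2 * w_start

print(central_crop_bounds(100, 80, 0.5))  # (25, 20, 50, 40)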
def convert_image_dtype(image, dtype, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Note that for floating point inputs, this op expects values to lie in [0,1).
  Conversion of an image containing values outside that range may lead to
  overflow errors when converted to integer `Dtype`s.

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.
  """

  if dtype == image.dtype:
    return image

  with ops.op_scope([image], name, 'convert_image') as name:
    # Both integer: use integer multiplication in the larger range
    if image.dtype.is_integer and dtype.is_integer:
      scale_in = image.dtype.max
      scale_out = dtype.max
      if scale_in > scale_out:
        # Scaling down, scale first, then cast. The scaling factor will
        # cause in.max to be mapped to above out.max but below out.max+1,
        # so that the output is safely in the supported range.
        scale = (scale_in + 1) // (scale_out + 1)
        scaled = math_ops.div(image, scale)
        return math_ops.cast(scaled, dtype)
      else:
        # Scaling up, cast first, then scale. The scale will not map in.max to
        # out.max, but converting back and forth should result in no change.
        cast = math_ops.cast(image, dtype)
        scale = (scale_out + 1) // (scale_in + 1)
        return math_ops.mul(cast, scale)
    elif image.dtype.is_floating and dtype.is_floating:
      # Both float: Just cast, no possible overflows in the allowed ranges.
      return math_ops.cast(image, dtype)
    else:
      if image.dtype.is_integer:
        # Converting to float: first cast, then scale
        cast = math_ops.cast(image, dtype)
        scale = 1. / image.dtype.max
        return math_ops.mul(cast, scale)
      else:
        # Converting from float: first scale, then cast
        scale = dtype.max + 0.5  # avoid rounding problems in the cast
        scaled = math_ops.mul(image, scale)
        return math_ops.cast(scaled, dtype)
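A NumPy sketch of the integer-to-integer branch for the uint8/uint16 pair (helper names are mine): scaling up multiplies by (out_max + 1) // (in_max + 1) after the cast, scaling down divides before it, and as the comments above note the round trip is lossless even though 255 maps to 65280 rather than 65535.

import numpy as np

def uint8_to_uint16(image_u8):
    scale = (np.iinfo(np.uint16).max + 1) // (np.iinfo(np.uint8).max + 1)  # 256
    return image_u8.astype(np.uint16) * scale

def uint16_to_uint8(image_u16):
    scale = (np.iinfo(np.uint16).max + 1) // (np.iinfo(np.uint8).max + 1)  # 256
    return (image_u16 // scale).astype(np.uint8)

x = np.array([0, 1, 255], dtype=np.uint8)
print(uint16_to_uint8(uint8_to_uint16(x)))  # [  0   1 255]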
def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
                       staircase=False, name=None):
  """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an inverse decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_steps)
  ```

  Example: decay 1/t with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1.0
  decay_rate = 0.5
  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step,
                                              decay_steps, decay_rate)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'InverseTimeDecay'

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.
  """

  with ops.name_scope(name, "InverseTimeDecay",
                      [learning_rate, global_step, decay_rate]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    const = math_ops.cast(constant_op.constant(1), learning_rate.dtype)
    denom = math_ops.add(const, math_ops.mul(decay_rate, p))
    return math_ops.div(learning_rate, denom, name=name)
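The decayed rate reduces to learning_rate / (1 + decay_rate * global_step / decay_steps); a plain-Python sketch including the staircase option (the function name is mine):

import math

def inverse_time_decay_sketch(learning_rate, global_step, decay_steps,
                              decay_rate, staircase=False):
    # Progress p is optionally floored to produce a piecewise-constant schedule.
    p = global_step / decay_steps
    if staircase:
        p = math.floor(p)
    return learning_rate / (1 + decay_rate * p)

print(inverse_time_decay_sketch(0.1, 10, 1.0, 0.5))  # 0.1 / 6 ~= 0.0167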
Example #26
def _weighted_loss(loss, weight_tensor):
  unweighted_loss = array_ops.reshape(loss, shape=(-1,))
  weighted_loss = math_ops.mul(
      unweighted_loss, array_ops.reshape(weight_tensor, shape=(-1,)))
  return math_ops.div(
      math_ops.reduce_sum(weighted_loss),
      math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
      name="loss")
Example #27
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  _, new_output_shape, input_shape = _ReductionGradAssist(op)
  tile_scaling = input_shape // new_output_shape
  grad = array_ops.reshape(grad * op.outputs[0], new_output_shape)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
  def testFindNodesWithBadTensorValues(self):
    with session.Session() as sess:
      u_name = "testFindNodesWithBadTensorValues/u"
      v_name = "testFindNodesWithBadTensorValues/v"
      w_name = "testFindNodesWithBadTensorValues/w"
      x_name = "testFindNodesWithBadTensorValues/x"
      y_name = "testFindNodesWithBadTensorValues/y"
      z_name = "testFindNodesWithBadTensorValues/z"

      u_init = constant_op.constant([2.0, 4.0])
      u = variables.Variable(u_init, name=u_name)
      v_init = constant_op.constant([2.0, 1.0])
      v = variables.Variable(v_init, name=v_name)

      # Expected output: [0.0, 3.0]
      w = math_ops.sub(u, v, name=w_name)

      # Expected output: [inf, 1.3333]
      x = math_ops.div(u, w, name=x_name)

      # Expected output: [nan, 4.0]
      y = math_ops.mul(w, x, name=y_name)

      z = math_ops.mul(y, y, name=z_name)

      u.initializer.run()
      v.initializer.run()

      run_options = config_pb2.RunOptions()
      debug_utils.watch_graph(
          run_options,
          sess.graph,
          debug_ops=["DebugIdentity"],
          debug_urls="file://%s" % self._dump_root)

      run_metadata = config_pb2.RunMetadata()
      sess.run(z, options=run_options, run_metadata=run_metadata)

      dump = debug_data.DebugDumpDir(self._dump_root)

      def has_bad_value(_, tensor):
        return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

      # Find all "offending tensors".
      bad_data = dump.find(has_bad_value)

      # Verify that the nodes with bad values are caught through running find
      # on the debug dump.
      self.assertEqual(3, len(bad_data))
      self.assertEqual(x_name, bad_data[0].node_name)
      self.assertEqual(y_name, bad_data[1].node_name)
      self.assertEqual(z_name, bad_data[2].node_name)

      # Test first_n kwarg of find(): Find the first offending tensor.
      first_bad_datum = dump.find(has_bad_value, first_n=1)

      self.assertEqual(1, len(first_bad_datum))
      self.assertEqual(x_name, first_bad_datum[0].node_name)
 def testBoth(self):
   with self.session(graph=ops.Graph()):
     t1 = constant_op.constant([1.0, 0.0])
     t2 = constant_op.constant([0.0, 0.0])
     a = math_ops.div(t1, t2)
     check = numerics.add_check_numerics_ops()
     a = control_flow_ops.with_dependencies([check], a)
     with self.assertRaisesOpError("Inf and NaN"):
       self.evaluate(a)
Example #30
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  input_shape = array_ops.shape(op.inputs[0])
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad * op.outputs[0], output_shape_kept_dims)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
Example #31
def _rescale_eval_loss(loss, weights):
    """Rescales evaluation loss according to the given weights.

  The rescaling is needed because the training loss does not include the
  weights in the denominator, whereas the evaluation loss should be divided
  by the sum of the weights.

  The rescaling factor is:
    R = sum_{i} 1 / sum_{i} w_{i}

  Args:
    loss: the scalar weighted loss.
    weights: weight coefficients. Either a scalar, or a `Tensor` of shape
      [batch_size].

  Returns:
    The given loss multiplied by the rescaling factor.
  """
    rescaling_factor = math_ops.reduce_mean(weights)
    return math_ops.div(loss, rescaling_factor)
Example #32
    def _loss(self, logits, target, weight_tensor):
        if self._n_classes < 2:
            loss_vec = math_ops.square(logits - math_ops.to_float(target))
        elif self._n_classes == 2:
            loss_vec = nn.sigmoid_cross_entropy_with_logits(
                logits, array_ops.reshape(math_ops.to_float(target), [-1, 1]))
        else:
            loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
                logits, array_ops.reshape(target, [-1]))

        if weight_tensor is None:
            return math_ops.reduce_mean(loss_vec, name="loss")
        else:
            loss_vec = array_ops.reshape(loss_vec, shape=(-1, ))
            loss_vec = math_ops.mul(
                loss_vec, array_ops.reshape(weight_tensor, shape=(-1, )))
            return math_ops.div(math_ops.reduce_sum(loss_vec),
                                math_ops.to_float(
                                    math_ops.reduce_sum(weight_tensor)),
                                name="loss")
Example #33
def _length_penalty(sequence_lengths, penalty_factor):
    """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.

  Args:
    sequence_lengths: The sequence length of all hypotheses, a tensor
      of shape [beam_size, vocab_size].
    penalty_factor: A scalar that weights the length penalty.

  Returns:
    The length penalty factor, a tensor of shape [beam_size].
  """
    penalty_factor = ops.convert_to_tensor(penalty_factor,
                                           name="penalty_factor")
    penalty_factor.set_shape(())  # penalty should be a scalar.
    static_penalty = tensor_util.constant_value(penalty_factor)
    if static_penalty is not None and static_penalty == 0:
        return 1.0
    return math_ops.div(
        (5. + math_ops.to_float(sequence_lengths))**penalty_factor,
        (5. + 1.)**penalty_factor)
Example #34
def sig_smooth_sqr(images, batch_size, cube_len, label_num, is_loss=False):
    sum_axis = [1, 2, 3]

    if is_loss:
        smooth_limit = 2.89
        images = my_sigmoid(images)
    else:
        smooth_limit = 3
        images_max = tf.argmax(images, axis=4)
        images = tf.one_hot(images_max, depth=label_num)

    chair_voxels = tf.reshape(tf.reduce_sum(images[:, :, :, :, 1:], axis=4),
                              (batch_size, cube_len, cube_len, cube_len, 1))
    background_voxels = tf.reshape(
        images[:, :, :, :, 0], (batch_size, cube_len, cube_len, cube_len, 1))
    images = tf.concat((background_voxels, chair_voxels), 4)

    pixel_dif1 = math_ops.abs(images[:, 1:, :, :, :] - images[:, :-1, :, :, :])
    pixel_dif2 = math_ops.abs(images[:, :, 1:, :, :] - images[:, :, :-1, :, :])
    pixel_dif3 = math_ops.abs(images[:, :, :, 1:, :] - images[:, :, :, :-1, :])

    shape = [batch_size, 1, cube_len, cube_len, 2]
    padding = tf.zeros(shape, tf.float32)
    tv_ax_1 = tf.concat((pixel_dif1, padding), axis=1) + tf.concat(
        (padding, pixel_dif1), axis=1)
    shape = [batch_size, cube_len, 1, cube_len, 2]
    padding = tf.zeros(shape, tf.float32)
    tv_ax_2 = tf.concat((pixel_dif2, padding), axis=2) + tf.concat(
        (padding, pixel_dif2), axis=2)
    shape = [batch_size, cube_len, cube_len, 1, 2]
    padding = tf.zeros(shape, tf.float32)
    tv_ax_3 = tf.concat((pixel_dif3, padding), axis=3) + tf.concat(
        (padding, pixel_dif3), axis=3)
    smooth = math_ops.reduce_sum(tv_ax_1 + tv_ax_2 + tv_ax_3, axis=4)

    smooth_matrix = tf.square(
        tf.nn.relu(
            math_ops.div(smooth, 2) - tf.ones(smooth.shape) * smooth_limit))
    smooth_sqr = math_ops.reduce_sum(smooth_matrix, axis=sum_axis)

    return smooth_sqr
Example #35
    def decayed_lr():
      """Helper to recompute learning rate; most helpful in eager-mode."""
      global_step_recomp = math_ops.cast(global_step, dtype)
      decay_steps_recomp = math_ops.cast(decay_steps, dtype)
      if cycle:
        # Find the first multiple of decay_steps that is bigger than
        # global_step. If global_step is zero set the multiplier to 1
        multiplier = control_flow_ops.cond(
            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
            lambda: math_ops.ceil(global_step_recomp / decay_steps))
        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
      else:
        # Make sure that the global_step used is not bigger than decay_steps.
        global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)

      p = math_ops.div(global_step_recomp, decay_steps_recomp)
      return math_ops.add(
          math_ops.multiply(learning_rate - end_learning_rate,
                            math_ops.pow(1 - p, power)),
          end_learning_rate,
          name=name)
Example #36
  def setUp(self):
    self.a = variables.Variable(2.0, name="a")
    self.b = variables.Variable(3.0, name="b")

    self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
    self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

    self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

    self.f_y = constant_op.constant(0.30, name="f_y")
    self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

    # These three nodes x, y and z form a graph with "cross-links" in it. I.e., x
    # and y are both direct inputs to z, but x is also a direct input to y.
    self.x = variables.Variable(2.0, name="x")  # Should be 2.0
    self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.

    self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

    self.sess = session.Session()
    self.sess.run(variables.global_variables_initializer())
Example #37
def rescale_image(image):
    """Rescale `image` to range $[0, 1]$.

    It does this by dividing by the largest value in the image.

    Args:
        image: 3-D tensor of shape `[height, width, channels]`.

    Returns:
        The rescaled image with same shape as `image`.

    Raises:
        ValueError: if the shape of 'image' is incompatible with this function.
    """
    _Check3DImage(image)

    image = math_ops.cast(image, dtype=dtypes.float32)
    image_max = tf.reduce_max(image)

    image = math_ops.div(image, image_max)
    return image
Example #38
def tf_safe_div(numerator, denominator):
    """ Computes a safe division which returns 0 if the denominator is zero.
    Note that the function contains an additional conditional check that is
    necessary for avoiding situations where the loss is zero causing NaNs to
    creep into the gradient computation.

    Args:
      numerator: tf.tensor
      denominator: tf.tensor

    Returns: tf.tensor

    """

    return array_ops.where(
        math_ops.greater(denominator, 0),
        math_ops.div(
            numerator,
            array_ops.where(math_ops.equal(denominator, 0),
                            array_ops.ones_like(denominator), denominator)),
        array_ops.zeros_like(numerator))
Example #39
def compute_cdf_from_histogram(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Computes the histogram and uses tf.cumsum to arrive at cdf

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
    **kwargs: keyword arguments: nbins, name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.

  """
  nbins = kwargs.get('nbins', _NBINS)
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    histogram = _histogram(
        values, value_range, dtype=dtypes.float32, nbins=nbins)
    cdf = math_ops.cumsum(histogram)
    return math_ops.div(cdf, math_ops.reduce_max(cdf))
Example #40
def _safe_div(numerator, denominator, name="value"):
    """Computes a safe divide which returns 0 if the denominator is zero.
  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.
  Args:
    numerator: An arbitrary `Tensor`.
    denominator: `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.
  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
    return array_ops.where(math_ops.greater(denominator, 0),
                           math_ops.div(
                               numerator,
                               array_ops.where(
                                   math_ops.equal(denominator, 0),
                                   array_ops.ones_like(denominator),
                                   denominator)),
                           array_ops.zeros_like(numerator),
                           name=name)
Example #41
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.sub(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
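The whole op is (x - mean) / max(stddev, 1 / sqrt(num_pixels)); a NumPy sketch of the same computation (the function name is mine):

import numpy as np

def per_image_standardization_sketch(image):
    # Subtract the mean and divide by the stddev, capped away from zero so a
    # uniform image does not divide by 0.
    image = image.astype(np.float32)
    mean = image.mean()
    adjusted_stddev = max(image.std(), 1.0 / np.sqrt(image.size))
    return (image - mean) / adjusted_stddev

img = np.random.randint(0, 256, size=(4, 4, 3)).astype(np.float32)
print(per_image_standardization_sketch(img).std())  # ~1.0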
Example #42
def _SparseReduceMinOrMaxGrad(op, out_grad):
    sp_indices = op.inputs[0]
    sp_values = op.inputs[1]
    sp_shape = op.inputs[2]
    reduction_axes = op.inputs[3]
    output = op.outputs[0]

    # Handle keepdims
    output_shape_kept_dims = math_ops.reduced_shape(sp_shape, op.inputs[3])
    out_grad = array_ops.reshape(out_grad, output_shape_kept_dims)
    output = array_ops.reshape(output, output_shape_kept_dims)

    # Map input and output coefficients
    scale = sp_shape // math_ops.to_int64(output_shape_kept_dims)
    scaled_indices = sp_indices // scale

    # Map pooled values with corresponding max/min values
    sp_max_val = array_ops.gather_nd(output, scaled_indices)
    indicators = math_ops.cast(math_ops.equal(sp_values, sp_max_val),
                               out_grad.dtype)
    grad_values = array_ops.gather_nd(out_grad, scaled_indices)

    # Compute the number of selected (maximum or minimum) elements in each
    # reduction dimension. If there are multiple minimum or maximum elements
    # then the gradient will be divided between them.
    # (same as for MaxGrad)
    sp_indicators = sparse_tensor.SparseTensor(sp_indices, indicators,
                                               sp_shape)
    num_selected = array_ops.gather_nd(
        sparse_ops.sparse_reduce_sum(sp_indicators,
                                     axis=reduction_axes,
                                     keep_dims=True), scaled_indices)

    # (input_indices, input_values, input_shape, reduction_axes)
    return [
        None,
        math_ops.div(indicators, math_ops.maximum(num_selected, 1)) *
        grad_values, None, None
    ]
Example #43
    def _setup_sparsity(self):
        begin_step = self._spec.sparsity_function_begin_step
        end_step = self._spec.sparsity_function_end_step
        initial_sparsity = self._spec.initial_sparsity
        target_sparsity = self._spec.target_sparsity
        exponent = self._spec.sparsity_function_exponent

        with ops.name_scope(self._spec.name):
            p = math_ops.minimum(
                1.0,
                math_ops.maximum(
                    0.0,
                    math_ops.div(
                        math_ops.cast(self._global_step - begin_step,
                                      dtypes.float32), end_step - begin_step)))
            sparsity = math_ops.add(math_ops.multiply(
                initial_sparsity - target_sparsity,
                math_ops.pow(1 - p, exponent)),
                                    target_sparsity,
                                    name='sparsity')

        return sparsity
Example #44
 def _sample_n(self, n, seed=None):
     sample_shape = array_ops.concat([[n], array_ops.shape(self.logits)], 0)
     logits = self.logits * array_ops.ones(sample_shape)
     logits_2d = array_ops.reshape(logits, [-1, self.event_size])
     # Uniform variates must be sampled from the open-interval `(0, 1)` rather
     # than `[0, 1)`. To do so, we use `np.finfo(self.dtype.as_numpy_dtype).tiny`
     # because it is the smallest, positive, "normal" number. A "normal" number
     # is such that the mantissa has an implicit leading 1. Normal, positive
     # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In
     # this case, a subnormal number (i.e., np.nextafter) can cause us to sample
     # 0.
     uniform = random_ops.random_uniform(
         shape=array_ops.shape(logits_2d),
         minval=np.finfo(self.dtype.as_numpy_dtype).tiny,
         maxval=1.,
         dtype=self.dtype,
         seed=seed)
     gumbel = -math_ops.log(-math_ops.log(uniform))
     noisy_logits = math_ops.div(gumbel + logits_2d, self._temperature_2d)
     samples = nn_ops.log_softmax(noisy_logits)
     ret = array_ops.reshape(samples, sample_shape)
     return ret
Example #45
def sum_squared_loss(predicted, target, name=None):
    # pylint: disable=line-too-long
    """Calculates 1/2 the sum of the squared loss across batches.

  Computes the squared difference between the target and predicted
  tensors, sums across all dimensions except dimension 0, and divides
  by 2:

      losses = reduce_batch_sum(squared_loss(predicted, target)) / 2.0

  where `losses` is a tensor with dimensions [batch_size].

  The tensors must have the same shape.

  This function is equivalent to typical formulations of L2 loss, and similar
  to TensorFlow's l2_loss function. It differs from the l2_loss function
  by allowing the caller to specify both the predicted and target tensors.

  Args:
    predicted: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]`
      of predicted values.
    target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of
      target values. The shape of the target tensor should match the
      `predicted` tensor.
    name: A name for the operation (optional).

  Returns:
    A `[batch_size]` tensor of squared losses summed across all dimensions
    except dimension 0, divided by 2.

  Raises:
    ValueError: If `predicted` and `target` shapes do not match.

  """
    # pylint: enable=line-too-long
    with ops.op_scope([predicted, target], name, "sum_squared_loss") as scope:
        return math_ops.div(reduce_batch_sum(squared_loss(predicted, target)),
                            2.0,
                            name=scope)
Example #46
  def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
      with vs.variable_scope("Gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        r, u, g = array_ops.split(
            _linear([inputs, state], 3 * self._num_units, True, 1.0), 3, 1)
        r, u, g = sigmoid(r), sigmoid(u), sigmoid(g)
      with vs.variable_scope("Candidate"):
        c = self._activation(_linear([inputs, r * state],
                                     self._num_units, True))
      new_h = u * state + (1 - u) * c

      eps = 1e-13
      temp = math_ops.div(
          math_ops.reduce_sum(math_ops.multiply(new_h, state), 1),
          math_ops.reduce_sum(math_ops.multiply(state, state), 1) + eps)

      m = array_ops.transpose(g)

      t1 = math_ops.multiply(m, temp)
      t1 = array_ops.transpose(t1)

      distract_h = new_h - state * t1
    return distract_h, distract_h
Example #47
def accuracy(predictions, labels, weights=None, name=None):
    """Computes the percentage of times that predictions matches labels.

  Args:
    predictions: the predicted values, a `Tensor` whose dtype and shape
                 matches 'labels'.
    labels: the ground truth values, a `Tensor` of any shape and
            bool, integer, or string dtype.
    weights: None or `Tensor` of float values to reweight the accuracy.
    name: A name for the operation (optional).

  Returns:
    Accuracy `Tensor`.

  Raises:
    ValueError: if dtypes don't match or
                if dtype is not bool, integer, or string.
  """
    if not (labels.dtype.is_integer
            or labels.dtype in (dtypes.bool, dtypes.string)):
        raise ValueError(
            'Labels should have bool, integer, or string dtype, not %r' %
            labels.dtype)
    if not labels.dtype.is_compatible_with(predictions.dtype):
        raise ValueError('Dtypes of predictions and labels should match. '
                         'Given: predictions (%r) and labels (%r)' %
                         (predictions.dtype, labels.dtype))
    with ops.name_scope(name, 'accuracy', values=[predictions, labels]):
        is_correct = math_ops.cast(math_ops.equal(predictions, labels),
                                   dtypes.float32)
        if weights is not None:
            is_correct = math_ops.multiply(is_correct, weights)
            num_values = math_ops.multiply(weights,
                                           array_ops.ones_like(is_correct))
            return math_ops.div(math_ops.reduce_sum(is_correct),
                                math_ops.reduce_sum(num_values))
        return math_ops.reduce_mean(is_correct)
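
# Not part of the original function: a small NumPy illustration of the
# weighted branch of `accuracy` above. The values are made up.
import numpy as np

predictions = np.array([1, 0, 2, 2])
labels = np.array([1, 1, 2, 0])
weights = np.array([1.0, 1.0, 2.0, 0.5])

is_correct = (predictions == labels).astype(np.float32)       # [1, 0, 1, 0]
weighted_accuracy = np.sum(is_correct * weights) / np.sum(weights)
print(weighted_accuracy)  # 3.0 / 4.5 ~= 0.667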
Example #48
0
    def inference_graph(self, input_data, **inference_args):
        """Constructs a TF graph for evaluating a random forest.

    Args:
      input_data: A tensor or dict of string->Tensor for input data.
      **inference_args: Keyword arguments to pass through to each tree.

    Returns:
      The last op in the random forest inference graph.

    Raises:
      NotImplementedError: If trying to use feature bagging with sparse
        features.
    """
        processed_dense_features, processed_sparse_features, data_spec = (
            data_ops.ParseDataTensorOrDict(input_data))

        probabilities = []
        for i in range(self.params.num_trees):
            with ops.device(self.device_assigner.get_device(i)):
                tree_data = processed_dense_features
                if self.params.bagged_features:
                    if processed_sparse_features is not None:
                        raise NotImplementedError(
                            'Feature bagging not supported with sparse features.'
                        )
                    tree_data = self._bag_features(i, input_data)
                probabilities.append(self.trees[i].inference_graph(
                    tree_data,
                    data_spec,
                    sparse_features=processed_sparse_features,
                    **inference_args))
        with ops.device(self.device_assigner.get_device(0)):
            all_predict = array_ops.stack(probabilities)
            return math_ops.div(math_ops.reduce_sum(all_predict, 0),
                                self.params.num_trees,
                                name='probabilities')
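
# Not part of the original graph code: the final op above is just an average
# over trees. A NumPy sketch with two hypothetical trees and three classes:
import numpy as np

all_predict = np.array([
    [[0.8, 0.1, 0.1]],   # tree 0, shape [batch=1, classes=3]
    [[0.4, 0.4, 0.2]],   # tree 1
])
num_trees = all_predict.shape[0]

probabilities = np.sum(all_predict, axis=0) / num_trees
print(probabilities)  # [[0.6, 0.25, 0.15]]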
def per_image_standardization(image):
    """Linearly scales `image` to have zero mean and unit variance.
    This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
    of all values in image, and
    `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.
    `stddev` is the standard deviation of all values in `image`. It is capped
    away from zero to protect against division by 0 when handling uniform images.
    Args:
    image: An n-D Tensor where the last 3 dimensions are
           `[height, width, channels]`.
    Returns:
    The standardized image with same shape as `image`.
    Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
    """
    with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
        image = ops.convert_to_tensor(image, name='image')
        num_pixels = math_ops.reduce_prod(array_ops.shape(image)[1:4])
        image = math_ops.cast(image, dtype=dtypes.float32)
        image_mean = math_ops.reduce_mean(image,
                                          axis=[-1, -2, -3],
                                          keepdims=True)
        variance = (math_ops.reduce_mean(
            math_ops.square(image), axis=[-1, -2, -3], keepdims=True) -
                    math_ops.square(image_mean))
        variance = gen_nn_ops.relu(variance)
        stddev = math_ops.sqrt(variance)

        # Apply a minimum normalization that protects us against uniform images.
        min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
        pixel_value_scale = math_ops.maximum(stddev, min_stddev)
        pixel_value_offset = image_mean

        image = math_ops.subtract(image, pixel_value_offset)
        image = math_ops.div(image, pixel_value_scale, name=scope)
        return image
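
# Not part of the original op: the same arithmetic on a tiny made-up image in
# NumPy. The only subtlety is the floor placed under the standard deviation.
import numpy as np

image = np.array([[[1.0], [2.0]],
                  [[3.0], [4.0]]])            # 2x2 image, 1 channel
num_pixels = image.size

mean = image.mean()
stddev = image.std()
min_stddev = 1.0 / np.sqrt(num_pixels)        # protects uniform images
adjusted_stddev = max(stddev, min_stddev)

standardized = (image - mean) / adjusted_stddev
print(standardized.mean(), standardized.std())  # ~0.0, ~1.0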
Example #50
0
def _SegmentMinOrMaxGrad(op, grad, is_sorted):
    """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code."""
    zeros = array_ops.zeros(array_ops.shape(op.inputs[0]),
                            dtype=op.inputs[0].dtype)

    # Get the number of selected (minimum or maximum) elements in each segment.
    gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1])
    is_selected = math_ops.equal(op.inputs[0], gathered_outputs)
    if is_sorted:
        num_selected = math_ops.segment_sum(
            math_ops.cast(is_selected, grad.dtype), op.inputs[1])
    else:
        num_selected = math_ops.unsorted_segment_sum(
            math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2])

    # Compute the gradient for each segment. The gradient for the ith segment is
    # divided evenly among the selected elements in that segment.
    weighted_grads = math_ops.div(grad, num_selected)
    gathered_grads = array_ops.gather(weighted_grads, op.inputs[1])

    if is_sorted:
        return array_ops.where(is_selected, gathered_grads, zeros), None
    else:
        return array_ops.where(is_selected, gathered_grads, zeros), None, None
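
# Not part of the original gradient function: a worked NumPy example of the
# key idea above -- the incoming gradient of a segment is split evenly among
# the tied minima in that segment. Data and segment ids are invented.
import numpy as np

data = np.array([1.0, 1.0, 3.0, 2.0])          # inputs to segment_min
segment_ids = np.array([0, 0, 0, 1])
seg_min = np.array([1.0, 2.0])                  # segment_min(data, segment_ids)
grad = np.array([6.0, 5.0])                     # incoming gradient per segment

gathered_outputs = seg_min[segment_ids]         # [1, 1, 1, 2]
is_selected = data == gathered_outputs          # [True, True, False, True]
num_selected = np.array([is_selected[segment_ids == s].sum()
                         for s in range(len(seg_min))])  # [2, 1]

weighted_grads = grad / num_selected            # [3.0, 5.0]
input_grad = np.where(is_selected, weighted_grads[segment_ids], 0.0)
print(input_grad)  # [3.0, 3.0, 0.0, 5.0]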
Example #51
0
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  input = op.inputs[0]  # pylint: disable=redefined-builtin
  output = op.outputs[0]

  # Compute the number of elements that are equal to the output in each
  # reduction dimension. If there are multiple such elements, the gradient is
  # divided evenly between them.
  indicators = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(output, -1), input), grad.dtype)

  grad = array_ops.expand_dims(grad, -1)
  num_selected = array_ops.expand_dims(math_ops.reduce_sum(indicators, -1), -1)

  return [math_ops.div(indicators, num_selected) * grad, None]
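
# Not part of the original gradient function: the same tie-splitting idea on a
# 1-D example in NumPy (values invented).
import numpy as np

inputs = np.array([4.0, 7.0, 7.0, 2.0])
output = 7.0                   # the selected n-th element, tied twice
grad = 1.0                     # incoming gradient for that output

indicators = (inputs == output).astype(np.float64)   # [0, 1, 1, 0]
num_selected = indicators.sum()                       # 2
input_grad = indicators / num_selected * grad
print(input_grad)  # [0.0, 0.5, 0.5, 0.0]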
def _length_penalty(sequence_lengths, penalty_factor):
    """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.
  Returns the length penalty tensor:
  ```
  [(5+sequence_lengths)/6]**penalty_factor
  ```
  where all operations are performed element-wise.
  Args:
    sequence_lengths: `Tensor`, the sequence lengths of each hypotheses.
    penalty_factor: A scalar that weights the length penalty.
  Returns:
    If the penalty is `0`, returns the scalar `1.0`.  Otherwise returns
    the length penalty factor, a tensor with the same shape as
    `sequence_lengths`.
  """
    penalty_factor = ops.convert_to_tensor(penalty_factor,
                                           name="penalty_factor")
    penalty_factor.set_shape(())  # penalty should be a scalar.
    static_penalty = tensor_util.constant_value(penalty_factor)
    if static_penalty is not None and static_penalty == 0:
        return 1.0
    return math_ops.div(
        (5. + math_ops.to_float(sequence_lengths))**penalty_factor,
        (5. + 1.)**penalty_factor)
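
# Not part of the original function: plugging numbers into the formula above,
# e.g. a hypothesis of length 7 with penalty_factor=0.6.
sequence_length = 7
penalty_factor = 0.6

penalty = ((5.0 + sequence_length) / 6.0) ** penalty_factor
print(penalty)  # (12 / 6) ** 0.6 ~= 1.516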
Example #53
0
def safe_div(numerator, denominator):
    """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
    if compat.forward_compatible(2018, 11, 1):
        return math_ops.div_no_nan(numerator, denominator)
    return array_ops.where(
        math_ops.greater(denominator, 0),
        math_ops.div(
            numerator,
            array_ops.where(math_ops.equal(denominator, 0),
                            array_ops.ones_like(denominator), denominator)),
        array_ops.zeros_like(numerator))
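
# Not part of the original function: a NumPy rendering of the fallback
# branch's behavior on mixed denominators (the inputs are made up).
import numpy as np

numerator = np.array([1.0, 2.0, 3.0])
denominator = np.array([2.0, 0.0, 4.0])

# Replace zero denominators with one before dividing, then zero the result
# wherever the denominator was not positive -- exactly the trick used above.
result = np.where(denominator > 0,
                  numerator / np.where(denominator == 0, 1.0, denominator),
                  np.zeros_like(numerator))
print(result)  # [0.5, 0.0, 0.75]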
Example #54
0
    def __call__(self, inputs, state, reuse=False):
        scope = self.scope
        with vs.variable_scope(scope, reuse=reuse):  # "GRUCell"
            with vs.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                r = linear(tf.concat((inputs, state), 1),
                           self._num_units,
                           activation_fn=None)
                g = linear(tf.concat((inputs, state), 1),
                           self._num_units,
                           activation_fn=None)
                u = linear(tf.concat((inputs, state), 1),
                           self._num_units,
                           activation_fn=None)
                r, u, g = tf.cast(sigmoid(r), tf.float32), tf.cast(
                    sigmoid(u), tf.float32), tf.cast(sigmoid(g), tf.float32)
            #print ("R SHAPE: ", r)
            #print ("STATE: ", state)
            #print ("INPUTS: ", inputs)
            #print ("CONCAT: ", tf.concat((inputs, r * state), 1))
            with vs.variable_scope("Candidate"):
                c = self._activation(
                    linear(tf.concat((inputs, r * state), 1),
                           self._num_units,
                           activation_fn=None))
            new_h = u * state + (1 - u) * c
            eps = 1e-13
            temp = math_ops.div(
                math_ops.reduce_sum(math_ops.mul(new_h, state), 1),
                math_ops.reduce_sum(math_ops.mul(state, state), 1) + eps)
            m = array_ops.transpose(g)
            t1 = math_ops.mul(m, temp)
            t1 = array_ops.transpose(t1)
            distract_h = new_h - state * t1

        return distract_h, distract_h
def polynomial_decay(learning_rate,
                     global_step,
                     decay_steps,
                     end_learning_rate=0.0001,
                     power=1.0,
                     cycle=False,
                     name=None):
    """Applies a polynomial decay to the learning rate.

  It is commonly observed that a monotonically decreasing learning rate, whose
  degree of change is carefully chosen, results in a better performing model.
  This function applies a polynomial decay function to a provided initial
  `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`.

  It requires a `global_step` value to compute the decayed learning rate.  You
  can just pass a TensorFlow variable that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  global_step = min(global_step, decay_steps)
  decayed_learning_rate = (learning_rate - end_learning_rate) *
                          (1 - global_step / decay_steps) ^ (power) +
                          end_learning_rate

  ```

  If `cycle` is True then a multiple of `decay_steps` is used, the first one
  that is bigger than `global_step`.

  ```python
  decay_steps = decay_steps * ceil(global_step / decay_steps)
  decayed_learning_rate = (learning_rate - end_learning_rate) *
                          (1 - global_step / decay_steps) ^ (power) +
                          end_learning_rate

  ```

  Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  end_learning_rate = 0.01
  decay_steps = 10000
  learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step,
                                            decay_steps, end_learning_rate,
                                            power=0.5)
  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Must be positive.  See the decay computation above.
    end_learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The minimal end learning rate.
    power: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
    name: String.  Optional name of the operation. Defaults to
      'PolynomialDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
    if global_step is None:
        raise ValueError("global_step is required for polynomial_decay.")
    with ops.name_scope(
            name, "PolynomialDecay",
        [learning_rate, global_step, decay_steps, end_learning_rate, power
         ]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate,
                                              name="learning_rate")
        dtype = learning_rate.dtype
        global_step = math_ops.cast(global_step, dtype)
        decay_steps = math_ops.cast(decay_steps, dtype)
        end_learning_rate = math_ops.cast(end_learning_rate, dtype)
        power = math_ops.cast(power, dtype)
        if cycle:
            # Find the first multiple of decay_steps that is bigger than global_step.
            # If global_step is zero set the multiplier to 1
            multiplier = control_flow_ops.cond(
                math_ops.equal(global_step, 0), lambda: 1.0,
                lambda: math_ops.ceil(global_step / decay_steps))
            decay_steps = math_ops.multiply(decay_steps, multiplier)
        else:
            # Make sure that the global_step used is not bigger than decay_steps.
            global_step = math_ops.minimum(global_step, decay_steps)

        p = math_ops.div(global_step, decay_steps)
        return math_ops.add(math_ops.multiply(
            learning_rate - end_learning_rate, math_ops.pow(1 - p, power)),
                            end_learning_rate,
                            name=name)
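
# Not part of the original function: a direct numeric check of the decay
# formula, without TensorFlow, using the values from the docstring example.
learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 10000
power = 0.5

def decayed(global_step):
    step = min(global_step, decay_steps)
    p = step / decay_steps
    return (learning_rate - end_learning_rate) * (1 - p) ** power + end_learning_rate

print(decayed(0))       # 0.1
print(decayed(2500))    # 0.09 * sqrt(0.75) + 0.01 ~= 0.0879
print(decayed(10000))   # 0.01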
def embedding_lookup_sparse_with_distributed_aggregation(
    params,
    sp_ids,
    sp_weights,
    partition_strategy="mod",
    name=None,
    combiner=None,
    max_norm=None):
  """Computes embeddings for the given ids and weights.

  Embeddings belonging to same param are aggregated on that device first. This
  op is intended to decrease data transmission and improve parallelism. See
  `tf.nn.embedding_lookup_sparse` for the functionality and example of this op.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.dense_shape.get_shape().assert_is_compatible_with(
        sp_weights.dense_shape.get_shape())
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    weights = None if ignore_weights else sp_weights.values
    embeddings = _embedding_lookup_with_distributed_aggregation(
        params,
        ids,
        partition_strategy=partition_strategy,
        max_norm=max_norm,
        weights=weights,
        idx=idx,
        segment_ids=segment_ids)
    # Set weights to all one if ignore weights.
    if ignore_weights:
      weights = array_ops.fill([array_ops.shape(segment_ids)[0]], 1)
    if weights.dtype != embeddings.dtype:
      weights = math_ops.cast(weights, embeddings.dtype)
    # Reshape weights.
    ones = array_ops.fill(
        array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
    bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0)
    orig_weights_shape = weights.get_shape()
    weights = array_ops.reshape(weights, bcast_weights_shape)
    if embeddings.get_shape().ndims is not None:
      weights.set_shape(
          orig_weights_shape.concatenate(
              [1 for _ in range(embeddings.get_shape().ndims - 1)]))

    if combiner == "mean":
      weight_sum = math_ops.segment_sum(weights, segment_ids)
      embeddings = math_ops.div(embeddings, weight_sum)
    elif combiner == "sqrtn":
      weights_squared = math_ops.pow(weights, 2)
      weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
      weight_sum_sqrt = math_ops.sqrt(weight_sum)
      embeddings = math_ops.div(embeddings, weight_sum_sqrt)
    elif combiner != "sum":
      assert False, "Unrecognized combiner"
    return embeddings
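
# Not part of the original function: the combiner step at the end reduces to a
# segment sum followed by a divide. A NumPy sketch of the "mean" and "sqrtn"
# normalizations for a single row (weights and embeddings invented).
import numpy as np

embeddings = np.array([[2.0, 4.0],
                       [1.0, 1.0]])     # two looked-up embeddings in one row
weights = np.array([2.0, 0.5])

weighted_sum = (embeddings * weights[:, None]).sum(axis=0)

mean_combined = weighted_sum / weights.sum()                    # "mean"
sqrtn_combined = weighted_sum / np.sqrt((weights ** 2).sum())   # "sqrtn"
print(mean_combined, sqrtn_combined)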
Example #57
0
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
    """Computes embeddings for the given ids and weights.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      shape(combined params) = [p0, p1, ..., pm]

    and

      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]

    then

      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    with `combiner`="mean", then the output will be a 3x20 matrix where

      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = params[0, :] * 1.0
      output[2, :] = params[1, :] * 3.0

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
    if combiner is None:
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if combiner not in ("mean", "sqrtn", "sum"):
        raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
    if isinstance(params, variables.PartitionedVariable):
        params = list(params)  # Iterate to get the underlying Variables.
    if not isinstance(params, list):
        params = [params]
    if not isinstance(sp_ids, sparse_tensor.SparseTensor):
        raise TypeError("sp_ids must be SparseTensor")
    ignore_weights = sp_weights is None
    if not ignore_weights:
        if not isinstance(sp_weights, sparse_tensor.SparseTensor):
            raise TypeError("sp_weights must be either None or SparseTensor")
        sp_ids.values.get_shape().assert_is_compatible_with(
            sp_weights.values.get_shape())
        sp_ids.indices.get_shape().assert_is_compatible_with(
            sp_weights.indices.get_shape())
        sp_ids.dense_shape.get_shape().assert_is_compatible_with(
            sp_weights.dense_shape.get_shape())
        # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
        # sp_weights have equal indices and shapes.

    with ops.name_scope(name, "embedding_lookup_sparse",
                        params + [sp_ids]) as name:
        segment_ids = sp_ids.indices[:, 0]
        if segment_ids.dtype != dtypes.int32:
            segment_ids = math_ops.cast(segment_ids, dtypes.int32)

        ids = sp_ids.values
        if ignore_weights:
            ids, idx = array_ops.unique(ids)
        else:
            idx = None

        embeddings = embedding_lookup(params,
                                      ids,
                                      partition_strategy=partition_strategy,
                                      max_norm=max_norm)
        if not ignore_weights:
            weights = sp_weights.values
            if weights.dtype != embeddings.dtype:
                weights = math_ops.cast(weights, embeddings.dtype)

            # Reshape weights to allow broadcast
            ones = array_ops.fill(
                array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
            bcast_weights_shape = array_ops.concat_v2(
                [array_ops.shape(weights), ones], 0)

            orig_weights_shape = weights.get_shape()
            weights = array_ops.reshape(weights, bcast_weights_shape)

            # Set the weight shape, since after reshaping to bcast_weights_shape,
            # the shape becomes None.
            if embeddings.get_shape().ndims is not None:
                weights.set_shape(
                    orig_weights_shape.concatenate(
                        [1 for _ in range(embeddings.get_shape().ndims - 1)]))

            embeddings *= weights

            if combiner == "sum":
                embeddings = math_ops.segment_sum(embeddings,
                                                  segment_ids,
                                                  name=name)
            elif combiner == "mean":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weight_sum = math_ops.segment_sum(weights, segment_ids)
                embeddings = math_ops.div(embeddings, weight_sum, name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.segment_sum(embeddings, segment_ids)
                weights_squared = math_ops.pow(weights, 2)
                weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
                weight_sum_sqrt = math_ops.sqrt(weight_sum)
                embeddings = math_ops.div(embeddings,
                                          weight_sum_sqrt,
                                          name=name)
            else:
                assert False, "Unrecognized combiner"
        else:
            assert idx is not None
            if combiner == "sum":
                embeddings = math_ops.sparse_segment_sum(embeddings,
                                                         idx,
                                                         segment_ids,
                                                         name=name)
            elif combiner == "mean":
                embeddings = math_ops.sparse_segment_mean(embeddings,
                                                          idx,
                                                          segment_ids,
                                                          name=name)
            elif combiner == "sqrtn":
                embeddings = math_ops.sparse_segment_sqrt_n(embeddings,
                                                            idx,
                                                            segment_ids,
                                                            name=name)
            else:
                assert False, "Unrecognized combiner"

        return embeddings
Example #58
0
def convert_image_dtype(image, dtype, saturate=False, name=None):
    """Convert `image` to `dtype`, scaling its values if needed.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Note that converting from floating point inputs to integer types may lead to
  over/underflow problems. Set saturate to `True` to avoid such problem in
  problematic conversions. If enabled, saturation will clip the output into the
  allowed range before performing a potentially dangerous cast (and only before
  performing such a cast, i.e., when casting from a floating point to an integer
  type, and when casting from a signed to an unsigned type; `saturate` has no
  effect on casts between floats, or on casts that increase the type's range).

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    saturate: If `True`, clip the input before casting (if necessary).
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.
  """

    if dtype == image.dtype:
        return image

    with ops.op_scope([image], name, 'convert_image') as name:
        # Both integer: use integer multiplication in the larger range
        if image.dtype.is_integer and dtype.is_integer:
            scale_in = image.dtype.max
            scale_out = dtype.max
            if scale_in > scale_out:
                # Scaling down, scale first, then cast. The scaling factor will
                # cause in.max to be mapped to above out.max but below out.max+1,
                # so that the output is safely in the supported range.
                scale = (scale_in + 1) // (scale_out + 1)
                scaled = math_ops.div(image, scale)

                if saturate:
                    return saturate_cast(scaled, dtype)
                else:
                    return math_ops.cast(scaled, dtype)
            else:
                # Scaling up, cast first, then scale. The scale will not map in.max to
                # out.max, but converting back and forth should result in no change.
                if saturate:
                    cast = saturate_cast(image, dtype)
                else:
                    cast = math_ops.cast(image, dtype)
                scale = (scale_out + 1) // (scale_in + 1)
                return math_ops.mul(cast, scale)
        elif image.dtype.is_floating and dtype.is_floating:
            # Both float: Just cast, no possible overflows in the allowed ranges.
            # Note: We're ignoring float overflows. If your image dynamic range
            # exceeds float range you're on your own.
            return math_ops.cast(image, dtype)
        else:
            if image.dtype.is_integer:
                # Converting to float: first cast, then scale. No saturation possible.
                cast = math_ops.cast(image, dtype)
                scale = 1. / image.dtype.max
                return math_ops.mul(cast, scale)
            else:
                # Converting from float: first scale, then cast
                scale = dtype.max + 0.5  # avoid rounding problems in the cast
                scaled = math_ops.mul(image, scale)
                if saturate:
                    return saturate_cast(scaled, dtype)
                else:
                    return math_ops.cast(scaled, dtype)
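
# Not part of the original op: the two scaling directions in plain NumPy for
# the common uint8 <-> float case (no saturation shown).
import numpy as np

# Integer -> float: cast first, then scale into [0, 1].
u8 = np.array([0, 128, 255], dtype=np.uint8)
as_float = u8.astype(np.float32) * (1.0 / 255)
print(as_float)            # [0.0, 0.502, 1.0]

# Float -> integer: scale first (with +0.5 to avoid rounding problems), then cast.
f = np.array([0.0, 0.5, 0.999], dtype=np.float32)
as_u8 = (f * (255 + 0.5)).astype(np.uint8)
print(as_u8)               # [0, 127, 255]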
def inverse_time_decay(learning_rate,
                       global_step,
                       decay_steps,
                       decay_rate,
                       staircase=False,
                       name=None):
    """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an inverse decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step /
                                           decay_steps)
  ```

  Example: decay 1/t with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1.0
  decay_rate = 0.5
  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step,
                                              decay_steps, decay_rate)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number.  The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String.  Optional name of the operation.  Defaults to
      'InverseTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
    if global_step is None:
        raise ValueError("global_step is required for inverse_time_decay.")
    with ops.name_scope(name, "InverseTimeDecay",
                        [learning_rate, global_step, decay_rate]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate,
                                              name="learning_rate")
        dtype = learning_rate.dtype
        global_step = math_ops.cast(global_step, dtype)
        decay_steps = math_ops.cast(decay_steps, dtype)
        decay_rate = math_ops.cast(decay_rate, dtype)
        p = global_step / decay_steps
        if staircase:
            p = math_ops.floor(p)
        const = math_ops.cast(constant_op.constant(1), learning_rate.dtype)
        denom = math_ops.add(const, math_ops.multiply(decay_rate, p))
        return math_ops.div(learning_rate, denom, name=name)
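
# Not part of the original function: a quick numeric check of the decayed
# value, matching the docstring example (decay_steps=1.0, decay_rate=0.5).
import math

learning_rate = 0.1
decay_steps = 1.0
decay_rate = 0.5

def decayed(global_step, staircase=False):
    p = global_step / decay_steps
    if staircase:
        p = math.floor(p)
    return learning_rate / (1 + decay_rate * p)

print(decayed(0))    # 0.1
print(decayed(1))    # 0.1 / 1.5 ~= 0.0667
print(decayed(4))    # 0.1 / 3.0 ~= 0.0333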
  def testComplexDiv(self):
    foo = array_ops.constant([1. + 3.j])
    with self.cached_session():
      _ = math_ops.divide(foo, 1.).eval()
      _ = math_ops.div(foo, 2.).eval()