def test_assert_scalar(self):
  check_ops.assert_scalar(constant_op.constant(3))
  check_ops.assert_scalar(constant_op.constant("foo"))
  check_ops.assert_scalar(3)
  check_ops.assert_scalar("foo")
  with self.assertRaisesRegexp(ValueError, "Expected scalar"):
    check_ops.assert_scalar(constant_op.constant([3, 4]))
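For reference, the same checks can be reproduced through the public API; this is a minimal sketch assuming TensorFlow 2.x, where `tf.debugging.assert_scalar` is the exported counterpart of the internal `check_ops.assert_scalar` exercised above.

import tensorflow as tf

tf.debugging.assert_scalar(tf.constant(3))      # rank-0 int tensor: passes
tf.debugging.assert_scalar(tf.constant("foo"))  # rank-0 string tensor: passes
try:
  tf.debugging.assert_scalar(tf.constant([3, 4]))  # rank-1 tensor
except ValueError as err:
  print(err)  # a statically known non-scalar shape raises ValueError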
def true_mean_confidence_interval_by_dkwm(
    samples, low, high, error_rate=1e-6, name=None):
  """Computes a confidence interval for the mean of a scalar distribution.

  In batch mode, computes confidence intervals for all distributions
  in the batch (which need not be identically distributed).

  Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality]
  (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval).

  The probability (over the randomness of drawing the given samples)
  that any true mean is outside the corresponding returned interval is
  no more than the given `error_rate`.  The size of the intervals scales as
  `O(1 / sqrt(#samples))`, as `O(high - low)`, and as `O(-log(error_rate))`.

  Note that `error_rate` is a total error rate for all the confidence
  intervals in the batch.  As such, if the batch is nontrivial, the
  error rate is not broadcast but divided (evenly) among the batch
  members.

  Args:
    samples: Floating-point `Tensor` of samples from the distribution(s)
      of interest.  Entries are assumed IID across the 0th dimension.
      The other dimensions must broadcast with `low` and `high`.
      The support is bounded: `low <= samples <= high`.
    low: Floating-point `Tensor` of lower bounds on the distributions'
      supports.
    high: Floating-point `Tensor` of upper bounds on the distributions'
      supports.
    error_rate: *Scalar* floating-point `Tensor`, the admissible total rate
      of mistakes.
    name: A name for this operation (optional).

  Returns:
    low: A floating-point `Tensor` of stochastic lower bounds on the
      true means.
    high: A floating-point `Tensor` of stochastic upper bounds on the
      true means.
  """
  with ops.name_scope(
      name, "true_mean_confidence_interval_by_dkwm",
      [samples, low, high, error_rate]):
    samples = ops.convert_to_tensor(samples, name="samples")
    low = ops.convert_to_tensor(low, name="low")
    high = ops.convert_to_tensor(high, name="high")
    error_rate = ops.convert_to_tensor(error_rate, name="error_rate")
    samples = _check_shape_dominates(samples, [low, high])
    check_ops.assert_scalar(error_rate)  # Static shape
    error_rate = _itemwise_error_rate(error_rate, [low, high], samples)
    n = array_ops.shape(samples)[0]
    envelope = _dkwm_cdf_envelope(n, error_rate)
    min_mean = _minimum_mean(samples, envelope, low)
    max_mean = _maximum_mean(samples, envelope, high)
    return min_mean, max_mean
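A hedged usage sketch of the function above, assuming the module's TF 1.x imports are in scope and a graph/session runtime; the sample size, bounds, and distribution below are purely illustrative.

import numpy as np

# Uniform(0, 1) has true mean 0.5; with 10000 IID samples the DKWM interval
# should bracket it with probability at least 1 - error_rate.
samples = np.random.uniform(0., 1., size=10000).astype(np.float32)
low_ci, high_ci = true_mean_confidence_interval_by_dkwm(
    samples, low=0., high=1., error_rate=1e-6)
# In graph mode, evaluate with e.g. tf.compat.v1.Session().run([low_ci, high_ci]).
# The interval width shrinks like O(1 / sqrt(10000)) and grows with (high - low)
# and with -log(error_rate), as described in the docstring.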
  def _check_scale(self, scale, dtype):
    """Check that the init arg `scale` defines a valid operator."""
    if scale is None:
      return constant_op.constant(1.0, dtype=dtype)

    scale = ops.convert_to_tensor(scale, dtype=dtype, name="scale")

    if not self._verify_pd:
      return scale

    # Further check that this is a rank 0, positive tensor.
    scale = check_ops.assert_scalar(scale)
    return control_flow_ops.with_dependencies(
        [check_ops.assert_positive(scale)], scale)
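An illustrative call pattern for the method above, assuming a hypothetical instance `operator` of the enclosing class with `_verify_pd=True` and TF 1.x graph execution.

scale = operator._check_scale(2.0, dtype=dtypes.float32)
# `scale` is the converted rank-0 tensor; evaluating it also runs the attached
# assert_positive check, so a non-positive value fails at run time, whereas
# operator._check_scale([1.0, 2.0], dtypes.float32) fails at graph-construction
# time in check_ops.assert_scalar because the shape is statically rank 1.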
def assert_scalar_int(tensor, name=None):
  """Assert `tensor` is 0-D, of type `tf.int32` or `tf.int64`.

  Args:
    tensor: `Tensor` to test.
    name: Name of the op and of the new `Tensor` if one is created.
  Returns:
    `tensor`, for chaining.
  Raises:
    ValueError: if `tensor` is not 0-D or is not of integer type.
  """
  with ops.name_scope(name, 'assert_scalar_int', [tensor]) as name_scope:
    tensor = ops.convert_to_tensor(tensor)
    data_type = tensor.dtype
    if not data_type.base_dtype.is_integer:
      raise ValueError('Expected integer type for %s, received type: %s.'
                       % (tensor.name, data_type))
    return check_ops.assert_scalar(tensor, name=name_scope)
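A brief usage sketch, assuming the same module-level imports (`constant_op`, `dtypes`) are available; the values are illustrative.

num_classes = assert_scalar_int(constant_op.constant(10, dtype=dtypes.int64))
batch_size = assert_scalar_int(32)  # a Python int converts to a 0-D int32 tensor
# assert_scalar_int(constant_op.constant(1.5))    -> ValueError: not integer type
# assert_scalar_int(constant_op.constant([1, 2])) -> ValueError: not 0-D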
    def __init__(self,
                 dim,
                 dtype=dtypes.float32,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="HypersphericalUniform"):
        """Initialize a batch of Hyperspherical Uniform distributions.

        Args:
          dim: Integer tensor, dimensionality of the distribution(s). Must
            be `dim > 0`.
          dtype: The data type of samples drawn from the distribution.
            Default value: `dtypes.float32`.
          validate_args: Python `bool`, default `False`. When `True` distribution
            parameters are checked for validity despite possibly degrading runtime
            performance. When `False` invalid inputs may silently render incorrect
            outputs.
          allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
            (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
            result is undefined. When `False`, an exception is raised if one or
            more of the statistic's batch members are undefined.
          name: Python `str` name prefixed to Ops created by this class.

        Raises:
          InvalidArgumentError: if `validate_args=True` and `dim` is not a
            positive integer scalar.
        """
        parameters = locals()
        with ops.name_scope(name, values=[dim]):
            with ops.control_dependencies([
                    check_ops.assert_positive(dim),
                    check_ops.assert_integer(dim),
                    check_ops.assert_scalar(dim)
            ] if validate_args else []):
                self._dim = dim

            super(HypersphericalUniform, self).__init__(
                dtype=dtype,
                reparameterization_type=distributions.FULLY_REPARAMETERIZED,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                parameters=parameters,
                graph_parents=[],
                name=name)
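An illustrative construction, assuming the rest of the `HypersphericalUniform` class and its TF 1.x dependencies are available; the argument values are only a sketch.

# Uniform distribution over the surface of a sphere; `dim` must be a positive
# integer scalar, which the constructor checks when validate_args=True.
uniform_on_sphere = HypersphericalUniform(
    dim=2, dtype=dtypes.float32, validate_args=True, name="SphereUniform")
# With validate_args=True, the constructor builds assert_positive,
# assert_integer and assert_scalar checks on `dim` before storing it.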
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False,
                  increment_global_step=True):
    """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES
      for full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - by function taking learning rate `Tensor` as argument and returning an
      `Optimizer` instance. E.g. `optimize_loss(...,
      optimizer=lambda lr: tf.compat.v1.train.MomentumOptimizer(lr,
      momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g. `optimize_loss(..., learning_rate=None,
      optimizer=lambda: tf.compat.v1.train.MomentumOptimizer(0.5,
      momentum=0.5))`.
  - by a subclass of `Optimizer` having a single-argument constructor
      (the argument is the learning rate), such as AdamOptimizer or
      AdagradOptimizer. E.g. `optimize_loss(...,
      optimizer=tf.compat.v1.train.AdagradOptimizer)`.
  - by an instance of a subclass of `Optimizer`.
      E.g., `optimize_loss(...,
      optimizer=tf.compat.v1.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter to update on each step unless
      `increment_global_step` is `False`. If not supplied, it will be fetched
      from the default graph (see `tf.compat.v1.train.get_global_step` for
      details). If it has not been created, no step will be incremented with
      each weight update. `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of update per each training
      step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer. If a
      string, it should be the name of an optimizer, like 'SGD', 'Adam' or
      'Adagrad' (the full list is in the OPTIMIZER_CLS_NAMES constant). If a
      class, it should be a subclass of `tf.Optimizer` that implements the
      `compute_gradients` and `apply_gradients` functions. If an instance, it
      should be an instantiation of a `tf.Optimizer` subclass with those same
      functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this
      value.
    gradient_multipliers: dict of variables or variable names to floats. If
      present, gradients for specified variables will be multiplied by given
      constant.
    clip_gradients: float, callable or `None`. If a float is provided, a global
      clipping is applied to prevent the norm of the gradient from exceeding
      this value. Alternatively, a callable can be provided, e.g.,
      `adaptive_clipping_fn()`.  This callable takes a list of `(gradients,
      variables)` tuples and returns the same thing with the gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
      `Tensor`s, returns `Tensor`. Can be used to implement any learning rate
      decay function, for example `tf.compat.v1.train.exponential_decay`.
      Ignored if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If `None`,
      uses elements of UPDATE_OPS collection. The order of execution between
      `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize or `None` to use all trainable
      variables.
    name: The name for this operation, used to scope operations and summaries.
    summaries: List of internal quantities to visualize on tensorboard. If not
      set, the loss, the learning rate, and the global norm of the gradients
      will be reported. The complete list of possible values is in
      OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    increment_global_step: Whether to increment `global_step`. If your model
      calls `optimize_loss` multiple times per training step (e.g. to optimize
      different parts of the model), use this arg to avoid incrementing
      `global_step` more times than necessary.

  Returns:
    Training op.

  Raises:
    ValueError: if:
        * `loss` is an invalid type or shape.
        * `global_step` is an invalid type or shape.
        * `learning_rate` is an invalid type or value.
        * `optimizer` has the wrong type.
        * `clip_gradients` is neither float nor callable.
        * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
          `global_step` is available.
        * `gradients` is empty.
  """
    loss = ops.convert_to_tensor(loss)
    check_ops.assert_scalar(loss)
    if global_step is None:
        global_step = train.get_global_step()
    else:
        train.assert_global_step(global_step)
    with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
        # Update ops take UPDATE_OPS collection if not provided.
        if update_ops is None:
            update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
        # Make sure update ops are run before computing loss.
        if update_ops:
            loss = control_flow_ops.with_dependencies(list(update_ops), loss)

        # Learning rate variable, with possible decay.
        lr = None
        if learning_rate is not None:
            if (isinstance(learning_rate, ops.Tensor)
                    and learning_rate.get_shape().ndims == 0):
                lr = learning_rate
            elif isinstance(learning_rate, float):
                if learning_rate < 0.0:
                    raise ValueError("Invalid learning_rate %s." %
                                     learning_rate)
                lr = vs.get_variable(
                    "learning_rate", [],
                    trainable=False,
                    initializer=init_ops.constant_initializer(learning_rate))
            else:
                raise ValueError(
                    "Learning rate should be 0d Tensor or float. "
                    "Got %s of type %s" %
                    (str(learning_rate), str(type(learning_rate))))
        if summaries is None:
            summaries = ["loss", "learning_rate", "global_gradient_norm"]
        else:
            for summ in summaries:
                if summ not in OPTIMIZER_SUMMARIES:
                    raise ValueError(
                        "Summaries should be one of [%s], you provided %s." %
                        (", ".join(OPTIMIZER_SUMMARIES), summ))
        if learning_rate is not None and learning_rate_decay_fn is not None:
            if global_step is None:
                raise ValueError(
                    "global_step is required for learning_rate_decay_fn.")
            lr = learning_rate_decay_fn(lr, global_step)
            if "learning_rate" in summaries:
                summary.scalar("learning_rate", lr)

        # Create optimizer, given specified parameters.
        if isinstance(optimizer, six.string_types):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is string (%s)." % optimizer)
            if optimizer not in OPTIMIZER_CLS_NAMES:
                raise ValueError(
                    "Optimizer name should be one of [%s], you provided %s." %
                    (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
            opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
        elif (isinstance(optimizer, type)
              and issubclass(optimizer, optimizer_.Optimizer)):
            if lr is None:
                raise ValueError(
                    "Learning rate is None, but should be specified if "
                    "optimizer is class (%s)." % optimizer)
            opt = optimizer(learning_rate=lr)
        elif isinstance(optimizer, optimizer_.Optimizer):
            opt = optimizer
        elif callable(optimizer):
            if learning_rate is not None:
                opt = optimizer(lr)
            else:
                opt = optimizer()
            if not isinstance(opt, optimizer_.Optimizer):
                raise ValueError(
                    "Unrecognized optimizer: function should return "
                    "subclass of Optimizer. Got %s." % str(opt))
        else:
            raise ValueError(
                "Unrecognized optimizer: should be string, "
                "subclass of Optimizer, instance of "
                "subclass of Optimizer or function with one argument. "
                "Got %s." % str(optimizer))

        # All trainable variables, if specific variables are not specified.
        if variables is None:
            variables = vars_.trainable_variables()

        # Compute gradients.
        gradients = opt.compute_gradients(
            loss,
            variables,
            colocate_gradients_with_ops=colocate_gradients_with_ops)

        # Optionally add gradient noise.
        if gradient_noise_scale is not None:
            gradients = _add_scaled_noise_to_gradients(gradients,
                                                       gradient_noise_scale)

        # Multiply some gradients.
        if gradient_multipliers is not None:
            gradients = _multiply_gradients(gradients, gradient_multipliers)
            if not gradients:
                raise ValueError(
                    "Empty list of (gradient, var) pairs encountered. This is most "
                    "likely to be caused by an improper value of gradient_multipliers."
                )

        if "global_gradient_norm" in summaries or "gradient_norm" in summaries:
            summary.scalar("global_norm/gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Optionally clip gradients by global norm.
        if isinstance(clip_gradients, float):
            gradients = _clip_gradients_by_norm(gradients, clip_gradients)
        elif callable(clip_gradients):
            gradients = clip_gradients(gradients)
        elif clip_gradients is not None:
            raise ValueError("Unknown type %s for clip_gradients" %
                             type(clip_gradients))

        # Add scalar summary for loss.
        if "loss" in summaries:
            summary.scalar("loss", loss)

        # Add histograms for variables, gradients and gradient norms.
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if grad_values is not None:
                var_name = variable.name.replace(":", "_")
                if "gradients" in summaries:
                    summary.histogram("gradients/%s" % var_name, grad_values)
                if "gradient_norm" in summaries:
                    summary.scalar("gradient_norm/%s" % var_name,
                                   clip_ops.global_norm([grad_values]))

        if clip_gradients is not None and ("global_gradient_norm" in summaries
                                           or "gradient_norm" in summaries):
            summary.scalar("global_norm/clipped_gradient_norm",
                           clip_ops.global_norm(list(zip(*gradients))[0]))

        # Create gradient updates.
        grad_updates = opt.apply_gradients(
            gradients,
            global_step=global_step if increment_global_step else None,
            name="train")

        # Ensure the train_tensor computes grad_updates.
        train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

        return train_tensor
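A minimal usage sketch of `optimize_loss`, assuming TF 1.x graph mode, that `loss` is a scalar tensor already built in the default graph, and that a global step variable exists. The string and callable optimizer forms are both taken from the docstring above.

train_op = optimize_loss(
    loss,
    global_step=train.get_global_step(),  # or None to fetch/skip automatically
    learning_rate=0.01,
    optimizer="SGD",                       # any key of OPTIMIZER_CLS_NAMES
    clip_gradients=5.0,                    # clip by global norm
    summaries=["loss", "learning_rate", "global_gradient_norm"])

# Equivalent with a callable optimizer, as described in the docstring:
#   optimizer=lambda lr: tf.compat.v1.train.MomentumOptimizer(lr, momentum=0.5)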