def test_assert_scalar(self): check_ops.assert_scalar(constant_op.constant(3)) check_ops.assert_scalar(constant_op.constant("foo")) check_ops.assert_scalar(3) check_ops.assert_scalar("foo") with self.assertRaisesRegexp(ValueError, "Expected scalar"): check_ops.assert_scalar(constant_op.constant([3, 4]))
def true_mean_confidence_interval_by_dkwm(samples, low, high, error_rate=1e-6, name=None): """Computes a confidence interval for the mean of a scalar distribution. In batch mode, computes confidence intervals for all distributions in the batch (which need not be identically distributed). Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). The probability (over the randomness of drawing the given samples) that any true mean is outside the corresponding returned interval is no more than the given `error_rate`. The size of the intervals scale as `O(1 / sqrt(#samples))`, as `O(high - low)`, and as `O(-log(error_rate))`. Note that `error_rate` is a total error rate for all the confidence intervals in the batch. As such, if the batch is nontrivial, the error rate is not broadcast but divided (evenly) among the batch members. Args: samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low` and `high`. The support is bounded: `low <= samples <= high`. low: Floating-point `Tensor` of lower bounds on the distributions' supports. high: Floating-point `Tensor` of upper bounds on the distributions' supports. error_rate: *Scalar* floating-point `Tensor` admissible total rate of mistakes. name: A name for this operation (optional). Returns: low: A floating-point `Tensor` of stochastic lower bounds on the true means. high: A floating-point `Tensor` of stochastic upper bounds on the true means. """ with ops.name_scope(name, "true_mean_confidence_interval_by_dkwm", [samples, low, high, error_rate]): samples = ops.convert_to_tensor(samples, name="samples") low = ops.convert_to_tensor(low, name="low") high = ops.convert_to_tensor(high, name="high") error_rate = ops.convert_to_tensor(error_rate, name="error_rate") samples = _check_shape_dominates(samples, [low, high]) check_ops.assert_scalar(error_rate) # Static shape error_rate = _itemwise_error_rate(error_rate, [low, high], samples) n = array_ops.shape(samples)[0] envelope = _dkwm_cdf_envelope(n, error_rate) min_mean = _minimum_mean(samples, envelope, low) max_mean = _maximum_mean(samples, envelope, high) return min_mean, max_mean
def true_mean_confidence_interval_by_dkwm( samples, low, high, error_rate=1e-6, name=None): """Computes a confidence interval for the mean of a scalar distribution. In batch mode, computes confidence intervals for all distributions in the batch (which need not be identically distributed). Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). The probability (over the randomness of drawing the given samples) that any true mean is outside the corresponding returned interval is no more than the given `error_rate`. The size of the intervals scale as `O(1 / sqrt(#samples))`, as `O(high - low)`, and as `O(-log(error_rate))`. Note that `error_rate` is a total error rate for all the confidence intervals in the batch. As such, if the batch is nontrivial, the error rate is not broadcast but divided (evenly) among the batch members. Args: samples: Floating-point `Tensor` of samples from the distribution(s) of interest. Entries are assumed IID across the 0th dimension. The other dimensions must broadcast with `low` and `high`. The support is bounded: `low <= samples <= high`. low: Floating-point `Tensor` of lower bounds on the distributions' supports. high: Floating-point `Tensor` of upper bounds on the distributions' supports. error_rate: *Scalar* floating-point `Tensor` admissible total rate of mistakes. name: A name for this operation (optional). Returns: low: A floating-point `Tensor` of stochastic lower bounds on the true means. high: A floating-point `Tensor` of stochastic upper bounds on the true means. """ with ops.name_scope( name, "true_mean_confidence_interval_by_dkwm", [samples, low, high, error_rate]): samples = ops.convert_to_tensor(samples, name="samples") low = ops.convert_to_tensor(low, name="low") high = ops.convert_to_tensor(high, name="high") error_rate = ops.convert_to_tensor(error_rate, name="error_rate") samples = _check_shape_dominates(samples, [low, high]) check_ops.assert_scalar(error_rate) # Static shape error_rate = _itemwise_error_rate(error_rate, [low, high], samples) n = array_ops.shape(samples)[0] envelope = _dkwm_cdf_envelope(n, error_rate) min_mean = _minimum_mean(samples, envelope, low) max_mean = _maximum_mean(samples, envelope, high) return min_mean, max_mean
def _check_scale(self, scale, dtype): """Check that the init arg `scale` defines a valid operator.""" if scale is None: return constant_op.constant(1.0, dtype=dtype) scale = ops.convert_to_tensor(scale, dtype=dtype, name="scale") if not self._verify_pd: return scale # Further check that this is a rank 0, positive tensor. scale = check_ops.assert_scalar(scale) return control_flow_ops.with_dependencies( [check_ops.assert_positive(scale)], scale)
def assert_scalar_int(tensor, name=None): """Assert `tensor` is 0-D, of type `tf.int32` or `tf.int64`. Args: tensor: `Tensor` to test. name: Name of the op and of the new `Tensor` if one is created. Returns: `tensor`, for chaining. Raises: ValueError: if `tensor` is not 0-D, of integer type. """ with ops.name_scope(name, 'assert_scalar_int', [tensor]) as name_scope: tensor = ops.convert_to_tensor(tensor) data_type = tensor.dtype if not data_type.base_dtype.is_integer: raise ValueError('Expected integer type for %s, received type: %s.' % (tensor.name, data_type)) return check_ops.assert_scalar(tensor, name=name_scope)
def __init__(self, dim, dtype=dtypes.float32, validate_args=False, allow_nan_stats=True, name="HypersphericalUniform"): """Initialize a batch of Hyperspherical Uniform distributions. Args: dim: Integer tensor, dimensionality of the distribution(s). Must be `dim > 0`. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. Raises: InvalidArgumentError: if `dim > 0` and `validate_args=False`. """ parameters = locals() with ops.name_scope(name, values=[dim]): with ops.control_dependencies([ check_ops.assert_positive(dim), check_ops.assert_integer(dim), check_ops.assert_scalar(dim) ] if validate_args else []): self._dim = dim super(HypersphericalUniform, self).__init__( dtype=dtype, reparameterization_type=distributions.FULLY_REPARAMETERIZED, validate_args=validate_args, allow_nan_stats=allow_nan_stats, parameters=parameters, graph_parents=[], name=name)
def optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers include: - by string specifying the name of the optimizer. See OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - by function taking learning rate `Tensor` as argument and returning an `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.compat.v1.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.compat.v1.train.MomentumOptimizer(0.5, momentum=0.5))`. - by a subclass of `Optimizer` having a single-argument constructor (the argument is the learning rate), such as AdamOptimizer or AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.compat.v1.train.AdagradOptimizer)`. - by an instance of a subclass of `Optimizer`. E.g., `optimize_loss(..., optimizer=tf.compat.v1.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.compat.v1.train.get_global_step` for details). If it has not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If a float is provided, a global clipping is applied to prevent the norm of the gradient from exceeding this value. Alternatively, a callable can be provided, e.g., `adaptive_clipping_fn()`. This callable takes a list of `(gradients, variables)` tuples and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.compat.v1.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not set, the loss, the learning rate, and the global norm of the gradients will be reported. The complete list of possible values is in OPTIMIZER_SUMMARIES. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` has the wrong type. * `clip_gradients` is neither float nor callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty. """ loss = ops.convert_to_tensor(loss) check_ops.assert_scalar(loss) if global_step is None: global_step = train.get_global_step() else: train.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are ran before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if (isinstance(learning_rate, ops.Tensor) and learning_rate.get_shape().ndims == 0): lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s." % learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError( "Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if summaries is None: summaries = ["loss", "learning_rate", "global_gradient_norm"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError( "Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError( "global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError( "Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr) else: opt = optimizer() if not isinstance(opt, optimizer_.Optimizer): raise ValueError( "Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError( "Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." % str(optimizer)) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() # Compute gradients. gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers." ) if "global_gradient_norm" in summaries or "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Optionally clip gradients by global norm. if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError("Unknown type %s for clip_gradients" % type(clip_gradients)) # Add scalar summary for loss. if "loss" in summaries: summary.scalar("loss", loss) # Add histograms for variables, gradients and gradient norms. for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and ("global_gradient_norm" in summaries or "gradient_norm" in summaries): summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor