def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1):
  """Make inputs into input and feed functions."""
  if input_fn is None:
    if x is None:
      raise ValueError('Either x or input_fn must be provided.')

    if contrib_framework.is_tensor(x) or (y is not None and
                                          contrib_framework.is_tensor(y)):
      raise ValueError('Inputs cannot be tensors. Please provide input_fn.')

    if feed_fn is not None:
      raise ValueError('Can not provide both feed_fn and x or y.')

    df = data_feeder.setup_train_data_feeder(x, y, n_classes=None,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             epochs=epochs)
    return df.input_builder, df.get_feed_dict_fn()

  if (x is not None) or (y is not None):
    raise ValueError('Can not provide both input_fn and x or y.')
  if batch_size is not None:
    raise ValueError('Can not provide both input_fn and batch_size.')
  return input_fn, feed_fn

def dropout(dropout_rate, share=shared_mask_dropout, flip_prob=None,
            kind='bernoulli', scaler=1.0):
  if dropout_rate is not None:
    # The same graph is used for training and evaluation with different
    # dropout rates. Passing the constant configured dropout rate here would
    # be a subtle error.
    assert contrib_framework.is_tensor(dropout_rate)
    if flip_prob is not None:
      assert kind == 'bernoulli'
      return DriftingDropout(1 - dropout_rate, flip_prob=flip_prob,
                             scaler=scaler)
    elif kind == 'bernoulli':
      return Dropout(1 - dropout_rate, share_mask=share, scaler=scaler)
    elif kind == 'dirichlet':
      return DirichletDropout(1 - dropout_rate, share_mask=share, scaler=scaler)
    elif kind == 'gaussian':
      return GaussianDropout(1 - dropout_rate, share_mask=share, scaler=scaler)
    else:
      assert False

def __init__(self, dtype, graph_parents=None, is_non_singular=None, is_self_adjoint=None, is_positive_definite=None, name=None): r"""Initialize the `LinearOperator`. **This is a private method for subclass use.** **Subclasses should copy-paste this `__init__` documentation.** Args: dtype: The type of the this `LinearOperator`. Arguments to `apply` and `solve` will have to be this type. graph_parents: Python list of graph prerequisites of this `LinearOperator` Typically tensors that are passed during initialization. is_non_singular: Expect that this operator is non-singular. is_self_adjoint: Expect that this operator is equal to its hermitian transpose. If `dtype` is real, this is equivalent to being symmetric. is_positive_definite: Expect that this operator is positive definite, meaning the real part of all eigenvalues is positive. We do not require the operator to be self-adjoint to be positive-definite. See: https://en.wikipedia.org/wiki/Positive-definite_matrix\ #Extension_for_non_symmetric_matrices name: A name for this `LinearOperator`. Raises: ValueError: if any member of graph_parents is `None` or not a `Tensor`. """ # Check and auto-set flags. if is_positive_definite: if is_non_singular is False: raise ValueError( "A positive definite matrix is always non-singular.") is_non_singular = True graph_parents = [] if graph_parents is None else graph_parents for i, t in enumerate(graph_parents): if t is None or not contrib_framework.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) self._dtype = dtype self._graph_parents = graph_parents self._is_non_singular = is_non_singular self._is_self_adjoint = is_self_adjoint self._is_positive_definite = is_positive_definite self._name = name or type(self).__name__ # We will cache some tensors to avoid repeatedly adding shape # manipulation ops to the graph. # Naming convention: # self._cached_X_tensor is the cached version of self._X_tensor. self._cached_shape_tensor = None self._cached_batch_shape_tensor = None self._cached_domain_dimension_tensor = None self._cached_range_dimension_tensor = None self._cached_tensor_rank_tensor = None
def collate(batch):
    if is_tensor(batch[0]):
        return [b.unsqueeze(0) for b in batch]
    elif isinstance(batch[0], np.ndarray):
        return batch
    elif isinstance(batch[0], int):
        return tf.constant(batch, dtype=tf.int64)
    elif isinstance(batch[0], collections.abc.Iterable):
        # Nested iterables are transposed so that each field is collated together.
        transposed = zip(*batch)
        return [collate(samples) for samples in transposed]

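A usage sketch for the non-tensor branches only (the `unsqueeze(0)` call suggests the tensor branch expects torch-style tensors); the batch contents and variable names here are illustrative, assuming `np`, `tf` and `collections` are imported as in the snippet above:

import numpy as np

batch = [(1, np.zeros(3)), (2, np.ones(3))]
labels, features = collate(batch)
# labels   -> tf.constant([1, 2], dtype=tf.int64)   (int branch)
# features -> (array([0., 0., 0.]), array([1., 1., 1.]))  (ndarray branch returns the group unchanged)
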
def __init__(self, dtype, is_continuous, reparameterization_type, validate_args, allow_nan_stats, parameters=None, graph_parents=None, name=None): """Constructs the `Distribution`. **This is a private method for subclass use.** Args: dtype: The type of the event samples. `None` implies no type-enforcement. is_continuous: Python `bool`. If `True` this `Distribution` is continuous over its supported domain. reparameterization_type: Instance of `ReparameterizationType`. If `distributions.FULLY_REPARAMETERIZED`, this `Distribution` can be reparameterized in terms of some standard distribution with a function whose Jacobian is constant for the support of the standard distribution. If `distributions.NOT_REPARAMETERIZED`, then no such reparameterization is available. validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect outputs. allow_nan_stats: Python `bool`, default `True`. When `True`, statistics (e.g., mean, mode, variance) use the value "`NaN`" to indicate the result is undefined. When `False`, an exception is raised if one or more of the statistic's batch members are undefined. parameters: Python `dict` of parameters used to instantiate this `Distribution`. graph_parents: Python `list` of graph prerequisites of this `Distribution`. name: Python `str` name prefixed to Ops created by this class. Default: subclass name. Raises: ValueError: if any member of graph_parents is `None` or not a `Tensor`. """ graph_parents = [] if graph_parents is None else graph_parents for i, t in enumerate(graph_parents): if t is None or not contrib_framework.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) self._dtype = dtype self._is_continuous = is_continuous self._reparameterization_type = reparameterization_type self._allow_nan_stats = allow_nan_stats self._validate_args = validate_args self._parameters = parameters or {} self._graph_parents = graph_parents self._name = name or type(self).__name__
def __init__(self, dtype, graph_parents=None, is_non_singular=None, is_self_adjoint=None, is_positive_definite=None, name=None): """Initialize the `LinearOperator`. **This is a private method for subclass use.** **Subclasses should copy-paste this `__init__` documentation.** For `X = non_singular, self_adjoint` etc... `is_X` is a Python `bool` initialization argument with the following meaning * If `is_X == True`, callers should expect the operator to have the attribute `X`. This is a promise that should be fulfilled, but is *not* a runtime assert. Issues, such as floating point error, could mean the operator violates this promise. * If `is_X == False`, callers should expect the operator to not have `X`. * If `is_X == None` (the default), callers should have no expectation either way. Args: dtype: The type of the this `LinearOperator`. Arguments to `apply` and `solve` will have to be this type. graph_parents: Python list of graph prerequisites of this `LinearOperator` Typically tensors that are passed during initialization. is_non_singular: Expect that this operator is non-singular. is_self_adjoint: Expect that this operator is equal to its hermitian transpose. If `dtype` is real, this is equivalent to being symmetric. is_positive_definite: Expect that this operator is positive definite. name: A name for this `LinearOperator`. Default: subclass name. Raises: ValueError: if any member of graph_parents is `None` or not a `Tensor`. """ if is_positive_definite and not is_self_adjoint: raise ValueError( "A positive definite matrix is by definition self adjoint") if is_positive_definite and not is_non_singular: raise ValueError( "A positive definite matrix is by definition non-singular") graph_parents = [] if graph_parents is None else graph_parents for i, t in enumerate(graph_parents): if t is None or not contrib_framework.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) self._dtype = dtype self._graph_parents = graph_parents self._is_non_singular = is_non_singular self._is_self_adjoint = is_self_adjoint self._is_positive_definite = is_positive_definite self._name = name or type(self).__name__
def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1):
  """Make inputs into input and feed functions.

  Args:
    x: Numpy, Pandas or Dask matrix or iterable.
    y: Numpy, Pandas or Dask matrix or iterable.
    input_fn: Pre-defined input function for training data.
    feed_fn: Pre-defined data feeder function.
    batch_size: Size to split data into parts. Must be >= 1.
    shuffle: Whether to shuffle the inputs.
    epochs: Number of epochs to run.

  Returns:
    Data input and feeder function based on training data.

  Raises:
    ValueError: Only one of `(x & y)` or `input_fn` must be provided.
  """
  if input_fn is None:
    if x is None:
      raise ValueError('Either x or input_fn must be provided.')

    if contrib_framework.is_tensor(x) or (y is not None and
                                          contrib_framework.is_tensor(y)):
      raise ValueError('Inputs cannot be tensors. Please provide input_fn.')

    if feed_fn is not None:
      raise ValueError('Can not provide both feed_fn and x or y.')

    df = data_feeder.setup_train_data_feeder(x, y, n_classes=None,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             epochs=epochs)
    return df.input_builder, df.get_feed_dict_fn()

  if (x is not None) or (y is not None):
    raise ValueError('Can not provide both input_fn and x or y.')
  if batch_size is not None:
    raise ValueError('Can not provide both input_fn and batch_size.')
  return input_fn, feed_fn

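A minimal sketch of the mutual-exclusivity check above; `my_input_fn`, `x` and `y` are illustrative names, and only the validation path (no `data_feeder` call) is exercised:

import numpy as np

x = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 2, size=100)

def my_input_fn():
    ...  # would build and return feature/label tensors

try:
    # Supplying in-memory arrays together with input_fn is rejected.
    _get_input_fn(x=x, y=y, input_fn=my_input_fn, feed_fn=None, batch_size=32)
except ValueError as e:
    print(e)  # -> Can not provide both input_fn and x or y.
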
def __init__(self,
             dtype,
             is_continuous,
             reparameterization_type,
             validate_args,
             allow_nan_stats,
             parameters=None,
             graph_parents=None,
             name=None):
  """Constructs the `Distribution`.

  **This is a private method for subclass use.**

  Args:
    dtype: The type of the event samples. `None` implies no type-enforcement.
    is_continuous: Python boolean. If `True` this `Distribution` is continuous
      over its supported domain.
    reparameterization_type: Instance of `ReparameterizationType`.
      If `distributions.FULLY_REPARAMETERIZED`, this `Distribution` can be
      reparameterized in terms of some standard distribution with a function
      whose Jacobian is constant for the support of the standard
      distribution. If `distributions.NOT_REPARAMETERIZED`, then no such
      reparameterization is available.
    validate_args: Python boolean. Whether to validate input with asserts.
      If `validate_args` is `False`, and the inputs are invalid, correct
      behavior is not guaranteed.
    allow_nan_stats: Python boolean. If `False`, raise an exception if a
      statistic (e.g., mean, mode) is undefined for any batch member. If
      `True`, batch members with valid parameters leading to undefined
      statistics will return `NaN` for this statistic.
    parameters: Python dictionary of parameters used to instantiate this
      `Distribution`.
    graph_parents: Python list of graph prerequisites of this `Distribution`.
    name: A name for this distribution. Default: subclass name.

  Raises:
    ValueError: if any member of graph_parents is `None` or not a `Tensor`.
  """
  graph_parents = [] if graph_parents is None else graph_parents
  for i, t in enumerate(graph_parents):
    if t is None or not contrib_framework.is_tensor(t):
      raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t))
  parameters = parameters or {}
  self._dtype = dtype
  self._is_continuous = is_continuous
  self._reparameterization_type = reparameterization_type
  self._allow_nan_stats = allow_nan_stats
  self._validate_args = validate_args
  self._parameters = parameters
  self._graph_parents = graph_parents
  self._name = name or type(self).__name__

def common_dtype(args_list, preferred_dtype=None):
  """Returns explicit dtype from `args_list` if there is one."""
  dtype = None
  for a in args_list:
    if isinstance(a, (np.ndarray, np.generic)):
      dt = a.dtype.type
    elif contrib_framework.is_tensor(a):
      dt = a.dtype.as_numpy_dtype
    else:
      continue
    if dtype is None:
      dtype = dt
    elif dtype != dt:
      raise TypeError('Found incompatible dtypes, {} and {}.'.format(dtype, dt))
  return preferred_dtype if dtype is None else dtype

def log_norm(X, axis=1, scale_factor=10000):
    """Seurat-style log-normalization:

        y = log1p(X / (sum(X, axis) + epsilon) * scale_factor)

    where log1p is the natural logarithm of (1 + x).
    """
    if is_tensor(X):
        return tf.log1p(
            X / (tf.reduce_sum(X, axis=axis, keepdims=True) + EPS) * scale_factor)
    elif isinstance(X, np.ndarray):
        X = X.astype('float64')
        return np.log1p(
            X / (np.sum(X, axis=axis, keepdims=True) + np.finfo(X.dtype).eps) *
            scale_factor)
    elif isinstance(X, pd.DataFrame):
        X = X.astype('float64')
        return X.apply(lambda x: np.log1p(x / (X.sum() + EPS) * scale_factor))
    else:
        raise ValueError(
            "Only support numpy.ndarray, pandas.DataFrame or tensorflow.Tensor")

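A small usage sketch for the NumPy branch only (no `EPS` or TensorFlow needed on this path); the input array is made up for illustration:

import numpy as np

counts = np.array([[1., 2., 3.],
                   [0., 5., 5.]])
normed = log_norm(counts, axis=1, scale_factor=10000)
# Each row is rescaled to sum (approximately) to scale_factor before log1p,
# so the first row becomes log1p([1, 2, 3] / 6 * 10000).
print(normed.round(2))
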
def tril_ids(n):
  """Internal helper to create vector of linear indices into y."""
  # Build the ids statically; chose 512 because it implies 1MiB.
  if not contrib_framework.is_tensor(n) and n <= 512:
    ids = np.arange(n**2, dtype=np.int32)
    rows = (ids / n).astype(np.int32)  # Implicit floor.
    # We need to stop incrementing the index when we encounter
    # upper-triangular elements. The idea here is to compute the
    # lower-right number of zeros then by "symmetry" subtract this from the
    # total number of zeros, n(n-1)/2.
    # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
    offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
    # We could also zero out when (rows < cols) == (rows < ids-n*rows).
    # mask = (ids <= (n + 1) * rows).astype(np.int32)
  else:
    ids = math_ops.range(n**2)
    rows = math_ops.cast(ids / n, dtype=dtypes.int32)
    offset = math_ops.cast(rows * (2 * n - rows - 1) / 2, dtype=dtypes.int32)
  return ids - offset

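A quick numeric check of the static branch (a standalone sketch, not part of the original module): for n = 3 the row offsets cancel out the upper-triangular positions, so the lower triangle reads y[0]..y[5] in row-major order once the upper entries are masked.

import numpy as np

n = 3
ids = np.arange(n**2, dtype=np.int32)
rows = (ids / n).astype(np.int32)
offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
print((ids - offset).reshape(n, n))
# [[0 1 2]
#  [1 2 3]
#  [3 4 5]]
# Only the lower-triangular entries matter: [0], [1, 2], [3, 4, 5].
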
def common_dtype(args_list, preferred_dtype=None):
  """Returns explicit dtype from `args_list` if there is one."""
  dtype = None
  while args_list:
    a = args_list.pop()
    if isinstance(a, (np.ndarray, np.generic)):
      dt = a.dtype.type
    elif contrib_framework.is_tensor(a):
      dt = a.dtype.base_dtype.as_numpy_dtype
    else:
      if isinstance(a, list):
        # Allows for nested types, e.g. Normal([np.float16(1.0)], [2.0])
        args_list.extend(a)
      continue
    if dtype is None:
      dtype = dt
    elif dtype != dt:
      raise TypeError('Found incompatible dtypes, {} and {}.'.format(dtype, dt))
  return preferred_dtype if dtype is None else tf.as_dtype(dtype)

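A usage sketch of this variant, assuming TensorFlow is imported as `tf` and the TF 1.x `contrib_framework` module referenced inside the function is importable; note the function mutates `args_list`, so pass a throwaway list:

import numpy as np

print(common_dtype([np.float16(1.0), [2.0, 3.0]], preferred_dtype=tf.float32))
# tf.float16: the explicit NumPy dtype wins; the plain Python floats are skipped.

print(common_dtype([[1.0], 2.0], preferred_dtype=tf.float32))
# tf.float32: no explicit dtype was found, so the preferred dtype is returned.
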
def do_train_step(self, additional_ops):
    """
    Does one training step. Look at the class documentation to get the
    requirements needed for a successful update step.
    Approximates a 1-dimensional parabolic function along the negative
    gradient direction. If the approximation is a negative line, a step of
    measuring_step_size is done in line direction. If the approximation is
    another, unsuited parabola, no update step is done.
    :param additional_ops: additional operations that infer information from
        the graph
    :return: loss (before parameter update), additional_ops_results
    """
    max_step_size = self._sess.run(self.max_step_size) if is_tensor(
        self.max_step_size) else self.max_step_size
    loose_approximation_factor = self._sess.run(self.loose_approximation_factor) if \
        is_tensor(self.loose_approximation_factor) else self.loose_approximation_factor
    measuring_step = self._sess.run(self.measuring_step_size) if \
        is_tensor(self.measuring_step_size) else self.measuring_step_size

    # does step to position on line, which got inferred in the last call of
    # this function
    loss_at_current_position, line_derivative_current_pos, additional_ops_results = \
        self._get_loss_directional_deriv_and_save_gradient(additional_ops)

    loss1 = loss_at_current_position
    loss2 = self._do_line_step(measuring_step)

    first_derivative_current_line_pos_plus_one_half = (loss2 - loss1) / measuring_step
    second_derivative_current_line_pos = loose_approximation_factor * (
        first_derivative_current_line_pos_plus_one_half -
        line_derivative_current_pos) / (measuring_step / 2)

    if np.isnan(second_derivative_current_line_pos) or np.isnan(line_derivative_current_pos) \
            or np.isinf(second_derivative_current_line_pos) or np.isinf(line_derivative_current_pos):
        return loss1, additional_ops_results

    if second_derivative_current_line_pos > 0 and line_derivative_current_pos < 0:
        # approximation is positive (convex) square function.
        # Minimum is located in positive line direction. Should be the primary case.
        step_size_on_line = -line_derivative_current_pos / second_derivative_current_line_pos
    elif second_derivative_current_line_pos <= 0 and line_derivative_current_pos < 0:
        # l''<0, l'<0 -> approximation is negative (concave) square function;
        #                maximum is located in negative line direction.
        # l''==0, l'<0 -> approximation is a negative line.
        # Second step was more negative, so we jump there.
        step_size_on_line = measuring_step
    else:
        # l'>0 can't happen since the first derivative is the norm of the
        # gradient. l'==0 -> the current position is already an optimum.
        step_size_on_line = 0

    if step_size_on_line > max_step_size:
        step_size_on_line = max_step_size

    step_to_target_point = step_size_on_line - measuring_step

    # plotting
    if self.is_plot:
        global_step = self._sess.run(self._global_step)
        if global_step % self.plot_step_interval == 1:
            self.plot_loss_line_and_approximation(
                measuring_step / 10, step_to_target_point, measuring_step,
                second_derivative_current_line_pos, line_derivative_current_pos,
                loss1, loss2, self.save_dir)

    if step_to_target_point != 0:
        self._sess.run(self.weight_vars_assign_ops,
                       feed_dict={self._step_on_line_plh: step_to_target_point})

    self._sess.run(self._increase_global_step_op)

    return loss1, additional_ops_results

def amari_alpha(logu, alpha=1., self_normalized=False, name=None):
  """The Amari-alpha Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  When `self_normalized = True`, the Amari-alpha Csiszar-function is:

  ```none
  f(u) = { -log(u) + (u - 1),                                      alpha = 0
         { u log(u) - (u - 1),                                     alpha = 1
         { [(u**alpha - 1) - alpha (u - 1)] / (alpha (alpha - 1)), otherwise
  ```

  When `self_normalized = False` the `(u - 1)` terms are omitted.

  Warning: when `alpha != 0` and/or `self_normalized = True` this function
  makes non-log-space calculations and may therefore be numerically unstable
  for `|logu| >> 0`.

  For more information, see:
    A. Cichocki and S. Amari. "Families of Alpha- Beta- and Gamma-Divergences:
    Flexible and Robust Measures of Similarities." Entropy, vol. 12, no. 6,
    pp. 1532-1568, 2010.

  Args:
    logu: `float`-like `Tensor` representing `log(u)` from above.
    alpha: `float`-like Python scalar. (See Mathematical Details for meaning.)
    self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When
      `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even
      when `p, q` are unnormalized measures.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    amari_alpha_of_u: `float`-like `Tensor` of the Csiszar-function evaluated
      at `u = exp(logu)`.

  Raises:
    TypeError: if `alpha` is `None` or a `Tensor`.
    TypeError: if `self_normalized` is `None` or a `Tensor`.
  """
  with ops.name_scope(name, "amari_alpha", [logu]):
    if alpha is None or contrib_framework.is_tensor(alpha):
      raise TypeError("`alpha` cannot be `None` or `Tensor` type.")
    if self_normalized is None or contrib_framework.is_tensor(self_normalized):
      raise TypeError("`self_normalized` cannot be `None` or `Tensor` type.")

    logu = ops.convert_to_tensor(logu, name="logu")

    if alpha == 0.:
      f = -logu
    elif alpha == 1.:
      f = math_ops.exp(logu) * logu
    else:
      f = math_ops.expm1(alpha * logu) / (alpha * (alpha - 1.))

    if not self_normalized:
      return f

    if alpha == 0.:
      return f + math_ops.expm1(logu)
    elif alpha == 1.:
      return f - math_ops.expm1(logu)
    else:
      return f - math_ops.expm1(logu) / (alpha - 1.)

def test_combines_static_dynamic_shape(self):
  tensor = tf.placeholder(tf.float32, shape=(None, 2, 3))
  combined_shape = shape_utils.combined_static_and_dynamic_shape(tensor)
  self.assertTrue(contrib_framework.is_tensor(combined_shape[0]))
  self.assertListEqual(combined_shape[1:], [2, 3])

def input_x(self, x):
    if is_tensor(x):
        self._input_x = x
    else:
        tf.logging.error("Input_x is not a tensor")

def __init__(self,
             shift=None,
             scale_identity_multiplier=None,
             scale_diag=None,
             scale_tril=None,
             scale_perturb_factor=None,
             scale_perturb_diag=None,
             event_ndims=1,
             validate_args=False,
             name="affine"):
  """Instantiates the `Affine` bijector.

  This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments,
  giving the forward operation:

  ```none
  Y = g(X) = scale @ X + shift
  ```

  where the `scale` term is logically equivalent to:

  ```python
  scale = (
    scale_identity_multiplier * tf.diag(tf.ones(d)) +
    tf.diag(scale_diag) +
    scale_tril +
    scale_perturb_factor @ diag(scale_perturb_diag) @
      tf.transpose([scale_perturb_factor])
  )
  ```

  If none of `scale_identity_multiplier`, `scale_diag`, or `scale_tril` are
  specified then `scale += IdentityMatrix`. Otherwise specifying a `scale`
  argument has the semantics of `scale += Expand(arg)`, i.e.,
  `scale_diag != None` means `scale += tf.diag(scale_diag)`.

  Args:
    shift: Floating-point `Tensor`. If this is set to `None`, no shift is
      applied.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix.
      When `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
      to `scale`.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ... k], which represents a k x k
      diagonal matrix.
      When `None` no diagonal term is added to `scale`.
    scale_tril: Floating-point `Tensor` representing the lower triangular
      matrix. `scale_tril` has shape [N1, N2, ... k, k], which represents a
      k x k lower triangular matrix.
      When `None` no `scale_tril` term is added to `scale`.
      The upper triangular elements above the diagonal are ignored.
    scale_perturb_factor: Floating-point `Tensor` representing factor matrix
      with last two dimensions of shape `(k, r)`. When `None`, no rank-r
      update is added to `scale`.
    scale_perturb_diag: Floating-point `Tensor` representing the diagonal
      matrix. `scale_perturb_diag` has shape [N1, N2, ... r], which
      represents an `r x r` diagonal matrix. When `None` low rank updates
      will take the form `scale_perturb_factor * scale_perturb_factor.T`.
    event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions
      associated with a particular draw from the distribution. Must be 0 or 1.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    name: Python `str` name given to ops managed by this object.

  Raises:
    ValueError: if `perturb_diag` is specified but not `perturb_factor`.
    TypeError: if `shift` has different `dtype` from `scale` arguments.
  """
  self._graph_parents = []
  self._name = name
  self._validate_args = validate_args
  # Ambiguous definition of low rank update.
  if scale_perturb_diag is not None and scale_perturb_factor is None:
    raise ValueError("When scale_perturb_diag is specified, "
                     "scale_perturb_factor must be specified.")
  # Special case, only handling a scaled identity matrix. We don't know its
  # dimensions, so this is special cased.
  # We don't check identity_multiplier, since below we set it to 1. if all
  # other scale args are None.
  self._is_only_identity_multiplier = (scale_tril is None and
                                       scale_diag is None and
                                       scale_perturb_factor is None)
  # When no args are specified, pretend the scale matrix is the identity
  # matrix.
  if self._is_only_identity_multiplier and scale_identity_multiplier is None:
    scale_identity_multiplier = 1.
  with self._name_scope("init", values=[
      shift, scale_identity_multiplier, scale_diag, scale_tril,
      scale_perturb_diag, scale_perturb_factor, event_ndims]):
    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
    if validate_args:
      is_less_than_two = check_ops.assert_less(
          event_ndims, 2, message="event_ndims must be 0 or 1")
      event_ndims = control_flow_ops.with_dependencies(
          [is_less_than_two], event_ndims)
    self._shift = _as_tensor(shift, "shift")
    # self._create_scale_operator returns an OperatorPD in all cases except
    # if self._is_only_identity_multiplier; in which case it returns a
    # scalar Tensor.
    self._scale = self._create_scale_operator(
        identity_multiplier=scale_identity_multiplier,
        diag=scale_diag,
        tril=scale_tril,
        perturb_diag=scale_perturb_diag,
        perturb_factor=scale_perturb_factor,
        event_ndims=event_ndims,
        validate_args=validate_args)
    if (self._shift is not None and
        self._shift.dtype.base_dtype != self._scale.dtype.base_dtype):
      raise TypeError("shift.dtype({}) does not match scale.dtype({})".format(
          self._shift.dtype, self._scale.dtype))
    self._shaper = _DistributionShape(
        batch_ndims=self._infer_batch_ndims(),
        event_ndims=event_ndims,
        validate_args=validate_args)
    super(Affine, self).__init__(
        event_ndims=event_ndims,
        graph_parents=(
            [event_ndims] +
            [self._scale] if contrib_framework.is_tensor(self._scale)
            else self._scale.inputs +
            [self._shift] if self._shift is not None else []),
        is_constant_jacobian=True,
        dtype=self._scale.dtype,
        validate_args=validate_args,
        name=name)

def dropout_keep_prob(self, prob):
    if is_tensor(prob):
        self._dropout_keep_prob = prob
    else:
        tf.logging.error("Dropout_keep_prob is not a tensor")

def input_y(self, y):
    if is_tensor(y):
        self._input_y = y
    else:
        tf.logging.error("Input_y is not a tensor")

def amari_alpha(logu, alpha=1., self_normalized=False, name=None):
  """The Amari-alpha Csiszar-function in log-space.

  A Csiszar-function is a member of,

  ```none
  F = { f:R_+ to R : f convex }.
  ```

  When `self_normalized = True`, the Amari-alpha Csiszar-function is:

  ```none
  f(u) = { -log(u) + (u - 1),                                      alpha = 0
         { u log(u) - (u - 1),                                     alpha = 1
         { [(u**alpha - 1) - alpha (u - 1)] / (alpha (alpha - 1)), otherwise
  ```

  When `self_normalized = False` the `(u - 1)` terms are omitted.

  Warning: when `alpha != 0` and/or `self_normalized = True` this function
  makes non-log-space calculations and may therefore be numerically unstable
  for `|logu| >> 0`.

  For more information, see:
    A. Cichocki and S. Amari. "Families of Alpha- Beta- and Gamma-Divergences:
    Flexible and Robust Measures of Similarities." Entropy, vol. 12, no. 6,
    pp. 1532-1568, 2010.

  Args:
    logu: Floating-type `Tensor` representing `log(u)` from above.
    alpha: Floating-type Python scalar. (See Mathematical Details for
      meaning.)
    self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When
      `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even
      when `p, q` are unnormalized measures.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    amari_alpha_of_u: Floating-type `Tensor` of the Csiszar-function evaluated
      at `u = exp(logu)`.

  Raises:
    TypeError: if `alpha` is `None` or a `Tensor`.
    TypeError: if `self_normalized` is `None` or a `Tensor`.
  """
  with ops.name_scope(name, "amari_alpha", [logu]):
    if alpha is None or contrib_framework.is_tensor(alpha):
      raise TypeError("`alpha` cannot be `None` or `Tensor` type.")
    if self_normalized is None or contrib_framework.is_tensor(self_normalized):
      raise TypeError("`self_normalized` cannot be `None` or `Tensor` type.")

    logu = ops.convert_to_tensor(logu, name="logu")

    if alpha == 0.:
      f = -logu
    elif alpha == 1.:
      f = math_ops.exp(logu) * logu
    else:
      f = math_ops.expm1(alpha * logu) / (alpha * (alpha - 1.))

    if not self_normalized:
      return f

    if alpha == 0.:
      return f + math_ops.expm1(logu)
    elif alpha == 1.:
      return f - math_ops.expm1(logu)
    else:
      return f - math_ops.expm1(logu) / (alpha - 1.)

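A small NumPy check of the `alpha = 1`, `self_normalized = True` branch (a standalone sketch, independent of TensorFlow): the returned value equals `f(u) = u*log(u) - (u - 1)` at `u = exp(logu)`.

import numpy as np

logu = np.array([-1.0, 0.0, 1.0])
u = np.exp(logu)

f = np.exp(logu) * logu - np.expm1(logu)   # mirrors the alpha == 1. branch
assert np.allclose(f, u * np.log(u) - (u - 1.0))
print(f)  # f(u=1) == 0, as expected for a self-normalized Csiszar-function
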
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
  """Generates a collection of bounding boxes to be used as anchors.

  The number of anchors generated for a single grid with shape MxM where we
  place k boxes over each grid center is k*M^2 and thus the total number of
  anchors is the sum over all grids. In our box_specs_list example
  (see the constructor docstring), we would place two boxes over each grid
  point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
  thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
  output anchors follows the order of how the grid sizes and box_specs are
  specified (with box_spec index varying the fastest, followed by width
  index, then height index, then grid index).

  Args:
    feature_map_shape_list: list of pairs of convnet layer resolutions in the
      format [(height_0, width_0), (height_1, width_1), ...]. For example,
      setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
      correspond to an 8x8 layer followed by a 7x7 layer.
    im_height: the height of the image to generate the grid for. If both
      im_height and im_width are 1, the generated anchors default to
      absolute coordinates, otherwise normalized coordinates are produced.
    im_width: the width of the image to generate the grid for. If both
      im_height and im_width are 1, the generated anchors default to
      absolute coordinates, otherwise normalized coordinates are produced.

  Returns:
    boxes_list: a list of BoxLists each holding anchor boxes corresponding to
      the input feature map shapes.

  Raises:
    ValueError: if feature_map_shape_list, box_specs_list do not have the
      same length.
    ValueError: if feature_map_shape_list does not consist of pairs of
      integers.
  """
  if not (isinstance(feature_map_shape_list, list) and
          len(feature_map_shape_list) == len(self._box_specs)):
    raise ValueError('feature_map_shape_list must be a list with the same '
                     'length as self._box_specs')
  if not all([isinstance(list_item, tuple) and len(list_item) == 2
              for list_item in feature_map_shape_list]):
    raise ValueError('feature_map_shape_list must be a list of pairs.')

  im_height = tf.cast(im_height, dtype=tf.float32)
  im_width = tf.cast(im_width, dtype=tf.float32)

  if not self._anchor_strides:
    anchor_strides = [(1.0 / tf.cast(pair[0], dtype=tf.float32),
                       1.0 / tf.cast(pair[1], dtype=tf.float32))
                      for pair in feature_map_shape_list]
  else:
    anchor_strides = [(tf.cast(stride[0], dtype=tf.float32) / im_height,
                       tf.cast(stride[1], dtype=tf.float32) / im_width)
                      for stride in self._anchor_strides]
  if not self._anchor_offsets:
    anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                      for stride in anchor_strides]
  else:
    anchor_offsets = [(tf.cast(offset[0], dtype=tf.float32) / im_height,
                       tf.cast(offset[1], dtype=tf.float32) / im_width)
                      for offset in self._anchor_offsets]

  for arg, arg_name in zip([anchor_strides, anchor_offsets],
                           ['anchor_strides', 'anchor_offsets']):
    if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
      raise ValueError('%s must be a list with the same length '
                       'as self._box_specs' % arg_name)
    if not all([isinstance(list_item, tuple) and len(list_item) == 2
                for list_item in arg]):
      raise ValueError('%s must be a list of pairs.' % arg_name)

  anchor_grid_list = []
  min_im_shape = tf.minimum(im_height, im_width)
  scale_height = min_im_shape / im_height
  scale_width = min_im_shape / im_width
  if not contrib_framework.is_tensor(self._base_anchor_size):
    base_anchor_size = [
        scale_height * tf.constant(self._base_anchor_size[0],
                                   dtype=tf.float32),
        scale_width * tf.constant(self._base_anchor_size[1],
                                  dtype=tf.float32)
    ]
  else:
    base_anchor_size = [
        scale_height * self._base_anchor_size[0],
        scale_width * self._base_anchor_size[1]
    ]
  for feature_map_index, (grid_size, scales, aspect_ratios, stride,
                          offset) in enumerate(
                              zip(feature_map_shape_list, self._scales,
                                  self._aspect_ratios, anchor_strides,
                                  anchor_offsets)):
    tiled_anchors = grid_anchor_generator.tile_anchors(
        grid_height=grid_size[0],
        grid_width=grid_size[1],
        scales=scales,
        aspect_ratios=aspect_ratios,
        base_anchor_size=base_anchor_size,
        anchor_stride=stride,
        anchor_offset=offset)
    if self._clip_window is not None:
      tiled_anchors = box_list_ops.clip_to_window(
          tiled_anchors, self._clip_window, filter_nonoverlapping=False)
    num_anchors_in_layer = tiled_anchors.num_boxes_static()
    if num_anchors_in_layer is None:
      num_anchors_in_layer = tiled_anchors.num_boxes()
    anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
    tiled_anchors.add_field('feature_map_index', anchor_indices)
    anchor_grid_list.append(tiled_anchors)

  return anchor_grid_list

def __init__(self, args):
    self.args = args
    self.tf_args = [(i, a) for i, a in enumerate(args) if is_tensor(a)]

def do_train_step(self, additional_ops):
    """
    Does one training step. Look at the class documentation to get the
    requirements needed for a successful update step.
    Approximates a 1-dimensional parabolic function along the negative
    gradient direction. If the approximation is a negative line, a step of
    measuring_step_size is done in line direction. If the approximation is an
    unsuited parabola, no update step is done.
    :param additional_ops: additional operations that infer information from
        the graph
    :return: loss (before parameter update), additional_ops_results
    """
    max_step_size = self._sess.run(self.max_step_size) if is_tensor(
        self.max_step_size) else self.max_step_size
    update_step_adaption = self._sess.run(self.update_step_adaptation) if \
        is_tensor(self.update_step_adaptation) else self.update_step_adaptation
    measuring_step = self._sess.run(self.measuring_step_size) if \
        is_tensor(self.measuring_step_size) else self.measuring_step_size

    # does step to position on line, which got inferred in the last call of
    # this function
    loss_at_current_position, directional_derivative, additional_ops_results = \
        self._get_loss_directional_deriv_and_save_gradient(additional_ops)

    loss_0 = loss_at_current_position
    loss_mu = self._do_line_step(measuring_step)

    b = directional_derivative
    a = (loss_mu - loss_0 - directional_derivative * measuring_step) / (
        measuring_step ** 2)

    # Guard against degenerate measurements in either coefficient.
    if np.isnan(a) or np.isnan(b) or np.isinf(a) or np.isinf(b):
        return loss_0, additional_ops_results

    if a > 0 and b < 0:
        # approximation is positive (convex) square function.
        # Minimum is located in positive line direction. Should be the
        # primary case.
        update_step = -b / (2 * a) * update_step_adaption
    elif a <= 0 and b < 0:
        # l''<0, l'<0 -> approximation is negative (concave) square function;
        #                maximum is located in negative line direction.
        # l''==0, l'<0 -> approximation is a negative line.
        # Second step was more negative, thus we jump there.
        update_step = measuring_step
    else:
        # l'>0 can't happen since the first derivative is the norm of the
        # gradient. l'==0 -> the current position is already an optimum.
        update_step = 0

    if update_step > max_step_size:
        update_step = max_step_size

    step_to_target_point = update_step - measuring_step

    # plotting
    if self.is_plot:
        global_step = self._sess.run(self._global_step)
        if global_step % self.plot_step_interval == 1:
            self.plot_loss_line_and_approximation(
                measuring_step / 10, step_to_target_point, measuring_step,
                a, b, loss_0, loss_mu, self.save_dir)

    if step_to_target_point != 0:
        self._sess.run(self.weight_vars_assign_ops,
                       feed_dict={self._step_on_line_plh: step_to_target_point})

    self._sess.run(self._increase_global_step_op)

    return loss_0, additional_ops_results

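A standalone numeric sketch of the parabolic fit used above (plain NumPy, no session or graph; the variable names mirror the method but the values are illustrative):

import numpy as np

mu = 0.1          # measuring step size
loss_0 = 1.00     # loss at the current position
loss_mu = 0.97    # loss after a step of size mu along the line
b = -0.5          # directional derivative at the current position, l'(0)

# Fit l(t) ~= a*t**2 + b*t + loss_0 through the two measurements.
a = (loss_mu - loss_0 - b * mu) / mu ** 2

# Convex case (a > 0, b < 0): the minimum of the parabola lies at -b / (2a).
update_step = -b / (2 * a)
step_to_target_point = update_step - mu  # we are already mu along the line
print(a, update_step, step_to_target_point)  # 2.0 0.125 0.025
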