def testAffineSigmoidRoundTrip(self):
  """Check that x = inv_affine_sigmoid(affine_sigmoid(x)) in general."""
  x = np.float32(np.linspace(-10., 10., 1000))
  for _ in range(10):
    lo = np.random.uniform(0., 0.3)
    hi = np.random.uniform(0.5, 4.)
    y = util.affine_sigmoid(x, lo=lo, hi=hi)
    x_recon = util.inv_affine_sigmoid(y, lo=lo, hi=hi)
    self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)
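# For reference, a minimal NumPy sketch of the bijection these round-trip
# tests exercise. The exact form of util.affine_sigmoid is an assumption here
# (a logistic sigmoid rescaled into (lo, hi)); inv_affine_sigmoid would then
# be its logit-based inverse.
import numpy as np

def affine_sigmoid_sketch(x, lo=0., hi=1.):
  # Squash the real line into the open interval (lo, hi).
  return lo + (hi - lo) / (1. + np.exp(-x))

def inv_affine_sigmoid_sketch(y, lo=0., hi=1.):
  # Map (lo, hi) back to the real line via the logit.
  t = (y - lo) / (hi - lo)
  return np.log(t) - np.log1p(-t)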
def __init__(self,
             num_channels,
             float_dtype,
             alpha_lo=0.001,
             alpha_hi=1.999,
             alpha_init=None,
             scale_lo=1e-5,
             scale_init=1.0,
             name=None):
  """Constructs the loss function.

  Args:
    num_channels: The number of different "channels" for the adaptive loss
      function, where each channel will be assigned its own shape (alpha) and
      scale parameters that are constructed as variables and can be optimized
      over.
    float_dtype: The expected numerical precision of the input, which will
      also determine the precision of the latent variables used to model
      scale and alpha internally.
    alpha_lo: The lowest possible value for the loss's alpha parameters. Must
      be >= 0 and a scalar. Should probably be in (0, 2).
    alpha_hi: The highest possible value for the loss's alpha parameters.
      Must be >= alpha_lo and a scalar. Should probably be in (0, 2).
    alpha_init: The value that the loss's alpha parameters will be
      initialized to. Must be in (`alpha_lo`, `alpha_hi`), unless
      `alpha_lo` == `alpha_hi` in which case this will be ignored. Defaults
      to (`alpha_lo` + `alpha_hi`) / 2.
    scale_lo: The lowest possible value for the loss's scale parameters. Must
      be > 0 and a scalar. This value may have more of an effect than you
      think, as the loss is unbounded as scale approaches zero.
    scale_init: The initial value used for the loss's scale parameters. This
      also defines the zero-point of the latent representation of scales, so
      SGD may cause optimization to gravitate towards producing scales near
      this value.
    name: The name of the module.

  Raises:
    ValueError: If any of the arguments are invalid.
  """
  super(AdaptiveLossFunction, self).__init__(name=name)
  _check_scale(scale_lo, scale_init)
  if not np.isscalar(alpha_lo):
    raise ValueError('`alpha_lo` must be a scalar, but is of type {}'.format(
        type(alpha_lo)))
  if not np.isscalar(alpha_hi):
    raise ValueError('`alpha_hi` must be a scalar, but is of type {}'.format(
        type(alpha_hi)))
  if alpha_init is not None and not np.isscalar(alpha_init):
    raise ValueError(
        '`alpha_init` must be None or a scalar, but is of type {}'.format(
            type(alpha_init)))
  if not alpha_lo >= 0:
    raise ValueError('`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
  if not alpha_hi >= alpha_lo:
    raise ValueError('`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
        alpha_hi, alpha_lo))
  if alpha_init is not None and alpha_lo != alpha_hi:
    if not (alpha_init > alpha_lo and alpha_init < alpha_hi):
      raise ValueError(
          '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({}, {})'
          .format(alpha_init, alpha_lo, alpha_hi))

  if alpha_lo != alpha_hi:
    # If alpha isn't constant, construct a "latent" alpha variable.
    if alpha_init is None:
      alpha_init = (alpha_lo + alpha_hi) / 2.
    latent_alpha_init = (
        util.inv_affine_sigmoid(alpha_init, lo=alpha_lo, hi=alpha_hi))
    self._latent_alpha = tf.Variable(
        tf.fill((1, num_channels),
                tf.cast(latent_alpha_init, dtype=float_dtype)),
        name='LatentAlpha')

  if scale_lo != scale_init:
    # If scale isn't constant, construct a "latent" scale variable.
    self._latent_scale = tf.Variable(
        tf.zeros((1, num_channels), float_dtype), name='LatentScale')

  self._num_channels = num_channels
  self._float_dtype = tf.dtypes.as_dtype(float_dtype)
  self._alpha_lo = alpha_lo
  self._alpha_hi = alpha_hi
  self._scale_lo = scale_lo
  self._scale_init = scale_init
  self._distribution = distribution.Distribution()
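# A minimal usage sketch (not from the library): construct the module for a
# 4-channel residual and minimize the mean adaptive loss. This assumes the
# module is callable on a [batch, num_channels] tensor of residuals and
# returns per-element losses, and that it exposes its latent alpha/scale
# variables via `trainable_variables` (as a tf.Module subclass would).
import numpy as np
import tensorflow as tf

loss_fn = AdaptiveLossFunction(num_channels=4, float_dtype=np.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
x = tf.random.normal((32, 4))  # Stand-in batch of residuals.
with tf.GradientTape() as tape:
  loss = tf.reduce_mean(loss_fn(x))
grads = tape.gradient(loss, loss_fn.trainable_variables)
optimizer.apply_gradients(zip(grads, loss_fn.trainable_variables))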
def lossfun(x,
            alpha_lo=0.001,
            alpha_hi=1.999,
            alpha_init=None,
            scale_lo=1e-5,
            scale_init=1.,
            **kwargs):
  """Computes the adaptive form of the robust loss on a matrix.

  This function behaves differently from general.lossfun() and
  distribution.nllfun(), which are "stateless", allow the caller to specify
  the shape and scale of the loss, and allow for arbitrary sized inputs. This
  function only allows for rank-2 inputs for the residual `x`, and expects
  that `x` is of the form [batch_index, dimension_index]. This function then
  constructs free parameters (TF variables) that define the alpha and scale
  parameters for each dimension of `x`, such that all alphas are in
  (`alpha_lo`, `alpha_hi`) and all scales are in (`scale_lo`, Infinity). The
  assumption is that `x` is, say, a matrix where x[i,j] corresponds to a
  pixel at location j for image i, with the idea being that all pixels at
  location j should be modeled with the same shape and scale parameters
  across all images in the batch. This function also returns handles to the
  scale and shape parameters being optimized over, mostly for debugging and
  introspection. If the user wants to fix alpha or scale to be a constant,
  this can be done by setting alpha_lo=alpha_hi or scale_lo=scale_init
  respectively.

  Args:
    x: The residual for which the loss is being computed. Must be a rank-2
      tensor, where the first dimension is the batch index and the second
      dimension corresponds to different "channels", where this function will
      assign each channel its own variable shape (alpha) and scale parameters
      that are constructed as TF variables and can be optimized over. Must be
      a TF tensor or numpy array of single or double precision floats. The
      precision of `x` will determine the precision of the latent variables
      used to model scale and alpha internally.
    alpha_lo: The lowest possible value for the loss's alpha parameters. Must
      be >= 0 and a scalar. Should probably be in (0, 2).
    alpha_hi: The highest possible value for the loss's alpha parameters.
      Must be >= alpha_lo and a scalar. Should probably be in (0, 2).
    alpha_init: The value that the loss's alpha parameters will be
      initialized to. Must be in (`alpha_lo`, `alpha_hi`), unless
      `alpha_lo` == `alpha_hi` in which case this will be ignored. Defaults
      to (`alpha_lo` + `alpha_hi`) / 2.
    scale_lo: The lowest possible value for the loss's scale parameters. Must
      be > 0 and a scalar. This value may have more of an effect than you
      think, as the loss is unbounded as scale approaches zero (say, at a
      delta function).
    scale_init: The initial value used for the loss's scale parameters. This
      also defines the zero-point of the latent representation of scales, so
      SGD may cause optimization to gravitate towards producing scales near
      this value.
    **kwargs: Arguments to be passed to the underlying distribution.nllfun().

  Returns:
    A tuple of the form (`loss`, `alpha`, `scale`).

    `loss`: a TF tensor of the same type and shape as input `x`, containing
    the loss at each element of `x` as a function of `x`, `alpha`, and
    `scale`. These "losses" are actually negative log-likelihoods (as
    produced by distribution.nllfun()) and so they are not actually bounded
    from below by zero. You'll probably want to minimize their sum or mean.

    `alpha`: a TF tensor of the same type as x, of size (1, x.shape[1]), as
    we construct an alpha variable for each dimension of `x` but not for each
    batch element. This contains the current estimated alpha parameter for
    each dimension, and will change during optimization.

    `scale`: a TF tensor of the same type as x, of size (1, x.shape[1]), as
    we construct a scale variable for each dimension of `x` but not for each
    batch element. This contains the current estimated scale parameter for
    each dimension, and will change during optimization.

  Raises:
    ValueError: If any of the arguments are invalid.
  """
  _check_scale(scale_lo, scale_init)
  if not np.isscalar(alpha_lo):
    raise ValueError('`alpha_lo` must be a scalar, but is of type {}'.format(
        type(alpha_lo)))
  if not np.isscalar(alpha_hi):
    raise ValueError('`alpha_hi` must be a scalar, but is of type {}'.format(
        type(alpha_hi)))
  if alpha_init is not None and not np.isscalar(alpha_init):
    raise ValueError(
        '`alpha_init` must be None or a scalar, but is of type {}'.format(
            type(alpha_init)))
  if not alpha_lo >= 0:
    raise ValueError('`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
  if not alpha_hi >= alpha_lo:
    raise ValueError('`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
        alpha_hi, alpha_lo))
  if alpha_init is not None and alpha_lo != alpha_hi:
    if not (alpha_init > alpha_lo and alpha_init < alpha_hi):
      raise ValueError(
          '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({}, {})'
          .format(alpha_init, alpha_lo, alpha_hi))

  float_dtype = x.dtype
  assert_ops = [tf.Assert(tf.equal(tf.rank(x), 2), [tf.rank(x)])]
  with tf.control_dependencies(assert_ops):
    if alpha_lo == alpha_hi:
      # If the range of alphas is a single item, then we just fix `alpha` to
      # be a constant.
      alpha = tf.tile(
          tf.cast(alpha_lo, float_dtype)[tf.newaxis, tf.newaxis],
          (1, x.shape[1]))
    else:
      # Otherwise we construct a "latent" alpha variable and define `alpha`
      # as an affine function of a sigmoid on that latent variable,
      # initialized such that `alpha` starts off as `alpha_init`.
      if alpha_init is None:
        alpha_init = (alpha_lo + alpha_hi) / 2.
      latent_alpha_init = util.inv_affine_sigmoid(
          alpha_init, lo=alpha_lo, hi=alpha_hi)
      latent_alpha = tf.compat.v1.get_variable(
          'LatentAlpha',
          initializer=tf.fill((1, x.shape[1]),
                              tf.cast(latent_alpha_init,
                                      dtype=float_dtype)))
      alpha = util.affine_sigmoid(latent_alpha, lo=alpha_lo, hi=alpha_hi)
    scale = _construct_scale(x, scale_lo, scale_init, float_dtype)
    loss = distribution.nllfun(x, alpha, scale, **kwargs)
    return loss, alpha, scale
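# A usage sketch for lossfun(), assuming TF1-style graph mode (it calls
# tf.compat.v1.get_variable() internally, so eager execution must be disabled
# and variables initialized before the graph is run). The 8-channel shape is
# illustrative.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 8))
loss, alpha, scale = lossfun(x_ph)
train_op = tf.compat.v1.train.AdamOptimizer(0.01).minimize(
    tf.reduce_mean(loss))
init_op = tf.compat.v1.global_variables_initializer()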
def testDefaultAffineSigmoidRoundTrip(self):
  """Check that x = inv_affine_sigmoid(affine_sigmoid(x)) by default."""
  x = np.float32(np.linspace(-10., 10., 1000))
  y = util.affine_sigmoid(x)
  x_recon = util.inv_affine_sigmoid(y)
  self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)