Example #1
    def testAffineSigmoidRoundTrip(self):
        """Check that x = inv_affine_sigmoid(affine_sigmoid(x)) in general."""
        x = np.float32(np.linspace(-10., 10., 1000))
        for _ in range(10):
            lo = np.random.uniform(0., 0.3)
            hi = np.random.uniform(0.5, 4.)
            y = util.affine_sigmoid(x, lo=lo, hi=hi)
            x_recon = util.inv_affine_sigmoid(y, lo=lo, hi=hi)
            self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)
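For context, here is a minimal self-contained sketch of the round trip this test checks. The definitions of `affine_sigmoid` and `inv_affine_sigmoid` below are an assumption about what `util` provides (a sigmoid rescaled to the interval `(lo, hi)`, and its inverse); the package's own implementations may differ in numerical details.

import numpy as np

def affine_sigmoid(x, lo=0., hi=1.):
    # Assumed mapping: squash reals into (lo, hi) with a rescaled sigmoid.
    return 1. / (1. + np.exp(-x)) * (hi - lo) + lo

def inv_affine_sigmoid(y, lo=0., hi=1.):
    # Inverse of the mapping above: rescale back to (0, 1), then take the logit.
    p = (y - lo) / (hi - lo)
    return np.log(p) - np.log1p(-p)

x = np.linspace(-10., 10., 1000, dtype=np.float32)
y = affine_sigmoid(x, lo=0.2, hi=3.)
x_recon = inv_affine_sigmoid(y, lo=0.2, hi=3.)
assert np.allclose(x, x_recon, atol=1e-4, rtol=1e-3)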
Example #2
    def __init__(self,
                 num_dims,
                 float_dtype,
                 device,
                 alpha_lo=0.001,
                 alpha_hi=1.999,
                 alpha_init=None,
                 scale_lo=1e-5,
                 scale_init=1.0):
        """Sets up the loss function.

    Args:
      num_dims: The number of dimensions of the input to come.
      float_dtype: The floating point precision of the inputs to come.
      device: The device to run on (cpu, cuda, etc).
      alpha_lo: The lowest possible value for loss's alpha parameters, must be
        >= 0 and a scalar. Should probably be in (0, 2).
      alpha_hi: The highest possible value for loss's alpha parameters, must be
        >= alpha_lo and a scalar. Should probably be in (0, 2).
      alpha_init: The value that the loss's alpha parameters will be initialized
        to, must be in (`alpha_lo`, `alpha_hi`), unless `alpha_lo` == `alpha_hi`
        in which case this will be ignored. Defaults to (`alpha_lo` +
        `alpha_hi`) / 2
      scale_lo: The lowest possible value for the loss's scale parameters. Must
        be > 0 and a scalar. This value may have more of an effect than you
        think, as the loss is unbounded as scale approaches zero (say, at a
        delta function).
      scale_init: The initial value used for the loss's scale parameters. This
        also defines the zero-point of the latent representation of scales, so
        SGD may cause optimization to gravitate towards producing scales near
        this value.
    """
        super(AdaptiveLossFunction, self).__init__()

        if not np.isscalar(alpha_lo):
            raise ValueError(
                '`alpha_lo` must be a scalar, but is of type {}'.format(
                    type(alpha_lo)))
        if not np.isscalar(alpha_hi):
            raise ValueError(
                '`alpha_hi` must be a scalar, but is of type {}'.format(
                    type(alpha_hi)))
        if alpha_init is not None and not np.isscalar(alpha_init):
            raise ValueError(
                '`alpha_init` must be None or a scalar, but is of type {}'.
                format(type(alpha_init)))
        if not alpha_lo >= 0:
            raise ValueError(
                '`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
        if not alpha_hi >= alpha_lo:
            raise ValueError(
                '`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
                    alpha_hi, alpha_lo))
        if alpha_init is not None and alpha_lo != alpha_hi:
            if not (alpha_init > alpha_lo and alpha_init < alpha_hi):
                raise ValueError(
                    '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({}, {})'
                    .format(alpha_init, alpha_lo, alpha_hi))
        if not np.isscalar(scale_lo):
            raise ValueError(
                '`scale_lo` must be a scalar, but is of type {}'.format(
                    type(scale_lo)))
        if not np.isscalar(scale_init):
            raise ValueError(
                '`scale_init` must be a scalar, but is of type {}'.format(
                    type(scale_init)))
        if not scale_lo > 0:
            raise ValueError(
                '`scale_lo` must be > 0, but is {}'.format(scale_lo))
        if not scale_init >= scale_lo:
            raise ValueError(
                '`scale_init` = {} must be >= `scale_lo` = {}'.format(
                    scale_init, scale_lo))

        self.num_dims = num_dims
        if float_dtype == np.float32:
            float_dtype = torch.float32
        if float_dtype == np.float64:
            float_dtype = torch.float64
        self.float_dtype = float_dtype
        self.device = device
        if isinstance(device, int) or\
           (isinstance(device, str) and 'cuda' in device):
            torch.cuda.set_device(self.device)

        self.distribution = distribution.Distribution()

        if alpha_lo == alpha_hi:
            # If the range of alphas is a single item, then we just fix `alpha` to be
            # a constant.
            self.fixed_alpha = torch.tensor(
                alpha_lo, dtype=self.float_dtype,
                device=self.device)[np.newaxis,
                                    np.newaxis].repeat(1, self.num_dims)
            self.alpha = lambda: self.fixed_alpha
        else:
            # Otherwise we construct a "latent" alpha variable and define `alpha`
            # As an affine function of a sigmoid on that latent variable, initialized
            # such that `alpha` starts off as `alpha_init`.
            if alpha_init is None:
                alpha_init = (alpha_lo + alpha_hi) / 2.
            latent_alpha_init = util.inv_affine_sigmoid(alpha_init,
                                                        lo=alpha_lo,
                                                        hi=alpha_hi)
            self.register_parameter(
                'latent_alpha',
                torch.nn.Parameter(latent_alpha_init.clone().detach().to(
                    dtype=self.float_dtype,
                    device=self.device)[np.newaxis,
                                        np.newaxis].repeat(1, self.num_dims),
                                   requires_grad=True))
            self.alpha = lambda: util.affine_sigmoid(
                self.latent_alpha, lo=alpha_lo, hi=alpha_hi)

        if scale_lo == scale_init:
            # If the difference between the minimum and initial scale is zero, then
            # we just fix `scale` to be a constant.
            self.fixed_scale = torch.tensor(
                scale_init, dtype=self.float_dtype,
                device=self.device)[np.newaxis,
                                    np.newaxis].repeat(1, self.num_dims)
            self.scale = lambda: self.fixed_scale
        else:
            # Otherwise we construct a "latent" scale variable and define `scale`
            # As an affine function of a softplus on that latent variable.
            self.register_parameter(
                'latent_scale',
                torch.nn.Parameter(torch.zeros(
                    (1, self.num_dims)).to(dtype=self.float_dtype,
                                           device=self.device),
                                   requires_grad=True))
            self.scale = lambda: util.affine_softplus(
                self.latent_scale, lo=scale_lo, ref=scale_init)
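For reference, a short usage sketch of this constructor. It assumes the class is `robust_loss_pytorch.adaptive.AdaptiveLossFunction` (a `torch.nn.Module`) and that, beyond the `alpha()` and `scale()` accessors defined above, the class exposes a `lossfun(residuals)` method that evaluates the robust loss at the current parameters; both the import path and `lossfun` are assumptions, not shown in the snippet.

import numpy as np
import torch
from robust_loss_pytorch import adaptive  # assumed import path

# One adaptive (alpha, scale) pair per dimension of the residuals.
loss_fn = adaptive.AdaptiveLossFunction(
    num_dims=4, float_dtype=np.float32, device='cpu')

print(loss_fn.alpha())  # shape (1, 4); starts at (alpha_lo + alpha_hi) / 2 = 1.0
print(loss_fn.scale())  # shape (1, 4); starts at scale_init = 1.0

# `latent_alpha` and `latent_scale` are registered parameters, so they are
# optimized jointly with any model weights passed to the same optimizer.
optimizer = torch.optim.Adam(loss_fn.parameters(), lr=1e-3)
residuals = torch.randn(32, 4)
loss = torch.mean(loss_fn.lossfun(residuals))  # assumed method, see note above
optimizer.zero_grad()
loss.backward()
optimizer.step()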
Example #3
    def testDefaultAffineSigmoidRoundTrip(self):
        """Check that x = inv_affine_sigmoid(affine_sigmoid(x)) by default."""
        x = np.float32(np.linspace(-10., 10., 1000))
        y = util.affine_sigmoid(x)
        x_recon = util.inv_affine_sigmoid(y)
        self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)
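Under the same assumed definitions as the sketch after Example #1, and assuming the defaults are `lo=0` and `hi=1` (not visible in these snippets), `affine_sigmoid` reduces to the standard logistic sigmoid, so the default round trip is simply `logit(sigmoid(x)) = x`:

x = np.linspace(-10., 10., 1000, dtype=np.float32)
y = affine_sigmoid(x)              # assumed defaults: lo=0., hi=1.
x_recon = inv_affine_sigmoid(y)    # plain logit of a plain sigmoid
assert np.allclose(x, x_recon, atol=1e-4, rtol=1e-3)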