Example #1
    def _gradient_matches_finite_differences(self, float_dtype):
        # Test that the loss and its approximation both return gradients that are
        # close to the numerical gradient from finite differences, with forward
        # differencing. Returning correct gradients is Torch's job, so this is
        # just an aggressive sanity check in case some implementation detail causes
        # gradients to incorrectly go to zero due to quantization or stop_gradients
        # in some op that is used by the loss.
        for approximate in [False, True]:
            num_samples = 100000

            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest
            # 0.1 and then shifted by 0.05 so that we avoid the special cases at
            # 0 and 2 where the analytical gradient won't match finite differences.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10. + 0.05)

            # Uniformly distributed values in [0.5, 1.5].
            scale = float_dtype(np.random.uniform(0.5, 1.5, num_samples))

            # Compute the loss and its derivative with respect to all three inputs.
            var_x = torch.tensor(x, requires_grad=True)
            var_alpha = torch.tensor(alpha, requires_grad=True)
            var_scale = torch.tensor(scale, requires_grad=True)
            loss = general.lossfun(
                var_x, var_alpha, var_scale, approximate=approximate)
            sum_loss = torch.sum(loss)
            sum_loss.backward()
            d_x = var_x.grad.detach().numpy()
            d_alpha = var_alpha.grad.detach().numpy()
            d_scale = var_scale.grad.detach().numpy()
            loss = loss.detach().numpy()

            step_size = float_dtype(1e-3)

            # Assert that the 95th percentile of errors is <= 1e-2.
            def assert_percentile_close(v1, v2):
                np.testing.assert_(np.percentile(np.abs(v1 - v2), 95) <= 1e-2)

            n_x = (np.array(
                general.lossfun(x + step_size, alpha, scale,
                                approximate=approximate)) - loss) / step_size
            assert_percentile_close(n_x, d_x)

            n_alpha = (np.array(
                general.lossfun(x, alpha + step_size, scale,
                                approximate=approximate)) - loss) / step_size
            assert_percentile_close(n_alpha, d_alpha)

            n_scale = (np.array(
                general.lossfun(x, alpha, scale + step_size,
                                approximate=approximate)) - loss) / step_size
            assert_percentile_close(n_scale, d_scale)
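For reference, the numerical gradient in this test is a forward difference of the loss with step size h = 1e-3, compared element-wise against the analytic gradient from backward(); in LaTeX,

    n_x = \frac{\rho(x + h, \alpha, c) - \rho(x, \alpha, c)}{h} \approx \frac{\partial \rho}{\partial x}(x, \alpha, c), \qquad h = 10^{-3},

and analogously for the derivatives with respect to \alpha and c, asserting that the 95th percentile of |n - d| is at most 1e-2.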
Example #2
 def _approximate_loss_is_accurate(self, float_dtype):
     # Check that the approximate loss (lossfun() with epsilon=1e-6) reasonably
     # approximates the true loss (lossfun() with epsilon=0.) for a range of
     # values of alpha (skipping alpha=0, where the approximation is poor).
     x = np.arange(-10, 10, 0.1, float_dtype)
     scale = float_dtype(1.7)
     for alpha in [-4, -2, -0.2, -0.01, 0.01, 0.2, 1, 1.99, 2, 2.01, 4]:
         alpha = float_dtype(alpha)
         loss = general.lossfun(x, alpha, scale).detach().numpy()
         loss_approx = general.lossfun(x, alpha, scale,
                                       approximate=True).detach().numpy()
         np.testing.assert_allclose(loss, loss_approx, rtol=1e-5, atol=1e-4)
 def testApproximateLossIsAccurate(self, float_dtype, device):
   # Check that the approximate loss (lossfun() with epsilon=1e-6) reasonably
   # approximates the true loss (lossfun() with epsilon=0.) for a range of
   # values of alpha (skipping alpha=0, where the approximation is poor).
   x = np.arange(-10, 10, 0.1, float_dtype)
   scale = float_dtype(1.7)
   for alpha in [-4, -2, -0.2, -0.01, 0.01, 0.2, 1, 1.99, 2, 2.01, 4]:
     alpha = float_dtype(alpha)
     x_t = torch.tensor(x, device=device)
     alpha_t = torch.tensor(alpha).to(x_t)
     scale_t = torch.tensor(scale).to(x_t)
     loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()
     loss_approx = general.lossfun(
         x_t, alpha_t, scale_t, approximate=True).cpu().detach().numpy()
     np.testing.assert_allclose(loss, loss_approx, rtol=1e-5, atol=1e-4)
Example #4
    def _loss_and_gradients_are_finite(self, float_dtype):
        # Test that the loss and its approximation both give finite losses and
        # derivatives everywhere that they should for a wide range of values.
        for approximate in [False, True]:
            num_samples = 100000

            # Normally distributed inputs.
            x = float_dtype(np.random.normal(size=num_samples))

            # Uniformly distributed values in (-16, 3), quantized to the nearest
            # 0.1 to ensure that we hit the special cases at 0, 2.
            alpha = float_dtype(
                np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.)

            # Random log-normally distributed values in approx (1e-5, 100000):
            scale = float_dtype(
                np.exp(np.random.normal(size=num_samples) * 4.) + 1e-5)

            # Compute the loss and its derivative with respect to all three inputs.
            var_x = torch.tensor(x, requires_grad=True)
            var_alpha = torch.tensor(alpha, requires_grad=True)
            var_scale = torch.tensor(scale, requires_grad=True)
            loss = general.lossfun(
                var_x, var_alpha, var_scale, approximate=approximate)
            sum_loss = torch.sum(loss)
            sum_loss.backward()
            d_x = var_x.grad.detach().numpy()
            d_alpha = var_alpha.grad.detach().numpy()
            d_scale = var_scale.grad.detach().numpy()
            loss = loss.detach().numpy()

            for v in [loss, d_x, d_alpha, d_scale]:
                np.testing.assert_(np.all(np.isfinite(v)))
Example #5
    def _precompute_lossfun_inputs(self, float_dtype, device):
        """Precompute a loss and its derivatives for random inputs and parameters.

    Generates a large number of random inputs to the loss, and random
    shape/scale parameters for the loss function at each sample, and
    computes the loss and its derivative with respect to all inputs and
    parameters, returning everything to be used to assert various properties
    in our unit tests.

    Args:
      float_dtype: The float precision to be used (np.float32 or np.float64).
      device: The device to run on.

    Returns:
      A tuple containing:
       (the number of samples (int), which is also the length of all following arrays,
        A np.array (float_dtype) of losses for each sample,
        A np.array (float_dtype) of residuals of each sample (the loss inputs),
        A np.array (float_dtype) of shape parameters of each loss,
        A np.array (float_dtype) of scale parameters of each loss,
        A np.array (float_dtype) of derivatives of each loss wrt each x,
        A np.array (float_dtype) of derivatives of each loss wrt each alpha,
        A np.array (float_dtype) of derivatives of each loss wrt each scale)

    Typical usage example:
    (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
        = self._precompute_lossfun_inputs(np.float32, 'cpu')
    """
        num_samples = 100000
        # Normally distributed inputs.
        x = float_dtype(np.random.normal(size=num_samples))

        # Uniformly distributed values in (-16, 3), quantized to the nearest 0.1
        # to ensure that we hit the special cases at 0, 2.
        alpha = float_dtype(
            np.round(np.random.uniform(-16, 3, num_samples) * 10) / 10.0)
        # Push the sampled alphas at the extents of the range to +/- infinity, so
        # that we probe those cases too.
        alpha[alpha == 3.0] = float_dtype(float("inf"))
        alpha[alpha == -16.0] = -float_dtype(float("inf"))

        # Random log-normally distributed values in approx (1e-5, 100000):
        scale = float_dtype(
            np.exp(np.random.normal(size=num_samples) * 4.0) + 1e-5)

        # Compute the loss and its derivative with respect to all three inputs.
        var_x = torch.tensor(x, device=device, requires_grad=True)
        var_alpha = torch.tensor(alpha, device=device, requires_grad=True)
        var_scale = torch.tensor(scale, device=device, requires_grad=True)
        loss = general.lossfun(var_x, var_alpha, var_scale)
        sum_loss = torch.sum(loss)
        sum_loss.backward()
        d_x = var_x.grad.cpu().detach().numpy()
        d_alpha = var_alpha.grad.cpu().detach().numpy()
        d_scale = var_scale.grad.cpu().detach().numpy()
        loss = loss.cpu().detach().numpy()
        return (num_samples, loss, x, alpha, scale, d_x, d_alpha, d_scale)
Example #6
 def _lossfun_preserves_dtype(self, float_dtype):
     """Check the loss's output has the same precision as its input."""
     n = 16
     x = float_dtype(np.random.normal(size=n))
     alpha = float_dtype(np.random.normal(size=n))
     scale = float_dtype(np.exp(np.random.normal(size=n)))
     y = general.lossfun(x, alpha, scale)
     np.testing.assert_equal(y.detach().numpy().dtype, float_dtype)
 def testLossfunPreservesDevice(self, float_dtype, device):
   """Check the loss's output has the same precision as its input."""
   n = 16
   x = torch.tensor(float_dtype(np.random.normal(size=n)), device=device)
   alpha = torch.tensor(float_dtype(np.random.normal(size=n)), device=device)
   scale = torch.tensor(
       float_dtype(np.exp(np.random.normal(size=n))), device=device)
   y = general.lossfun(x, alpha, scale)
   np.testing.assert_equal(y.device.type, device)
Example #8
    def _loss_is_scale_invariant(self, float_dtype):
        # Check that loss(mult * x, alpha, mult * scale) == loss(x, alpha, scale)
        (num_samples, loss, x, alpha, scale, _, _,
         _) = self._precompute_lossfun_inputs(float_dtype)
        # Random log-normally distributed scalings in ~(0.2, 20)
        mult = float_dtype(
            np.maximum(0.2, np.exp(np.random.normal(size=num_samples))))

        # Compute the scaled loss.
        loss_scaled = general.lossfun(mult * x, alpha, mult * scale)
        np.testing.assert_allclose(loss, loss_scaled, atol=1e-4, rtol=1e-4)
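The invariance being tested follows from the fact that the general loss depends on the residual only through the ratio x / c, so scaling the residual and the scale by the same positive multiplier leaves it unchanged; in LaTeX,

    \rho(k x, \alpha, k c) = \rho(x, \alpha, c) \quad \text{for all } k > 0.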
Example #9
    def _alpha_equals_two(self, float_dtype):
        # Check that alpha == 2 reproduces L2 loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(2.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # L2 Loss.
        loss_true = 0.5 * (x / scale)**2

        self._assert_all_close_according_to_type(loss, loss_true)
Example #10
    def _alpha_equals_one(self, float_dtype):
        # Check that alpha == 1 reproduces Charbonnier aka pseudo-Huber loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(1.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # Charbonnier loss.
        loss_true = (np.sqrt((x / scale)**2 + 1.) - 1.)

        self._assert_all_close_according_to_type(loss, loss_true)
Example #11
    def _alpha_equals_zero(self, float_dtype):
        # Check that alpha == 0 reproduces Cauchy aka Lorentzian loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(0.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # Cauchy/Lorentzian loss.
        loss_true = (np.log(0.5 * (x / scale)**2 + 1.))

        self._assert_all_close_according_to_type(loss, loss_true)
Example #12
    def _alpha_equals_negative_two(self, float_dtype):
        # Check that alpha == -2 reproduces Geman-McClure loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(-2.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # Geman-McClure loss.
        loss_true = 2. * (x / scale)**2 / ((x / scale)**2 + 4.)

        self._assert_all_close_according_to_type(loss, loss_true)
Example #13
    def _alpha_equals_negative_infinity(self, float_dtype):
        # Check that alpha == -Infinity reproduces Welsch aka Leclerc loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(-float('inf'))
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # Welsch/Leclerc loss.
        loss_true = (1. - np.exp(-0.5 * (x / scale)**2))

        self._assert_all_close_according_to_type(loss, loss_true)
Example #14
    def _alpha_equals_infinity(self, float_dtype):
        # Check that alpha == Infinity takes the correct form.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(float('inf'))
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # The true loss.
        loss_true = (np.exp(0.5 * np.square(x / scale)) - 1.)

        self._assert_all_close_according_to_type(loss, loss_true)
Example #15
    def _alpha_equals_four(self, float_dtype):
        # Check that alpha == 4 reproduces a quartic.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(4.)
        scale = float_dtype(1.7)

        # Our loss.
        loss = general.lossfun(x, alpha, scale).detach().numpy()

        # The true loss.
        loss_true = np.square(np.square(x / scale)) / 8. + np.square(
            x / scale) / 2.

        self._assert_all_close_according_to_type(loss, loss_true)
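All of the special cases verified above are instances (or limits) of the general robust loss from the paper; in LaTeX,

    \rho(x, \alpha, c) = \frac{|\alpha - 2|}{\alpha} \left( \left( \frac{(x / c)^2}{|\alpha - 2|} + 1 \right)^{\alpha / 2} - 1 \right),

where the removable singularities at \alpha = 0 and \alpha = 2 and the cases \alpha = \pm\infty are defined by their limiting values: \tfrac{1}{2}(x/c)^2 at \alpha = 2, \log\big(\tfrac{1}{2}(x/c)^2 + 1\big) at \alpha = 0, 1 - \exp\big(-\tfrac{1}{2}(x/c)^2\big) at \alpha = -\infty, and \exp\big(\tfrac{1}{2}(x/c)^2\big) - 1 at \alpha = +\infty.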
Example #16
    def testLossIsScaleInvariant(self, float_dtype, device):
        # Check that loss(mult * x, alpha, mult * scale) == loss(x, alpha, scale)
        (num_samples, loss, x, alpha, scale, _, _,
         _) = self._precompute_lossfun_inputs(float_dtype, device)
        # Random log-normally distributed scalings in ~(0.2, 20)
        mult = float_dtype(
            np.maximum(0.2, np.exp(np.random.normal(size=num_samples))))

        x = torch.tensor(np.array(mult * x, dtype=float_dtype), device=device)
        alpha = torch.tensor(np.array(alpha, dtype=float_dtype), device=device)
        scale = torch.tensor(np.array(mult * scale, dtype=float_dtype),
                             device=device)
        # Compute the scaled loss.
        loss_scaled = general.lossfun(x, alpha, scale).cpu().detach()
        np.testing.assert_allclose(loss, loss_scaled, atol=1e-4, rtol=1e-4)
 def loglikelihood(self, res, alpha, scale):
     assert alpha.view(-1).size()[0] in (1, len(res))
     scale = scale + 1e-5
     N = len(res)
     dist = distribution.Distribution()
     loss = general.lossfun(res, alpha, scale, approximate=False).sum()
     log_partition = torch.log(scale) + dist.log_base_partition_function(
         alpha)
     if alpha.view(-1).size()[0] == 1:
         log_partition = N * log_partition
     else:
         log_partition = log_partition.sum()
     nll = loss + log_partition
     return -nll.detach().numpy()
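In the notation of the paper (whose Equation 16 defines the density of this distribution), the quantity returned above is the total log-likelihood of the residuals,

    \log p(\mathrm{res} \mid \alpha, c) = -\sum_i \big( \rho(\mathrm{res}_i, \alpha_i, c_i) + \log c_i + \log Z(\alpha_i) \big),

where Z(\alpha) is the base partition function computed by log_base_partition_function, and \alpha_i, c_i may either be shared across all residuals or given per residual.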
    def sample(self, alpha, c):
        alpha = torch.as_tensor(alpha)
        scale = torch.as_tensor(c)
        assert (alpha >= 0).all()
        assert (scale >= 0).all()
        float_dtype = alpha.dtype
        assert scale.dtype == float_dtype

        cauchy = torch.distributions.cauchy.Cauchy(0., np.sqrt(2.))
        uniform = torch.distributions.uniform.Uniform(0, 1)
        samples = torch.zeros_like(alpha)
        accepted = torch.zeros(alpha.shape).type(torch.bool)
        dist = distribution.Distribution()
        while not accepted.type(torch.uint8).all():
            # Draw N samples from a Cauchy, our proposal distribution.
            cauchy_sample = torch.reshape(
                cauchy.sample((np.prod(alpha.shape), )), alpha.shape)
            cauchy_sample = cauchy_sample.type(alpha.dtype)

            # Compute the likelihood of each sample under its target distribution.
            nll = dist.nllfun(cauchy_sample,
                              torch.as_tensor(alpha).to(cauchy_sample),
                              torch.tensor(1).to(cauchy_sample))

            # Bound the NLL. We don't use the approximate loss as it may cause
            # unpredictable behavior in the context of sampling.
            nll_bound = general.lossfun(
                cauchy_sample,
                torch.tensor(0., dtype=cauchy_sample.dtype),
                torch.tensor(1., dtype=cauchy_sample.dtype),
                approximate=False) + dist.log_base_partition_function(alpha)

            # Draw N samples from a uniform distribution, and use each uniform sample
            # to decide whether or not to accept each proposal sample.
            uniform_sample = torch.reshape(
                uniform.sample((np.prod(alpha.shape), )), alpha.shape)
            uniform_sample = uniform_sample.type(alpha.dtype)
            accept = uniform_sample <= torch.exp(nll_bound - nll)

            # If a sample is accepted, replace its element in `samples` with the
            # proposal sample, and set its bit in `accepted` to True.
            samples = torch.where(accept, cauchy_sample, samples)
            accepted = accepted | accept

        # Because our distribution is a location-scale family, we sample from
        # p(x | 0, \alpha, 1) and then scale each sample by `scale`.
        samples *= scale
        return samples
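Both this method and the module-level draw_samples below implement the same rejection rule. The Cauchy(0, \sqrt{2}) proposal is exactly the \alpha = 0 member of the family with c = 1, and since nll and nll_bound share the same log-partition term, each proposal x is accepted with probability

    \exp\big(\mathrm{nll\_bound}(x) - \mathrm{nll}(x)\big) = \exp\big(\rho(x, 0, 1) - \rho(x, \alpha, 1)\big) \le 1,

i.e. the ratio of the unnormalized target density to the unnormalized proposal density, as in standard rejection sampling (Algorithm 1 of the paper).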
Example #19
    def testAlphaEqualsTwo(self, float_dtype, device):
        # Check that alpha == 2 reproduces L2 loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(2.0)
        scale = float_dtype(2.0)

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # L2 Loss.
        loss_true = 0.5 * (x / scale)**2

        self._assert_all_close_according_to_type(loss, loss_true)
Example #20
    def testAlphaEqualsOne(self, float_dtype, device):
        # Check that alpha == 1 reproduces Charbonnier aka pseudo-Huber loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(1.0)
        scale = float_dtype(2.0)

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # Charbonnier loss.
        loss_true = np.sqrt((x / scale)**2 + 1.0) - 1.0

        self._assert_all_close_according_to_type(loss, loss_true)
Example #21
    def testAlphaEqualsZero(self, float_dtype, device):
        # Check that alpha == 0 reproduces Cauchy aka Lorentzian loss.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(0.0)
        scale = float_dtype(2.0)

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # Cauchy/Lorentzian loss.
        loss_true = np.log(0.5 * (x / scale)**2 + 1.0)

        self._assert_all_close_according_to_type(loss, loss_true)
Example #22
    def testAlphaEqualsNegativeTwo(self, float_dtype, device):
        # Check that alpha == -2 reproduces Geman-McClure loss.
        x = float_dtype(np.arange(-20, 20, 0.1))
        alpha = float_dtype(np.array(-2.0))
        scale = float_dtype(np.array(2.0))

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # Geman-McClure loss.
        loss_true = 2.0 * (x / scale)**2 / ((x / scale)**2 + 4.0)

        self._assert_all_close_according_to_type(loss, loss_true)
Example #23
    def testAlphaEqualsNegativeInfinity(self, float_dtype, device):
        # Check that alpha == -Infinity reproduces Welsch aka Leclerc loss.
        x = float_dtype(np.arange(-20, 20, 0.1))
        alpha = float_dtype(np.array([-float("inf")]))
        scale = float_dtype(np.array([2.0]))

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # Welsch/Leclerc loss.
        loss_true = 1.0 - np.exp(-0.5 * (x / scale)**2)

        self._assert_all_close_according_to_type(loss, loss_true)
  def testAlphaEqualsFour(self, float_dtype, device):
    # Check that alpha == 4 reproduces a quartic.
    x = np.arange(-20, 20, 0.1, float_dtype)
    alpha = float_dtype(4.)
    scale = float_dtype(2.)

    # Our loss.
    x_t = torch.tensor(x, device=device)
    alpha_t = torch.tensor(alpha).to(x_t)
    scale_t = torch.tensor(scale).to(x_t)
    loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

    # The true loss.
    loss_true = np.square(np.square(x / scale)) / 8. + np.square(x / scale) / 2.

    self._assert_all_close_according_to_type(loss, loss_true)
Example #25
    def testAlphaEqualsInfinity(self, float_dtype, device):
        # Check that alpha == Infinity takes the correct form.
        x = np.arange(-20, 20, 0.1, float_dtype)
        alpha = float_dtype(float("inf"))
        scale = float_dtype(2.0)

        # Our loss.
        x_t = torch.tensor(x, device=device)
        alpha_t = torch.tensor(alpha).to(x_t)
        scale_t = torch.tensor(scale).to(x_t)
        loss = general.lossfun(x_t, alpha_t, scale_t).cpu().detach().numpy()

        # The true loss.
        loss_true = np.exp(0.5 * np.square(x / scale)) - 1.0

        self._assert_all_close_according_to_type(loss, loss_true)
Example #26
    def nllfun(self, x, alpha, scale):
        r"""Implements the negative log-likelihood (NLL).

    Specifically, we implement -log(p(x | 0, \alpha, c)) of Equation 16 in the
    paper as nllfun(x, alpha, scale).

    Args:
      x: The residual for which the NLL is being computed. x can have any shape,
        and alpha and scale will be broadcasted to match x's shape if necessary.
        Must be a tensor or numpy array of floats.
      alpha: The shape parameter of the NLL (\alpha in the paper), where more
        negative values cause outliers to "cost" more and inliers to "cost"
        less. Alpha can be any non-negative value, but the gradient of the NLL
        with respect to alpha has singularities at 0 and 2 so you may want to
        limit usage to (0, 2) during gradient descent. Must be a tensor or numpy
        array of floats. Varying alpha in that range allows for smooth
        interpolation between a Cauchy distribution (alpha = 0) and a Normal
        distribution (alpha = 2) similar to a Student's T distribution.
      scale: The scale parameter of the loss. When |x| < scale, the NLL is like
        that of a (possibly unnormalized) normal distribution, and when |x| >
        scale the NLL takes on a different shape according to alpha. Must be a
        tensor or numpy array of floats.

    Returns:
      The NLLs for each element of x, in the same shape and precision as x.
    """
        # `scale` and `alpha` must have the same type as `x`.
        x = torch.as_tensor(x)
        alpha = torch.as_tensor(alpha)
        scale = torch.as_tensor(scale)
        assert (alpha >= 0).all()
        assert (scale >= 0).all()
        float_dtype = x.dtype
        assert alpha.dtype == float_dtype
        assert scale.dtype == float_dtype

        loss = general.lossfun(x, alpha, scale, approximate=False)
        log_partition = torch.log(scale) + self.log_base_partition_function(
            alpha)
        nll = loss + log_partition
        return nll
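A minimal usage sketch for nllfun, assuming the Distribution class above is importable as robust_loss_pytorch.distribution (the import path and the values are illustrative, not prescribed by the snippet):

import numpy as np
import torch

from robust_loss_pytorch import distribution  # Assumed import path.

dist = distribution.Distribution()
x = torch.tensor(np.float32([-3.0, -0.5, 0.0, 0.5, 3.0]))
alpha = torch.full_like(x, 1.0)  # Charbonnier-like shape parameter.
scale = torch.full_like(x, 2.0)
nll = dist.nllfun(x, alpha, scale)  # One NLL per element of x, same dtype as x.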
def train_locally_adaptive(model, alpha, scale, trX, trY, learning_rate=0.01, epoch=500, verbose=True):
    params = list(model.parameters()) + list(alpha.parameters()) + list(scale.parameters())
    dist = distribution.Distribution()
    optimizer = torch.optim.Adam(params, lr=learning_rate, weight_decay=0.01)

    for e in tqdm(range(epoch)):
        y_hat = model(trX).view(-1)
        alphas = torch.exp(alpha(trX))
        scales = torch.exp(scale(trX))
        loss = general.lossfun((y_hat - trY)[:, None], alpha=alphas, scale=scales, approximate=False)
        scales = scales + 1e-10
        log_partition = torch.log(scales) + dist.log_base_partition_function(alphas)

        loss = (loss + log_partition).mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if verbose and np.mod(e, 100) == 0:
            print('{:<4}: loss={:.3f}'.format(e, loss.item()))
    return model, alpha, scale
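A hypothetical end-to-end setup for the training helper above, where alpha and scale are assumed to be small modules whose exponentiated outputs give per-sample shape and scale values (the data, layer sizes, and names here are illustrative only):

import torch

# Toy 1-D regression data (illustrative only).
trX = torch.linspace(-1.0, 1.0, 256).unsqueeze(1)
trY = trX.squeeze(1) ** 3 + 0.1 * torch.randn(256)

# Regressor plus per-sample log-alpha and log-scale heads.
model = torch.nn.Sequential(
    torch.nn.Linear(1, 32), torch.nn.ReLU(), torch.nn.Linear(32, 1))
alpha = torch.nn.Linear(1, 1)  # exp(alpha(trX)) gives per-sample shapes.
scale = torch.nn.Linear(1, 1)  # exp(scale(trX)) gives per-sample scales.

model, alpha, scale = train_locally_adaptive(
    model, alpha, scale, trX, trY, learning_rate=0.01, epoch=500)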
Example #28
def draw_samples(alpha, scale):
    r"""Draw samples from the robust distribution.

  This function implements Algorithm 1 of the paper. This code is written to allow
  for sampling from a set of different distributions, each parametrized by its
  own alpha and scale values, as opposed to the more standard approach of
  drawing N samples from the same distribution. This is done by repeatedly
  performing N instances of rejection sampling for each of the N distributions
  until at least one proposal for each of the N distributions has been accepted.
  All samples are drawn with a zero mean, to use a non-zero mean just add each
  mean to each sample.

  Args:
    alpha: A tensor/scalar or numpy array/scalar of floats where each element is
      the shape parameter of that element's distribution.
    scale: A tensor/scalar or numpy array/scalar of floats where each element is
      the scale parameter of that element's distribution. Must be the same shape
      as `alpha`.

  Returns:
    A tensor with the same shape and precision as `alpha` and `scale` where
    each element is a sample drawn from the distribution specified for that
    element by `alpha` and `scale`.
  """
    alpha = torch.as_tensor(alpha)
    scale = torch.as_tensor(scale)
    assert (alpha >= 0).all()
    assert (scale >= 0).all()
    float_dtype = alpha.dtype
    assert scale.dtype == float_dtype

    cauchy = torch.distributions.cauchy.Cauchy(0., np.sqrt(2.))
    uniform = torch.distributions.uniform.Uniform(0, 1)
    samples = torch.zeros_like(alpha)
    accepted = torch.zeros(alpha.shape).type(torch.uint8)
    while not accepted.type(torch.uint8).all():
        # Draw N samples from a Cauchy, our proposal distribution.
        cauchy_sample = torch.reshape(cauchy.sample((np.prod(alpha.shape), )),
                                      alpha.shape)
        cauchy_sample = cauchy_sample.type(alpha.dtype)

        # Compute the likelihood of each sample under its target distribution.
        nll = nllfun(cauchy_sample, torch.as_tensor(alpha),
                     torch.tensor(1.).type(float_dtype))

        # Bound the NLL. We don't use the approximate loss as it may cause
        # unpredictable behavior in the context of sampling.
        nll_bound = general.lossfun(
            cauchy_sample, 0., 1.,
            approximate=False) + log_base_partition_function(alpha)

        # Draw N samples from a uniform distribution, and use each uniform sample
        # to decide whether or not to accept each proposal sample.
        uniform_sample = torch.reshape(
            uniform.sample((np.prod(alpha.shape), )), alpha.shape)
        uniform_sample = uniform_sample.type(alpha.dtype)
        accept = uniform_sample <= torch.exp(nll_bound - nll)

        # If a sample is accepted, replace its element in `samples` with the
        # proposal sample, and set its bit in `accepted` to True.
        samples = torch.where(accept, cauchy_sample, samples)
        accepted = accepted | accept

    # Because our distribution is a location-scale family, we sample from
    # p(x | 0, \alpha, 1) and then scale each sample by `scale`.
    samples *= scale
    return samples
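A small usage sketch for draw_samples, drawing one sample per (alpha, scale) pair (the values are illustrative):

import numpy as np
import torch

alpha = torch.tensor(np.float32([0.0, 1.0, 2.0, 4.0]))
scale = torch.full_like(alpha, 2.0)
samples = draw_samples(alpha, scale)  # Same shape and dtype as alpha.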
Example #29
 def loss_helper(x, a, c):
     x = torch.tensor(x, device=device)
     a = torch.tensor(a).to(x)
     c = torch.tensor(c).to(x)
     return general.lossfun(x, a, c).cpu().detach().numpy()
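For completeness, a usage sketch for the helper above, assuming device is defined in the enclosing scope (e.g. 'cpu'):

import numpy as np

losses = loss_helper(np.arange(-5, 5, 0.5, np.float32), 1.0, 2.0)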