Example #1
 def gaussian_kl_divergence(self, mu1, ln_var1, mu2, ln_var2):
     # D_KL [ N(z ; mu1, var1) || N(z; mu2, var2) ]
     var1 = exponential.exp(ln_var1)
     inv_var2 = exponential.exp(-ln_var2)
     mu_diff = mu2 - mu1
     term1 = (var1 + mu_diff * mu_diff) * inv_var2
     loss = (term1 - ln_var1 + ln_var2 - 1.) * 0.5
     return sum.sum(loss)
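This is the closed-form KL divergence between two diagonal Gaussians, D_KL[N(mu1, var1) || N(mu2, var2)], summed over elements. Below is a standalone NumPy sketch (hypothetical parameter values, no Chainer dependency) that cross-checks the same expression against a Monte Carlo estimate:

import numpy as np

rng = np.random.default_rng(0)
mu1, ln_var1 = np.array([0.3, -1.0]), np.array([0.2, -0.5])
mu2, ln_var2 = np.array([0.0, 0.5]), np.array([0.0, 0.1])
var1, var2 = np.exp(ln_var1), np.exp(ln_var2)

# Closed form, the same expression the method above sums up.
kl_closed = 0.5 * np.sum((var1 + (mu2 - mu1) ** 2) / var2 - ln_var1 + ln_var2 - 1.0)

# Monte Carlo estimate: E_{z ~ N(mu1, var1)}[log N(z; mu1, var1) - log N(z; mu2, var2)].
z = mu1 + np.sqrt(var1) * rng.standard_normal((200000, 2))
log_p1 = -0.5 * (np.log(2 * np.pi) + ln_var1 + (z - mu1) ** 2 / var1)
log_p2 = -0.5 * (np.log(2 * np.pi) + ln_var2 + (z - mu2) ** 2 / var2)
kl_mc = np.mean(np.sum(log_p1 - log_p2, axis=1))

print(kl_closed, kl_mc)  # the two values agree to roughly two decimal places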
Example #2
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.

    """
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    J = mean.size
    var = exponential.exp(ln_var)
    return (sum.sum(mean * mean) + sum.sum(var) - sum.sum(ln_var) - J) * 0.5
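With ``mu2 = 0`` and ``ln_var2 = 0``, the general formula above reduces to the per-element term ``(mean**2 + var - ln_var - 1) / 2`` summed here. A hedged usage sketch (arbitrary values; assumes Chainer is installed and uses the public ``chainer.functions.gaussian_kl_divergence``, which wraps this computation):

import numpy as np
import chainer
import chainer.functions as F

mean = chainer.Variable(np.array([[0.5, -0.2]], dtype=np.float32))
ln_var = chainer.Variable(np.array([[0.1, -0.3]], dtype=np.float32))

kl = F.gaussian_kl_divergence(mean, ln_var)    # summed over all elements

# The same closed form written out with NumPy for comparison.
m, lv = mean.array, ln_var.array
kl_np = 0.5 * np.sum(m * m + np.exp(lv) - lv - 1.0)
print(kl.array, kl_np)                          # both hold the same scalar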
Example #3
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.

    """
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    J = mean.size
    var = exponential.exp(ln_var)
    return (sum.sum(mean * mean) + sum.sum(var) - sum.sum(ln_var) - J) * 0.5
Example #4
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return dist2._log_scale - dist1._log_scale \
        + EULER * (scale_1d2 - 1.) \
        + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                          + lgamma.lgamma(scale_1d2 + 1.)) \
        - 1 + (dist1.loc - dist2.loc) / dist2.scale
Example #5
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + EULER * (scale_1d2 - 1.) \
        + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                          + lgamma.lgamma(scale_1d2 + 1.)) \
        - 1 + (dist1.loc - dist2.loc) / dist2.scale
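Both Gumbel-Gumbel variants return the standard closed form log(b2/b1) + EULER*(b1/b2 - 1) + exp((loc2 - loc1)/b2 + lgamma(b1/b2 + 1)) - 1 + (loc1 - loc2)/b2. A NumPy-only sanity check against a Monte Carlo estimate (the parameter values are made up and ``log_pdf`` is a local helper, not part of Chainer):

import math
import numpy as np

rng = np.random.default_rng(0)
loc1, scale1 = 0.5, 1.2    # hypothetical Gumbel(loc1, scale1)
loc2, scale2 = -0.3, 0.8   # hypothetical Gumbel(loc2, scale2)
r = scale1 / scale2
EULER = np.euler_gamma

# Closed form, the same expression returned above.
kl_closed = (math.log(scale2) - math.log(scale1) + EULER * (r - 1.0)
             + math.exp((loc2 - loc1) / scale2 + math.lgamma(r + 1.0))
             - 1.0 + (loc1 - loc2) / scale2)

# Monte Carlo estimate of E_1[log p1(x) - log p2(x)] with x ~ Gumbel(loc1, scale1).
def log_pdf(x, loc, scale):
    y = (x - loc) / scale
    return -np.log(scale) - y - np.exp(-y)   # same form as the Gumbel log_prob examples below

x = loc1 - scale1 * np.log(-np.log(rng.uniform(size=500000)))  # inverse-CDF sampling
kl_mc = np.mean(log_pdf(x, loc1, scale1) - log_pdf(x, loc2, scale2))
print(kl_closed, kl_mc)  # agree to roughly two decimal places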
Example #6
def gaussian_nll(x, mean, ln_var, reduce='sum'):
    """Computes the negative log-likelihood of a Gaussian distribution.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function computes in
    elementwise manner the negative log-likelihood of :math:`x` on a
    Gaussian distribution :math:`N(\\mu, S)`,

    .. math::

        -\\log N(x; \\mu, \\sigma^2) =
        \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) +
        \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu),

    where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal
    matrix where :math:`S_{ii} = \\sigma_i^2`.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up
    or averaged respectively.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        mean (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): A variable representing mean of a Gaussian
            distribution, :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): A variable representing logarithm of
            variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either
            ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError`
            is raised.

    Returns:
        ~chainer.Variable:
            A variable representing the negative log-likelihood.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'"
            ' is given' % reduce)

    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    loss = (ln_var + math.log(2 * math.pi)) / 2 - x_power
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
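With ``reduce='no'`` the loss is simply the elementwise negative log density. A short NumPy sketch (made-up values) showing that the expression above matches a direct evaluation of -log N(x; mean, var):

import numpy as np

x = np.array([0.2, -1.5])        # hypothetical observations
mean = np.array([0.0, -1.0])
ln_var = np.array([0.3, -0.2])
var = np.exp(ln_var)

# Elementwise loss exactly as computed above (reduce='no').
nll = 0.5 * (ln_var + np.log(2 * np.pi)) + 0.5 * (x - mean) ** 2 / var

# Direct evaluation of the Gaussian density for comparison.
pdf = np.exp(-0.5 * (x - mean) ** 2 / var) / np.sqrt(2 * np.pi * var)
print(nll, -np.log(pdf))          # identical up to floating-point error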
Example #7
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return (
        exponential.log(dist2.scale)
        - exponential.log(dist1.scale)
        + diff / dist2.scale
        + dist1.scale / dist2.scale * exponential.exp(-diff / dist1.scale)
        - 1)
Example #8
def gaussian_nll(x, mean, ln_var, reduce='sum'):
    """Computes the negative log-likelihood of a Gaussian distribution.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function computes in
    elementwise manner the negative log-likelihood of :math:`x` on a
    Gaussian distribution :math:`N(\\mu, S)`,

    .. math::

        -\\log N(x; \\mu, \\sigma^2) =
        \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) +
        \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu),

    where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal
    matrix where :math:`S_{ii} = \\sigma_i^2`.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up
    or averaged respectively.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable.
        mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing mean of a Gaussian distribution, :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing logarithm of variance of a Gaussian distribution,
            :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either
            ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError`
            is raised.

    Returns:
        ~chainer.Variable:
            A variable representing the negative log-likelihood.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            'only \'sum\', \'mean\' and \'no\' are valid for \'reduce\', but '
            '\'%s\' is given' % reduce)

    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    loss = (ln_var + math.log(2 * math.pi)) / 2 - x_power
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
Example #9
def gaussian_nll(x, mean, ln_var):
    assert isinstance(x, variable.Variable)
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    return (ln_var + math.log(2 * math.pi)) / 2 - x_power
Example #10
def _e_chainer(a, b, c, beta=1.0):
    b = (a - b) * beta
    c = (a - c) * beta

    if b.data > 30 or c.data > 30:
        return 0

    if b.data < -20:
        exp_b = 0
    else:
        exp_b = exp(b)

    if c.data < -20:
        exp_c = 0
    else:
        exp_c = exp(c)

    return 1. / (1 + exp_b + exp_c)
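Up to the saturation guards, ``_e_chainer`` computes 1 / (1 + exp(beta*(a-b)) + exp(beta*(a-c))), i.e. the soft-argmin weight of ``a`` among {a, b, c}. A minimal NumPy restatement (the helper name ``e_numpy`` and the input values are made up):

import numpy as np

def e_numpy(a, b, c, beta=1.0):
    # Same quantity as _e_chainer, without the clipping branches.
    return 1.0 / (1.0 + np.exp((a - b) * beta) + np.exp((a - c) * beta))

a, b, c = 1.0, 2.0, 3.0
weights = np.exp(-np.array([a, b, c]))   # softmax over -beta * [a, b, c] with beta = 1
weights /= weights.sum()
print(e_numpy(a, b, c), weights[0])      # the two values coincide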
Example #11
def gaussian_kl_divergence(mean, ln_var, reduce='sum'):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function calculates
    the KL-divergence in elementwise manner between the given multi-dimensional
    Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up
    or averaged respectively.

    Args:
        mean (:class:`~chainer.Variable` or :ref:`ndarray`):
            A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :ref:`ndarray`):
            A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either
            ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError`
            is raised.

    Returns:
        ~chainer.Variable:
            A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'"
            ' is given' % reduce)

    var = exponential.exp(ln_var)
    mean_square = mean * mean
    loss = (mean_square + var - ln_var - 1) * 0.5
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
Example #12
def gaussian_kl_divergence(mean, ln_var, reduce='sum'):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function calculates
    the KL-divergence in elementwise manner between the given multi-dimensional
    Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the elementwise
    loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up
    or averaged respectively.

    Args:
        mean (:class:`~chainer.Variable` or :ref:`ndarray`):
            A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :ref:`ndarray`):
            A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either
            ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError`
            is raised.

    Returns:
        ~chainer.Variable:
            A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            'only \'sum\', \'mean\' and \'no\' are valid for \'reduce\', but '
            '\'%s\' is given' % reduce)

    var = exponential.exp(ln_var)
    mean_square = mean * mean
    loss = (mean_square + var - ln_var - 1) * 0.5
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
Example #13
    def sample_n(self, n):
        xp = cuda.get_array_module(self.mu)
        if xp is cuda.cupy:
            eps = xp.random.standard_normal(
                (n,)+self.mu.shape, dtype=self.mu.dtype)
        else:
            eps = xp.random.standard_normal(
                (n,)+self.mu.shape).astype(self.mu.dtype)

        noise = broadcast.broadcast_to(self.sigma, eps.shape) * eps
        noise += broadcast.broadcast_to(self.mu, eps.shape)

        return exponential.exp(noise)
Example #14
    def sample_n(self, n):
        xp = backend.get_array_module(self.mu)
        if xp is cuda.cupy:
            eps = xp.random.standard_normal((n, ) + self.mu.shape,
                                            dtype=self.mu.dtype)
        else:
            eps = xp.random.standard_normal((n, ) + self.mu.shape).astype(
                self.mu.dtype)

        noise = self.sigma * eps
        noise += self.mu

        return exponential.exp(noise)
Example #15
    def sample_n(self, n):
        xp = cuda.get_array_module(self.mu)
        if xp is cuda.cupy:
            eps = xp.random.standard_normal(
                (n,)+self.mu.shape, dtype=self.mu.dtype)
        else:
            eps = xp.random.standard_normal(
                (n,)+self.mu.shape).astype(self.mu.dtype)

        noise = self.sigma * eps
        noise += self.mu

        return exponential.exp(noise)
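All three ``sample_n`` variants draw log-normal samples by reparameterization: sample standard normal noise, scale and shift it, then exponentiate. A NumPy sketch (arbitrary ``mu`` and ``sigma``) that also checks the empirical moments against the closed forms used by the ``mean()`` and ``variance()`` examples further down:

import numpy as np

rng = np.random.default_rng(0)
mu, sigma = 0.4, 0.7
eps = rng.standard_normal(1000000)
samples = np.exp(mu + sigma * eps)        # same reparameterization as sample_n

# Empirical moments vs. the log-normal closed forms.
print(samples.mean(), np.exp(mu + 0.5 * sigma ** 2))                             # both ~1.91
print(samples.var(), np.exp(2 * mu + sigma ** 2) * (np.exp(sigma ** 2) - 1.0))   # both ~2.30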
Example #16
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
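The code above follows the docstring's definition: the likelihood p(y) is a softmax of W_y^T x over the true label and the sampled negatives, and the per-example loss is -log p(t) - sum_s log(1 - p(s)). A small NumPy sketch of that definition for a single example (all sizes and values are hypothetical):

import numpy as np

rng = np.random.default_rng(0)
D, V = 4, 10                                # feature dimension and vocabulary size
x = rng.standard_normal(D)                  # one input vector
W = rng.standard_normal((V, D))             # weight matrix
t = 2                                       # true label
samples = np.array([5, 7, 9])               # negative samples

scores = W[np.r_[t, samples]] @ x           # W_y^T x over {t} plus the negatives
p = np.exp(scores - scores.max())
p /= p.sum()                                # p(y) as defined in the docstring
loss = -np.log(p[0]) - np.sum(np.log(1.0 - p[1:]))
print(loss)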
Example #17
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(reshape.reshape(logz, (batch_size, 1)),
                                        neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size, ))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
Example #18
    def backward(self, indexes, grad_outputs):
        gy, = grad_outputs
        logit, x = self.get_retained_inputs()
        xp = backend.get_array_module(x)
        dlogit = x - 1. / (1. + exponential.exp(-logit))

        # extreme logit
        nan = xp.array(xp.nan).astype(dlogit.dtype)
        logit_isinf = xp.bitwise_or(self.logit_ispinf, self.logit_isminf)
        dlogit = where.where(logit_isinf, nan, dlogit)
        if self.binary_check:
            dlogit = where.where(self.invalid, nan, dlogit)

        return sum.sum_to(gy * dlogit, logit.shape), None
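Here ``dlogit = x - sigmoid(logit)`` is the derivative of the Bernoulli log-likelihood ``x * logit - log(1 + exp(logit))`` with respect to the logit; the rest of the method only propagates NaN where the inputs were invalid or the logit was infinite. A NumPy finite-difference check of that derivative (the helper ``bern_log_prob`` and the values are made up):

import numpy as np

def bern_log_prob(logit, x):
    # log p(x) for x in {0, 1} with p = sigmoid(logit), written via log-sum-exp.
    return x * logit - np.logaddexp(0.0, logit)

logit, x, eps = 0.8, 1.0, 1e-6
grad_fd = (bern_log_prob(logit + eps, x) - bern_log_prob(logit - eps, x)) / (2 * eps)
grad_closed = x - 1.0 / (1.0 + np.exp(-logit))   # the dlogit used in backward above
print(grad_fd, grad_closed)                      # agree to about six decimal places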
Example #19
    def backward(self, indexes, grad_outputs):
        gy, = grad_outputs
        logit, x = self.get_retained_inputs()
        xp = cuda.get_array_module(x)
        dlogit = x - 1. / (1. + exponential.exp(-logit))

        # extreme logit
        nan_dlogit = xp.zeros_like(dlogit.array)
        nan_dlogit[self.invalid] = xp.nan
        nan_dlogit[self.to_zero] = xp.nan
        nan_dlogit[self.to_m_inf] = xp.nan
        dlogit += nan_dlogit

        return gy * dlogit, None
Example #20
    def backward(self, indexes, grad_outputs):
        gy, = grad_outputs
        logit, x = self.get_retained_inputs()
        xp = backend.get_array_module(x)
        dlogit = x - 1. / (1. + exponential.exp(-logit))

        # extreme logit
        nan_dlogit = xp.zeros_like(dlogit.array)
        if self.binary_check:
            nan_dlogit[self.invalid] = xp.nan
        nan_dlogit[self.logit_ispinf] = xp.nan
        nan_dlogit[self.logit_isminf] = xp.nan
        dlogit += nan_dlogit

        return gy * dlogit, None
Example #21
    def backward(self, indexes, grad_outputs):
        gy, = grad_outputs
        logit, x = self.get_retained_inputs()
        xp = backend.get_array_module(x)
        dlogit = x - 1. / (1. + exponential.exp(-logit))

        # extreme logit
        nan_dlogit = xp.zeros_like(dlogit.array)
        if self.binary_check:
            nan_dlogit[self.invalid] = xp.nan
        nan_dlogit[self.logit_ispinf] = xp.nan
        nan_dlogit[self.logit_isminf] = xp.nan
        dlogit += nan_dlogit

        return gy * dlogit, None
Example #22
    def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            v = tanh(self.av(array.concat.concat((a, state['h2']), axis=1)))
            w = self.vw(v)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e),
                                        (batch_size, self.hidden_size))
        return context, e_list, sum_e
Example #23
    def __init__(self, p=None, **kwargs):
        logit = None
        if kwargs:
            logit, = argument.parse_kwargs(kwargs, ('logit', logit))
        if not (p is None) ^ (logit is None):
            raise ValueError(
                "Either `p` or `logit` (not both) must have a value.")

        with chainer.using_config('enable_backprop', True):
            if p is None:
                logit = chainer.as_variable(logit)
                self.__log_p = log_softmax.log_softmax(logit, axis=-1)
                self.__p = exponential.exp(self.__log_p)
            else:
                self.__p = chainer.as_variable(p)
                self.__log_p = exponential.log(self.__p)
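The constructor keeps ``p`` and ``log p`` consistent whichever one is supplied: a logit is normalized with log-softmax and then exponentiated, while a probability is simply logged. A NumPy sketch of that normalization (made-up logits):

import numpy as np

logit = np.array([2.0, -1.0, 0.5])        # hypothetical unnormalized logits
z = logit - logit.max()                   # shift for numerical stability
log_p = z - np.log(np.exp(z).sum())       # log-softmax along the last axis
p = np.exp(log_p)
print(p, p.sum())                         # a valid probability vector, sums to 1

# Going the other way, as in the branch where p is given:
print(np.log(p))                          # recovers log_p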
Example #24
    def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            w = self.aw(a, state['h2'])
            w.data = xp.clip(w.data, -20, 20)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e),
                                        (batch_size, self.hidden_size))
        return context, e_list, sum_e
Example #25
    def __init__(self, p=None, **kwargs):
        logit = None
        if kwargs:
            logit, = argument.parse_kwargs(
                kwargs, ('logit', logit))
        if not (p is None) ^ (logit is None):
            raise ValueError(
                "Either `p` or `logit` (not both) must have a value.")

        with chainer.using_config('enable_backprop', True):
            if p is None:
                logit = chainer.as_variable(logit)
                self.__log_p = log_softmax.log_softmax(logit, axis=-1)
                self.__p = exponential.exp(self.__log_p)
            else:
                self.__p = chainer.as_variable(p)
                self.__log_p = exponential.log(self.__p)
Example #26
    def __init__(self, loc, scale=None, **kwargs):
        super(Normal, self).__init__()
        log_scale = None
        if kwargs:
            log_scale, = argument.parse_kwargs(
                kwargs, ('log_scale', log_scale))
        if not (scale is None) ^ (log_scale is None):
            raise ValueError(
                "Either `scale` or `log_scale` (not both) must have a value.")
        self.loc = chainer.as_variable(loc)

        with chainer.using_config('enable_backprop', True):
            if scale is None:
                self.__log_scale = chainer.as_variable(log_scale)
                self.__scale = exponential.exp(self.log_scale)
            else:
                self.__scale = chainer.as_variable(scale)
                self.__log_scale = exponential.log(self.scale)
Example #27
    def __init__(self, loc, scale=None, **kwargs):
        super(Normal, self).__init__()
        log_scale = None
        if kwargs:
            log_scale, = argument.parse_kwargs(
                kwargs, ('log_scale', log_scale))
        if not (scale is None) ^ (log_scale is None):
            raise ValueError(
                "Either `scale` or `log_scale` (not both) must have a value.")
        self.loc = chainer.as_variable(loc)

        with chainer.using_config('enable_backprop', True):
            if scale is None:
                self.__log_scale = chainer.as_variable(log_scale)
                self.__scale = exponential.exp(self.log_scale)
            else:
                self.__scale = chainer.as_variable(scale)
                self.__log_scale = exponential.log(self.scale)
Example #28
def softmin_chainer(a, b, c, beta=1.0):
    xp = cuda.get_array_module(a)
    s = Variable(xp.zeros(1))[0]
    if a.data < b.data:
        if a.data < c.data:
            s += exp(-(b - a) * beta)
            s += exp(-(c - a) * beta)
            return -1 / beta * log1p(s) + a
        else:
            s += exp(-(a - c) * beta)
            s += exp(-(b - c) * beta)
            return -1 / beta * log1p(s) + c
    else:
        if b.data < c.data:
            s += exp(-(a - b) * beta)
            s += exp(-(c - b) * beta)
            return -1 / beta * log1p(s) + b
        else:
            s += exp(-(a - c) * beta)
            s += exp(-(b - c) * beta)
            return -1 / beta * log1p(s) + c
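``softmin_chainer`` evaluates the soft minimum -1/beta * log(sum_i exp(-beta * v_i)) of three values, factoring the smallest value out of the sum (hence the ``log1p``) so that the exponentials never overflow. A compact NumPy version of the same idea (the name ``softmin_numpy`` and the inputs are made up):

import numpy as np

def softmin_numpy(a, b, c, beta=1.0):
    # Stable soft minimum: factor out the smallest value, as softmin_chainer does.
    v = np.array([a, b, c])
    m = v.min()
    return m - np.log(np.sum(np.exp(-(v - m) * beta))) / beta

a, b, c = 1.0, 2.5, 0.7
naive = -np.log(np.sum(np.exp(-np.array([a, b, c]))))  # beta = 1, no stabilization
print(softmin_numpy(a, b, c), naive)                   # equal, and never above min(a, b, c)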
Example #29
def gaussian_nll(x, mean, ln_var):
    """Computes the negative log-likelihood of a Gaussian distribution.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns the negative
    log-likelihood of :math:`x` on a Gaussian distribution :math:`N(\\mu, S)`,

    .. math::

        -\\log N(x; \\mu, \\sigma^2) =
        \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) +
        \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu),

    where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal
    matrix where :math:`S_{ii} = \\sigma_i^2`.

    Args:
        x (~chainer.Variable): Input variable.
        mean (~chainer.Variable): A variable representing mean of a Gaussian
            distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing the negative log-likelihood.

    """
    assert isinstance(x, variable.Variable)
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    D = x.size
    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    return (sum.sum(ln_var) + D * math.log(2 * math.pi)) / 2 - sum.sum(x_power)
Example #30
def gaussian_nll(x, mean, ln_var):
    """Computes the negative log-likelihood of a Gaussian distribution.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns the negative
    log-likelihood of :math:`x` on a Gaussian distribution :math:`N(\\mu, S)`,

    .. math::

        -\\log N(x; \\mu, \\sigma^2) =
        \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) +
        \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu),

    where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal
    matrix where :math:`S_{ii} = \\sigma_i^2`.

    Args:
        x (~chainer.Variable): Input variable.
        mean (~chainer.Variable): A variable representing mean of a Gaussian
            distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing the negative log-likelihood.

    """
    assert isinstance(x, variable.Variable)
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    D = x.size
    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    return (sum.sum(ln_var) + D * math.log(2 * math.pi)) / 2 - sum.sum(x_power)
Example #31
 def prob(self, x):
     scale = self.scale
     return 0.5 / scale * exponential.exp(- abs(x - self.loc) / scale)
Example #32
 def log_prob(self, x):
     y = (x - self.loc) / self.scale
     return - self._log_scale - y - exponential.exp(-y)
Example #33
 def variance(self):
     return (exponential.exp(2 * self.mu + self.sigma**2) *
             (exponential.exp(self.sigma**2) - 1))
Example #34
 def p(self):
     if self.__p is not None:
         return chainer.as_variable(self.__p)
     else:
         return exponential.exp(self.log_p)
Example #35
 def mean(self):
     return exponential.exp(self.mu + 0.5 * self.sigma**2)
Example #36
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
Example #37
 def prob(self, x):
     return PROBC / broadcast.broadcast_to(self.scale, x.shape) * \
         exponential.exp(
             - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2
             / broadcast.broadcast_to(self.scale, x.shape) ** 2)
Example #38
def _kl_bernoulli_bernoulli(dist1, dist2):
    return (dist1.logit - dist2.logit) * (dist1.p - 1.) \
        - exponential.log(exponential.exp(-dist1.logit) + 1) \
        + exponential.log(exponential.exp(-dist2.logit) + 1)
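Rewriting KL(Bernoulli(p1) || Bernoulli(p2)) in terms of logits avoids evaluating log(1 - p) directly. A NumPy check (made-up logits) that the logit-space expression above equals the probability-space definition:

import numpy as np

logit1, logit2 = 1.5, -0.4
p1 = 1.0 / (1.0 + np.exp(-logit1))
p2 = 1.0 / (1.0 + np.exp(-logit2))

# Logit-space expression, as returned by _kl_bernoulli_bernoulli.
kl_logit = ((logit1 - logit2) * (p1 - 1.0)
            - np.log(np.exp(-logit1) + 1.0)
            + np.log(np.exp(-logit2) + 1.0))

# Probability-space definition of the same divergence.
kl_p = p1 * np.log(p1 / p2) + (1.0 - p1) * np.log((1.0 - p1) / (1.0 - p2))
print(kl_logit, kl_p)    # identical up to floating-point error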
Example #39
 def log_prob(self, x):
     y = (x - self.loc) / self.scale
     return -exponential.log(self.scale) - y - exponential.exp(-y)
Example #40
 def scale(self):
     if self.__scale is not None:
         return chainer.as_variable(self.__scale)
     else:
         return exponential.exp(self.log_scale)
Example #41
def _kl_bernoulli_bernoulli(dist1, dist2):
    return (dist1.logit - dist2.logit) * (dist1.p - 1.) \
        - exponential.log(exponential.exp(-dist1.logit) + 1) \
        + exponential.log(exponential.exp(-dist2.logit) + 1)
Example #42
 def prob(self, x):
     return PROBC / broadcast.broadcast_to(self.scale, x.shape) * \
         exponential.exp(
             - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2
             / broadcast.broadcast_to(self.scale, x.shape) ** 2)
Example #43
 def variance(self):
     return exponential.exp(2 * self.mu + self.sigma ** 2) \
         * (exponential.exp(self.sigma ** 2) - 1)
Example #44
 def backward(self, indexes, gy):
     x = self.get_retained_inputs()[0]
     return (2 * numpy.pi) ** -0.5 * exponential.exp(-0.5 * x ** 2) * gy[0],
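The returned expression is the standard normal density phi(x) = (2*pi)**-0.5 * exp(-x**2 / 2), which is the derivative of the Gaussian CDF. Assuming the forward pass computes that CDF (an ndtr-style function), a quick finite-difference check with NumPy (the local ``ndtr`` helper and the value of ``x`` are made up):

import math
import numpy as np

def ndtr(x):
    # Standard normal CDF via the error function.
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

x, eps = 0.3, 1e-6
grad_fd = (ndtr(x + eps) - ndtr(x - eps)) / (2 * eps)
grad_closed = (2 * np.pi) ** -0.5 * np.exp(-0.5 * x ** 2)  # expression used in backward
print(grad_fd, grad_closed)    # agree to about six decimal places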
Example #45
 def mean(self):
     return exponential.exp(self.mu + 0.5 * self.sigma ** 2)
Example #46
 def prob(self, x):
     return (PROBC / self.scale) * exponential.exp(
         - 0.5 * (x - self.loc) ** 2 / self.scale ** 2)
Example #47
 def backward(self, indexes, gy):
     x = self.get_retained_inputs()[0]
     return (2 * numpy.pi)**-0.5 * exponential.exp(-0.5 * x**2) * gy[0],
Example #48
def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the
    elementwise loss values. If it is ``'mean'``, this function takes
    a mean of loss values.

    Args:
        x (~chainer.Variable): Batch of input vectors.
            Its shape should be :math:`(N, D)`.
        t (~chainer.Variable): Vector of ground truth labels.
            Its shape should be :math:`(N,)`. Each element :math:`v`
            should satisfy :math:`0 \\leq v < V` or be :math:`-1`,
            where :math:`V` is the number of label types.
        W (~chainer.Variable): Weight matrix.
            Its shape should be :math:`(V, D)`
        samples (~chainer.Variable): Negative samples.
            Its shape should be :math:`(N, S)` where :math:`S` is
            the number of negative samples.
        reduce (str): Reduction option. Its value must be either
            ``'no'`` or ``'mean'``. Otherwise,
            :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s).
            If ``reduce`` is ``'no'``, the output variable holds an
            array whose shape is :math:`(N,)` .
            If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """

    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
Example #49
 def prob(self, x):
     bl = broadcast.broadcast_to(self.loc, x.shape)
     bs = broadcast.broadcast_to(self.scale, x.shape)
     return 0.5 / bs * exponential.exp(-abs(x - bl) / bs)
Example #50
 def prob(self, x):
     scale = self.scale
     return 0.5 / scale * exponential.exp(- abs(x - self.loc) / scale)
Example #51
 def p(self):
     if self.__p is not None:
         return chainer.as_variable(self.__p)
     else:
         return exponential.exp(self.log_p)
Example #52
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return (dist2._log_scale - dist1._log_scale + EULER * (scale_1d2 - 1.) +
            exponential.exp((dist2.loc - dist1.loc) / dist2.scale +
                            lgamma.lgamma(scale_1d2 + 1.)) - 1 +
            (dist1.loc - dist2.loc) / dist2.scale)