def gaussian_kl_divergence(self, mu1, ln_var1, mu2, ln_var2):
    # D_KL [ N(z ; mu1, var1) || N(z; mu2, var2) ]
    var1 = exponential.exp(ln_var1)
    inv_var2 = exponential.exp(-ln_var2)
    mu_diff = mu2 - mu1
    term1 = (var1 + mu_diff * mu_diff) * inv_var2
    loss = (term1 - ln_var1 + ln_var2 - 1.) * 0.5
    return sum.sum(loss)
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional
    Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

        D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
        given gaussian distribution and the standard gaussian.

    """
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    J = mean.size
    var = exponential.exp(ln_var)
    return (sum.sum(mean * mean) + sum.sum(var) - sum.sum(ln_var) - J) * 0.5
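# Hedged sanity check (plain NumPy, not part of the snippet above): the closed
# form used in gaussian_kl_divergence is, per dimension,
#   D_KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1),
# which is zero exactly when mu = 0 and ln_var = 0 (i.e. sigma = 1).
import numpy as np

mean = np.array([0.0, 0.5], dtype=np.float32)
ln_var = np.array([0.0, np.log(2.0)], dtype=np.float32)
kl = 0.5 * np.sum(mean ** 2 + np.exp(ln_var) - ln_var - 1)
assert kl >= 0  # KL is non-negative; the first dimension contributes exactly 0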
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return dist2._log_scale - dist1._log_scale \
        + EULER * (scale_1d2 - 1.) \
        + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                          + lgamma.lgamma(scale_1d2 + 1.)) \
        - 1 + (dist1.loc - dist2.loc) / dist2.scale
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + EULER * (scale_1d2 - 1.) \
        + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                          + lgamma.lgamma(scale_1d2 + 1.)) \
        - 1 + (dist1.loc - dist2.loc) / dist2.scale
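# Hedged Monte Carlo check of the Gumbel-Gumbel closed form above (pure
# NumPy/SciPy sketch; EULER is the Euler-Mascheroni constant, ~0.5772).
import numpy as np
from scipy.special import gammaln

EULER = np.euler_gamma
m1, b1, m2, b2 = 0.0, 1.0, 0.5, 2.0

def gumbel_logpdf(x, loc, scale):
    z = (x - loc) / scale
    return -np.log(scale) - z - np.exp(-z)

closed = (np.log(b2) - np.log(b1) + EULER * (b1 / b2 - 1.)
          + np.exp((m2 - m1) / b2 + gammaln(b1 / b2 + 1.))
          - 1 + (m1 - m2) / b2)
np.random.seed(0)
x = np.random.gumbel(m1, b1, size=1000000)
mc = np.mean(gumbel_logpdf(x, m1, b1) - gumbel_logpdf(x, m2, b2))
assert np.isclose(closed, mc, atol=0.01)  # MC estimate of E1[log f1 - log f2]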
def gaussian_nll(x, mean, ln_var, reduce='sum'): """Computes the negative log-likelihood of a Gaussian distribution. Given two variable ``mean`` representing :math:`\\mu` and ``ln_var`` representing :math:`\\log(\\sigma^2)`, this function computes in elementwise manner the negative log-likelihood of :math:`x` on a Gaussian distribution :math:`N(\\mu, S)`, .. math:: -\\log N(x; \\mu, \\sigma^2) = \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) + \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu), where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal matrix where :math:`S_{ii} = \\sigma_i^2`. The output is a variable whose value depends on the value of the option ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged respectively. Args: x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): Input variable. mean (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): A variable representing mean of a Gaussian distribution, :math:`\\mu`. ln_var (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): A variable representing logarithm of variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`. reduce (str): Reduction option. Its value must be either ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised. Returns: ~chainer.Variable: A variable representing the negative log-likelihood. If ``reduce`` is ``'no'``, the output variable holds array whose shape is same as one of (hence both of) input variables. If it is ``'sum'`` or ``'mean'``, the output variable holds a scalar value. """ if reduce not in ('sum', 'mean', 'no'): raise ValueError( "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'" ' is given' % reduce) x_prec = exponential.exp(-ln_var) x_diff = x - mean x_power = (x_diff * x_diff) * x_prec * -0.5 loss = (ln_var + math.log(2 * math.pi)) / 2 - x_power if reduce == 'sum': return sum.sum(loss) elif reduce == 'mean': return average.average(loss) else: return loss
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return (
        exponential.log(dist2.scale) - exponential.log(dist1.scale)
        + diff / dist2.scale
        + dist1.scale / dist2.scale * exponential.exp(-diff / dist1.scale)
        - 1)
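# Hedged numerical check of the Laplace-Laplace closed form above: integrate
# f1(x) * log(f1(x) / f2(x)) on a wide grid with scipy.stats.laplace.
import numpy as np
from scipy.stats import laplace

loc1, s1, loc2, s2 = 0.0, 1.0, 1.0, 2.0
diff = abs(loc1 - loc2)
closed = (np.log(s2) - np.log(s1) + diff / s2
          + s1 / s2 * np.exp(-diff / s1) - 1)
x = np.linspace(-50, 50, 200001)
f1 = laplace.pdf(x, loc1, s1)
f2 = laplace.pdf(x, loc2, s2)
numeric = np.trapz(f1 * (np.log(f1) - np.log(f2)), x)
assert np.isclose(closed, numeric, atol=1e-4)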
def gaussian_nll(x, mean, ln_var, reduce='sum'): """Computes the negative log-likelihood of a Gaussian distribution. Given two variable ``mean`` representing :math:`\\mu` and ``ln_var`` representing :math:`\\log(\\sigma^2)`, this function computes in elementwise manner the negative log-likelihood of :math:`x` on a Gaussian distribution :math:`N(\\mu, S)`, .. math:: -\\log N(x; \\mu, \\sigma^2) = \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) + \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu), where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal matrix where :math:`S_{ii} = \\sigma_i^2`. The output is a variable whose value depends on the value of the option ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged respectively. Args: x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable. mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing mean of a Gaussian distribution, :math:`\\mu`. ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing logarithm of variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`. reduce (str): Reduction option. Its value must be either ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised. Returns: ~chainer.Variable: A variable representing the negative log-likelihood. If ``reduce`` is ``'no'``, the output variable holds array whose shape is same as one of (hence both of) input variables. If it is ``'sum'`` or ``'mean'``, the output variable holds a scalar value. """ if reduce not in ('sum', 'mean', 'no'): raise ValueError( 'only \'sum\', \'mean\' and \'no\' are valid for \'reduce\', but ' '\'%s\' is given' % reduce) x_prec = exponential.exp(-ln_var) x_diff = x - mean x_power = (x_diff * x_diff) * x_prec * -0.5 loss = (ln_var + math.log(2 * math.pi)) / 2 - x_power if reduce == 'sum': return sum.sum(loss) elif reduce == 'mean': return average.average(loss) else: return loss
def gaussian_nll(x, mean, ln_var):
    assert isinstance(x, variable.Variable)
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    return (ln_var + math.log(2 * math.pi)) / 2 - x_power
def _e_chainer(a, b, c, beta=1.0):
    b = (a - b) * beta
    c = (a - c) * beta
    if b.data > 30 or c.data > 30:
        return 0
    if b.data < -20:
        exp_b = 0
    else:
        exp_b = exp(b)
    if c.data < -20:
        exp_c = 0
    else:
        exp_c = exp(c)
    return 1. / (1 + exp_b + exp_c)
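# Hedged NumPy note on _e_chainer: without the clipping branches it is the
# softmin weight of `a`, i.e. exp(-beta*a) / sum of exp(-beta*v) over
# v in {a, b, c}; the 30 / -20 thresholds just guard exp() against
# overflow and underflow.
import numpy as np

a, b, c, beta = 1.0, 2.0, 0.5, 1.0
direct = 1. / (1. + np.exp((a - b) * beta) + np.exp((a - c) * beta))
weights = np.exp(-beta * np.array([a, b, c]))
assert np.isclose(direct, weights[0] / weights.sum())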
def gaussian_kl_divergence(mean, ln_var, reduce='sum'): """Computes the KL-divergence of Gaussian variables from the standard one. Given two variable ``mean`` representing :math:`\\mu` and ``ln_var`` representing :math:`\\log(\\sigma^2)`, this function calculates the KL-divergence in elementwise manner between the given multi-dimensional Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)` .. math:: D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)), where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2` and :math:`I` is an identity matrix. The output is a variable whose value depends on the value of the option ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged respectively. Args: mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing mean of given gaussian distribution, :math:`\\mu`. ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing logarithm of variance of given gaussian distribution, :math:`\\log(\\sigma^2)`. reduce (str): Reduction option. Its value must be either ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised. Returns: ~chainer.Variable: A variable representing KL-divergence between given gaussian distribution and the standard gaussian. If ``reduce`` is ``'no'``, the output variable holds array whose shape is same as one of (hence both of) input variables. If it is ``'sum'`` or ``'mean'``, the output variable holds a scalar value. """ if reduce not in ('sum', 'mean', 'no'): raise ValueError( "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'" ' is given' % reduce) var = exponential.exp(ln_var) mean_square = mean * mean loss = (mean_square + var - ln_var - 1) * 0.5 if reduce == 'sum': return sum.sum(loss) elif reduce == 'mean': return average.average(loss) else: return loss
def gaussian_kl_divergence(mean, ln_var, reduce='sum'): """Computes the KL-divergence of Gaussian variables from the standard one. Given two variable ``mean`` representing :math:`\\mu` and ``ln_var`` representing :math:`\\log(\\sigma^2)`, this function calculates the KL-divergence in elementwise manner between the given multi-dimensional Gaussian :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)` .. math:: D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)), where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2` and :math:`I` is an identity matrix. The output is a variable whose value depends on the value of the option ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged respectively. Args: mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing mean of given gaussian distribution, :math:`\\mu`. ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable representing logarithm of variance of given gaussian distribution, :math:`\\log(\\sigma^2)`. reduce (str): Reduction option. Its value must be either ``'sum'``, ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised. Returns: ~chainer.Variable: A variable representing KL-divergence between given gaussian distribution and the standard gaussian. If ``reduce`` is ``'no'``, the output variable holds array whose shape is same as one of (hence both of) input variables. If it is ``'sum'`` or ``'mean'``, the output variable holds a scalar value. """ if reduce not in ('sum', 'mean', 'no'): raise ValueError( 'only \'sum\', \'mean\' and \'no\' are valid for \'reduce\', but ' '\'%s\' is given' % reduce) var = exponential.exp(ln_var) mean_square = mean * mean loss = (mean_square + var - ln_var - 1) * 0.5 if reduce == 'sum': return sum.sum(loss) elif reduce == 'mean': return average.average(loss) else: return loss
def sample_n(self, n):
    xp = cuda.get_array_module(self.mu)
    if xp is cuda.cupy:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape, dtype=self.mu.dtype)
    else:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape).astype(self.mu.dtype)
    noise = broadcast.broadcast_to(self.sigma, eps.shape) * eps
    noise += broadcast.broadcast_to(self.mu, eps.shape)
    return exponential.exp(noise)
def sample_n(self, n):
    xp = backend.get_array_module(self.mu)
    if xp is cuda.cupy:
        eps = xp.random.standard_normal((n, ) + self.mu.shape,
                                        dtype=self.mu.dtype)
    else:
        eps = xp.random.standard_normal((n, ) + self.mu.shape).astype(
            self.mu.dtype)
    noise = self.sigma * eps
    noise += self.mu
    return exponential.exp(noise)
def sample_n(self, n):
    xp = cuda.get_array_module(self.mu)
    if xp is cuda.cupy:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape, dtype=self.mu.dtype)
    else:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape).astype(self.mu.dtype)
    noise = self.sigma * eps
    noise += self.mu
    return exponential.exp(noise)
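# Hedged NumPy restatement of the sampling in the three variants above: a
# LogNormal(mu, sigma) draw is exp(mu + sigma * eps) with eps ~ N(0, 1), the
# reparameterization trick, which keeps samples differentiable w.r.t. mu and
# sigma when eps is treated as a constant.
import numpy as np

mu, sigma = 0.0, 0.5
eps = np.random.standard_normal((1000,))
samples = np.exp(mu + sigma * eps)
assert (samples > 0).all()  # log-normal support is the positive reals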
def black_out(x, t, W, samples): """BlackOut loss function. BlackOut loss function is defined as .. math:: -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)), where :math:`t` is the correct label, :math:`S` is a set of negative examples and :math:`p(\cdot)` is likelihood of a given label. And, :math:`p` is defined as .. math:: p(y) = \\frac{\\exp(W_y^\\top x)}{ \\sum_{s \\in samples} \\exp(W_s^\\top x)}. Args: x (~chainer.Variable): Batch of input vectors. t (~chainer.Variable): Vector of ground truth labels. W (~chainer.Variable): Weight matrix. samples (~chainer.Variable): Negative samples. Returns: ~chainer.Variable: Loss value. See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \ Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_ .. seealso:: :class:`~chainer.links.BlackOut`. """ batch_size = x.shape[0] neg_emb = embed_id.embed_id(samples, W) neg_y = matmul.batch_matmul(neg_emb, x) neg_y = reshape.reshape(neg_y, neg_y.shape[:-1]) pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1) pos_y = matmul.batch_matmul(pos_emb, x) pos_y = reshape.reshape(pos_y, pos_y.shape[:-1]) logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1) blogz, bneg_y = broadcast.broadcast( reshape.reshape(logz, (batch_size, 1)), neg_y) ny = exponential.log(1 - exponential.exp(bneg_y - blogz)) py = reshape.reshape(pos_y, (batch_size,)) loss = py - logz + _sum.sum(ny, axis=1) return -_sum.sum(loss) / batch_size
def black_out(x, t, W, samples): """BlackOut loss function. BlackOut loss function is defined as .. math:: -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)), where :math:`t` is the correct label, :math:`S` is a set of negative examples and :math:`p(\cdot)` is likelihood of a given label. And, :math:`p` is defined as .. math:: p(y) = \\frac{\\exp(W_y^\\top x)}{ \\sum_{s \\in samples} \\exp(W_s^\\top x)}. Args: x (~chainer.Variable): Batch of input vectors. t (~chainer.Variable): Vector of ground truth labels. W (~chainer.Variable): Weight matrix. samples (~chainer.Variable): Negative samples. Returns: ~chainer.Variable: Loss value. See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \ Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_ .. seealso:: :class:`~chainer.links.BlackOut`. """ batch_size = x.shape[0] neg_emb = embed_id.embed_id(samples, W) neg_y = matmul.batch_matmul(neg_emb, x) neg_y = reshape.reshape(neg_y, neg_y.shape[:-1]) pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1) pos_y = matmul.batch_matmul(pos_emb, x) pos_y = reshape.reshape(pos_y, pos_y.shape[:-1]) logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1) blogz, bneg_y = broadcast.broadcast(reshape.reshape(logz, (batch_size, 1)), neg_y) ny = exponential.log(1 - exponential.exp(bneg_y - blogz)) py = reshape.reshape(pos_y, (batch_size, )) loss = py - logz + _sum.sum(ny, axis=1) return -_sum.sum(loss) / batch_size
def backward(self, indexes, grad_outputs):
    gy, = grad_outputs
    logit, x = self.get_retained_inputs()
    xp = backend.get_array_module(x)

    dlogit = x - 1. / (1. + exponential.exp(-logit))

    # extreme logit
    nan = xp.array(xp.nan).astype(dlogit.dtype)
    logit_isinf = xp.bitwise_or(self.logit_ispinf, self.logit_isminf)
    dlogit = where.where(logit_isinf, nan, dlogit)
    if self.binary_check:
        dlogit = where.where(self.invalid, nan, dlogit)

    return sum.sum_to(gy * dlogit, logit.shape), None
def backward(self, indexes, grad_outputs):
    gy, = grad_outputs
    logit, x = self.get_retained_inputs()
    xp = cuda.get_array_module(x)

    dlogit = x - 1. / (1. + exponential.exp(-logit))

    # extreme logit
    nan_dlogit = xp.zeros_like(dlogit.array)
    nan_dlogit[self.invalid] = xp.nan
    nan_dlogit[self.to_zero] = xp.nan
    nan_dlogit[self.to_m_inf] = xp.nan
    dlogit += nan_dlogit

    return gy * dlogit, None
def backward(self, indexes, grad_outputs):
    gy, = grad_outputs
    logit, x = self.get_retained_inputs()
    xp = backend.get_array_module(x)

    dlogit = x - 1. / (1. + exponential.exp(-logit))

    # extreme logit
    nan_dlogit = xp.zeros_like(dlogit.array)
    if self.binary_check:
        nan_dlogit[self.invalid] = xp.nan
    nan_dlogit[self.logit_ispinf] = xp.nan
    nan_dlogit[self.logit_isminf] = xp.nan
    dlogit += nan_dlogit

    return gy * dlogit, None
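# Hedged finite-difference check of the gradient used in the backward methods
# above: for the Bernoulli log-likelihood x*logit - log(1 + exp(logit)), the
# derivative w.r.t. the logit is x - sigmoid(logit).
import numpy as np

def log_prob(logit, x):
    return x * logit - np.log1p(np.exp(logit))

logit, x, h = 0.7, 1.0, 1e-6
analytic = x - 1. / (1. + np.exp(-logit))
numeric = (log_prob(logit + h, x) - log_prob(logit - h, x)) / (2 * h)
assert np.isclose(analytic, numeric, atol=1e-6)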
def __call__(self, a_list, state, batch_size, xp):
    e_list = []
    sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
    for a in a_list:
        v = tanh(self.av(array.concat.concat((a, state['h2']), axis=1)))
        w = self.vw(v)
        e = exp(w)
        e_list.append(e)
        sum_e = sum_e + e
    context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
    for a, e in zip(a_list, e_list):
        e /= sum_e
        context = context + reshape(batch_matmul(a, e),
                                    (batch_size, self.hidden_size))
    return context, e_list, sum_e
def __init__(self, p=None, **kwargs):
    logit = None
    if kwargs:
        logit, = argument.parse_kwargs(kwargs, ('logit', logit))
    if not (p is None) ^ (logit is None):
        raise ValueError(
            "Either `p` or `logit` (not both) must have a value.")

    with chainer.using_config('enable_backprop', True):
        if p is None:
            logit = chainer.as_variable(logit)
            self.__log_p = log_softmax.log_softmax(logit, axis=-1)
            self.__p = exponential.exp(self.__log_p)
        else:
            self.__p = chainer.as_variable(p)
            self.__log_p = exponential.log(self.__p)
def __call__(self, a_list, state, batch_size, xp):
    e_list = []
    sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
    for a in a_list:
        w = self.aw(a, state['h2'])
        w.data = xp.clip(w.data, -20, 20)
        e = exp(w)
        e_list.append(e)
        sum_e = sum_e + e
    context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
    for a, e in zip(a_list, e_list):
        e /= sum_e
        context = context + reshape(batch_matmul(a, e),
                                    (batch_size, self.hidden_size))
    return context, e_list, sum_e
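# Hedged aside on the xp.clip above: it bounds the scores so exp() cannot
# overflow. The standard alternative is to subtract the maximum score before
# exponentiating (the log-sum-exp trick), sketched here in plain NumPy:
import numpy as np

scores = np.array([1000.0, 999.0, 998.0])  # naive np.exp(scores) overflows
weights = np.exp(scores - scores.max())
weights /= weights.sum()                   # valid attention/softmax weights
assert np.isclose(weights.sum(), 1.0)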
def __init__(self, p=None, **kwargs):
    logit = None
    if kwargs:
        logit, = argument.parse_kwargs(
            kwargs, ('logit', logit))
    if not (p is None) ^ (logit is None):
        raise ValueError(
            "Either `p` or `logit` (not both) must have a value.")

    with chainer.using_config('enable_backprop', True):
        if p is None:
            logit = chainer.as_variable(logit)
            self.__log_p = log_softmax.log_softmax(logit, axis=-1)
            self.__p = exponential.exp(self.__log_p)
        else:
            self.__p = chainer.as_variable(p)
            self.__log_p = exponential.log(self.__p)
def __init__(self, loc, scale=None, **kwargs):
    super(Normal, self).__init__()
    log_scale = None
    if kwargs:
        log_scale, = argument.parse_kwargs(
            kwargs, ('log_scale', log_scale))
    if not (scale is None) ^ (log_scale is None):
        raise ValueError(
            "Either `scale` or `log_scale` (not both) must have a value.")

    self.loc = chainer.as_variable(loc)

    with chainer.using_config('enable_backprop', True):
        if scale is None:
            self.__log_scale = chainer.as_variable(log_scale)
            self.__scale = exponential.exp(self.log_scale)
        else:
            self.__scale = chainer.as_variable(scale)
            self.__log_scale = exponential.log(self.scale)
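# Hedged usage sketch (assumes Chainer is installed): `scale` and `log_scale`
# are mutually exclusive ways to parameterize the same distribution; passing
# both, or neither, raises the ValueError guarded above.
import numpy as np
from chainer.distributions import Normal

loc = np.zeros(3, dtype=np.float32)
d1 = Normal(loc, scale=np.ones(3, dtype=np.float32))
d2 = Normal(loc, log_scale=np.zeros(3, dtype=np.float32))  # same distribution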
def softmin_chainer(a, b, c, beta=1.0):
    xp = cuda.get_array_module(a)
    s = Variable(xp.zeros(1))[0]
    if a.data < b.data:
        if a.data < c.data:
            s += exp(-(b - a) * beta)
            s += exp(-(c - a) * beta)
            return -1 / beta * log1p(s) + a
        else:
            s += exp(-(a - c) * beta)
            s += exp(-(b - c) * beta)
            return -1 / beta * log1p(s) + c
    else:
        if b.data < c.data:
            s += exp(-(a - b) * beta)
            s += exp(-(c - b) * beta)
            return -1 / beta * log1p(s) + b
        else:
            s += exp(-(a - c) * beta)
            s += exp(-(b - c) * beta)
            return -1 / beta * log1p(s) + c
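# Hedged NumPy check: softmin_chainer implements the smooth minimum
#   -1/beta * log(exp(-beta*a) + exp(-beta*b) + exp(-beta*c)),
# factored around the hard minimum so that log1p only ever sees a sum of
# terms each at most 1 (hence no overflow).
import numpy as np
from scipy.special import logsumexp

a, b, c, beta = 1.0, 2.5, 0.3, 2.0
smooth_min = -logsumexp(-beta * np.array([a, b, c])) / beta
m = min(a, b, c)
s = sum(np.exp(-beta * (v - m)) for v in (a, b, c) if v != m)
factored = m - np.log1p(s) / beta
assert np.isclose(smooth_min, factored)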
def gaussian_nll(x, mean, ln_var): """Computes the negative log-likelihood of a Gaussian distribution. Given two variable ``mean`` representing :math:`\\mu` and ``ln_var`` representing :math:`\\log(\\sigma^2)`, this function returns the negative log-likelihood of :math:`x` on a Gaussian distribution :math:`N(\\mu, S)`, .. math:: -\\log N(x; \\mu, \\sigma^2) = \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) + \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu), where :math:`D` is a dimension of :math:`x` and :math:`S` is a diagonal matrix where :math:`S_{ii} = \\sigma_i^2`. Args: x (~chainer.Variable): Input variable. mean (~chainer.Variable): A variable representing mean of a Gaussian distribution, :math:`\\mu`. ln_var (~chainer.Variable): A variable representing logarithm of variance of a Gaussian distribution, :math:`\\log(\\sigma^2)`. Returns: ~chainer.Variable: A variable representing the negative log-likelihood. """ assert isinstance(x, variable.Variable) assert isinstance(mean, variable.Variable) assert isinstance(ln_var, variable.Variable) D = x.size x_prec = exponential.exp(-ln_var) x_diff = x - mean x_power = (x_diff * x_diff) * x_prec * -0.5 return (sum.sum(ln_var) + D * math.log(2 * math.pi)) / 2 - sum.sum(x_power)
def prob(self, x):
    scale = self.scale
    return 0.5 / scale * exponential.exp(- abs(x - self.loc) / scale)
def log_prob(self, x):
    y = (x - self.loc) / self.scale
    return - self._log_scale - y - exponential.exp(-y)
def variance(self):
    return (exponential.exp(2 * self.mu + self.sigma**2)
            * (exponential.exp(self.sigma**2) - 1))
def p(self):
    if self.__p is not None:
        return chainer.as_variable(self.__p)
    else:
        return exponential.exp(self.log_p)
def mean(self):
    return exponential.exp(self.mu + 0.5 * self.sigma**2)
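# Hedged Monte Carlo check of the log-normal moments used above: for
# X ~ LogNormal(mu, sigma), E[X] = exp(mu + sigma^2/2) and
# Var[X] = exp(2*mu + sigma^2) * (exp(sigma^2) - 1).
import numpy as np

np.random.seed(0)
mu, sigma = 0.2, 0.5
x = np.exp(mu + sigma * np.random.standard_normal(2000000))
assert np.isclose(x.mean(), np.exp(mu + 0.5 * sigma ** 2), rtol=1e-2)
assert np.isclose(x.var(),
                  np.exp(2 * mu + sigma ** 2) * (np.exp(sigma ** 2) - 1),
                  rtol=1e-2)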
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
def prob(self, x):
    return PROBC / broadcast.broadcast_to(self.scale, x.shape) * \
        exponential.exp(
            - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2
            / broadcast.broadcast_to(self.scale, x.shape) ** 2)
def _kl_bernoulli_bernoulli(dist1, dist2):
    return (dist1.logit - dist2.logit) * (dist1.p - 1.) \
        - exponential.log(exponential.exp(-dist1.logit) + 1) \
        + exponential.log(exponential.exp(-dist2.logit) + 1)
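# Hedged NumPy check of the logit-space closed form above against the
# textbook Bernoulli KL, p1*log(p1/p2) + (1-p1)*log((1-p1)/(1-p2)).
import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

l1, l2 = 0.8, -0.4
p1, p2 = sigmoid(l1), sigmoid(l2)
closed = ((l1 - l2) * (p1 - 1.)
          - np.log(np.exp(-l1) + 1) + np.log(np.exp(-l2) + 1))
direct = p1 * np.log(p1 / p2) + (1 - p1) * np.log((1 - p1) / (1 - p2))
assert np.isclose(closed, direct)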
def log_prob(self, x):
    y = (x - self.loc) / self.scale
    return -exponential.log(self.scale) - y - exponential.exp(-y)
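# Hedged SciPy cross-check of the Gumbel log-density above:
# -log(scale) - y - exp(-y) with y = (x - loc)/scale matches
# scipy.stats.gumbel_r.logpdf.
import numpy as np
from scipy.stats import gumbel_r

x, loc, scale = 1.7, 0.5, 2.0
y = (x - loc) / scale
assert np.isclose(-np.log(scale) - y - np.exp(-y),
                  gumbel_r.logpdf(x, loc=loc, scale=scale))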
def scale(self):
    if self.__scale is not None:
        return chainer.as_variable(self.__scale)
    else:
        return exponential.exp(self.log_scale)
def variance(self):
    return exponential.exp(2 * self.mu + self.sigma ** 2) \
        * (exponential.exp(self.sigma ** 2) - 1)
def backward(self, indexes, gy):
    x = self.get_retained_inputs()[0]
    return (2 * numpy.pi) ** -0.5 * exponential.exp(-0.5 * x ** 2) * gy[0],
def mean(self):
    return exponential.exp(self.mu + 0.5 * self.sigma ** 2)
def prob(self, x):
    return (PROBC / self.scale) * exponential.exp(
        - 0.5 * (x - self.loc) ** 2 / self.scale ** 2)
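# Hedged SciPy cross-check: PROBC above is the normalizing constant
# 1/sqrt(2*pi), so the expression is the usual Gaussian density and matches
# scipy.stats.norm.pdf.
import numpy as np
from scipy.stats import norm

PROBC = 1. / np.sqrt(2 * np.pi)
x, loc, scale = 0.3, 0.0, 1.5
p = (PROBC / scale) * np.exp(-0.5 * (x - loc) ** 2 / scale ** 2)
assert np.isclose(p, norm.pdf(x, loc=loc, scale=scale))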
def backward(self, indexes, gy):
    x = self.get_retained_inputs()[0]
    return (2 * numpy.pi)**-0.5 * exponential.exp(-0.5 * x**2) * gy[0],
def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of
    the option ``reduce``. If it is ``'no'``, it holds the
    elementwise loss values. If it is ``'mean'``, this function takes
    a mean of loss values.

    Args:
        x (~chainer.Variable): Batch of input vectors.
            Its shape should be :math:`(N, D)`.
        t (~chainer.Variable): Vector of ground truth labels.
            Its shape should be :math:`(N,)`. Each element :math:`v`
            should satisfy :math:`0 \\leq v < V` or :math:`-1`
            where :math:`V` is the number of label types.
        W (~chainer.Variable): Weight matrix.
            Its shape should be :math:`(V, D)`.
        samples (~chainer.Variable): Negative samples.
            Its shape should be :math:`(N, S)` where :math:`S` is
            the number of negative samples.
        reduce (str): Reduction option. Its value must be either
            ``'no'`` or ``'mean'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s).
            If ``reduce`` is ``'no'``, the output variable holds an
            array whose shape is :math:`(N,)` .
            If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models With \
         Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """
    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
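# Hedged usage sketch (assumes Chainer is installed; shapes follow the
# docstring above: x is (N, D), t is (N,), W is (V, D), samples is (N, S)).
import numpy as np
import chainer.functions as F

N, D, V, S = 4, 8, 100, 5
x = np.random.randn(N, D).astype(np.float32)
t = np.random.randint(0, V, size=N).astype(np.int32)
W = np.random.randn(V, D).astype(np.float32)
samples = np.random.randint(0, V, size=(N, S)).astype(np.int32)
loss = F.black_out(x, t, W, samples)                  # scalar (reduce='mean')
losses = F.black_out(x, t, W, samples, reduce='no')   # shape (N,)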
def prob(self, x):
    bl = broadcast.broadcast_to(self.loc, x.shape)
    bs = broadcast.broadcast_to(self.scale, x.shape)
    return 0.5 / bs * exponential.exp(-abs(x - bl) / bs)
def _kl_gumbel_gumbel(dist1, dist2):
    scale_1d2 = dist1.scale / dist2.scale
    return (dist2._log_scale - dist1._log_scale
            + EULER * (scale_1d2 - 1.)
            + exponential.exp((dist2.loc - dist1.loc) / dist2.scale
                              + lgamma.lgamma(scale_1d2 + 1.))
            - 1 + (dist1.loc - dist2.loc) / dist2.scale)