def sample_n(self, n):
    if self._is_gpu:
        eps = cuda.cupy.random.standard_normal(
            (n,) + self.loc.shape, dtype=self.loc.dtype)
    else:
        eps = numpy.random.standard_normal(
            (n,) + self.loc.shape).astype(numpy.float32)

    noise = repeat.repeat(
        expand_dims.expand_dims(self.scale, axis=0), n, axis=0) * eps
    noise += repeat.repeat(expand_dims.expand_dims(
        self.loc, axis=0), n, axis=0)
    return noise
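# A minimal NumPy-only sketch of what sample_n above computes: draws from
# N(loc, scale^2) via the reparameterization noise = loc + scale * eps with
# eps ~ N(0, I). Names here are illustrative, not part of the Chainer API.
import numpy

def sample_n_numpy(loc, scale, n):
    eps = numpy.random.standard_normal(
        (n,) + loc.shape).astype(numpy.float32)
    return loc[None] + scale[None] * eps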
def sample_n(self, n):
    if self._is_gpu:
        eps = cuda.cupy.random.standard_normal(
            (n,) + self.loc.shape + (1,), dtype=self.loc.dtype)
    else:
        eps = numpy.random.standard_normal(
            (n,) + self.loc.shape + (1,)).astype(numpy.float32)

    noise = matmul.matmul(repeat.repeat(
        expand_dims.expand_dims(self.scale_tril, axis=0), n, axis=0), eps)
    noise = squeeze.squeeze(noise, axis=-1)
    noise += repeat.repeat(expand_dims.expand_dims(
        self.loc, axis=0), n, axis=0)
    return noise
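# Sketch of the multivariate case above, assuming scale_tril is the lower
# Cholesky factor L of the covariance: x = loc + L @ eps with eps ~ N(0, I).
# NumPy-only illustration; the function name is hypothetical.
import numpy

def sample_n_mvn_numpy(loc, scale_tril, n):
    eps = numpy.random.standard_normal((n,) + loc.shape + (1,))
    return loc[None] + numpy.squeeze(scale_tril[None] @ eps, axis=-1)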
def variance(self):
    alpha0 = expand_dims.expand_dims(self.alpha0, axis=-1)
    return (
        self.alpha * (alpha0 - self.alpha) / alpha0 ** 2 / (alpha0 + 1))
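# The closed form implemented above: for alpha0 = sum_i alpha_i,
#     Var[x_i] = alpha_i * (alpha0 - alpha_i) / (alpha0 ** 2 * (alpha0 + 1)).
# A quick NumPy sanity check against sampled moments (illustrative only):
import numpy

alpha = numpy.array([1.0, 2.0, 3.0])
alpha0 = alpha.sum()
analytic = alpha * (alpha0 - alpha) / alpha0 ** 2 / (alpha0 + 1)
empirical = numpy.random.dirichlet(alpha, size=100000).var(axis=0)
# analytic and empirical should agree to roughly three decimal places.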
def _kl_dirichlet_dirichlet(dist1, dist2):
    return (
        - _lbeta(dist1.alpha) + _lbeta(dist2.alpha)
        + sum_mod.sum(
            (dist1.alpha - dist2.alpha)
            * (digamma.digamma(dist1.alpha)
               - expand_dims.expand_dims(
                   digamma.digamma(dist1.alpha0), axis=-1)),
            axis=-1))
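# The closed form implemented above:
#     KL(Dir(a) || Dir(b)) = log B(b) - log B(a)
#                            + sum_i (a_i - b_i) * (psi(a_i) - psi(a_0)),
# where B is the multivariate beta function, psi is the digamma function and
# a_0 = sum_i a_i. A plain NumPy/SciPy reference version (illustrative, not
# the Chainer API):
import numpy
from scipy.special import digamma, gammaln

def kl_dirichlet(a, b):
    def lbeta(x):
        return gammaln(x).sum(axis=-1) - gammaln(x.sum(axis=-1))
    a0 = a.sum(axis=-1, keepdims=True)
    return (lbeta(b) - lbeta(a)
            + ((a - b) * (digamma(a) - digamma(a0))).sum(axis=-1))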
def black_out(x, t, W, samples):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    Args:
        x (~chainer.Variable): Batch of input vectors.
        t (~chainer.Variable): Vector of ground truth labels.
        W (~chainer.Variable): Weight matrix.
        samples (~chainer.Variable): Negative samples.

    Returns:
        ~chainer.Variable: Loss value.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models \
         With Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """
    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.batch_matmul(neg_emb, x)
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.batch_matmul(pos_emb, x)
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = py - logz + _sum.sum(ny, axis=1)
    return -_sum.sum(loss) / batch_size
def log_prob(self, x):
    scale_tril_inv = _batch_triangular_inv(
        self.scale_tril.reshape(-1, self.d, self.d))
    scale_tril_inv = scale_tril_inv.reshape(
        self.batch_shape + (self.d, self.d))
    bsti = broadcast.broadcast_to(scale_tril_inv, x.shape + (self.d,))
    bl = broadcast.broadcast_to(self.loc, x.shape)
    m = matmul.matmul(bsti, expand_dims.expand_dims(x - bl, axis=-1))
    m = matmul.matmul(swapaxes.swapaxes(m, -1, -2), m)
    m = squeeze.squeeze(m, axis=-1)
    m = squeeze.squeeze(m, axis=-1)
    logz = LOGPROBC * self.d - self._logdet(self.scale_tril)
    return broadcast.broadcast_to(logz, m.shape) - 0.5 * m
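# The density computed above, written out: with L = scale_tril (lower
# Cholesky factor of the covariance) and z = L^{-1} (x - loc),
#     log p(x) = -(d / 2) * log(2 * pi) - log|det L| - 0.5 * z.T @ z,
# so LOGPROBC is presumably -0.5 * log(2 * pi). A SciPy cross-check of that
# formula (illustrative only):
import numpy
from scipy.stats import multivariate_normal

loc = numpy.zeros(3)
L = numpy.tril(numpy.random.rand(3, 3)) + numpy.eye(3)
x = numpy.random.rand(3)
z = numpy.linalg.solve(L, x - loc)
logp = (-0.5 * 3 * numpy.log(2 * numpy.pi)
        - numpy.log(numpy.diag(L)).sum() - 0.5 * z @ z)
assert numpy.isclose(logp, multivariate_normal(loc, L @ L.T).logpdf(x))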
def __init__(self, p=None, **kwargs):
    logit = None
    if kwargs:
        logit, = argument.parse_kwargs(kwargs, ('logit', logit))
    if not (p is None) ^ (logit is None):
        raise ValueError(
            "Either `p` or `logit` (not both) must have a value.")
    with chainer.using_config('enable_backprop', True):
        if p is None:
            logit = chainer.as_variable(logit)
            self.__log_p = logit - expand_dims.expand_dims(
                logsumexp.logsumexp(logit, axis=-1), axis=-1)
            self.__p = exponential.exp(self.__log_p)
        else:
            self.__p = chainer.as_variable(p)
            self.__log_p = exponential.log(self.__p)
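# What the logit branch above computes: log_p is the log-softmax of the
# logits, log_p_i = logit_i - logsumexp(logit), so that p sums to one along
# the last axis. A numerically stable NumPy equivalent (illustrative):
import numpy

def log_softmax(logit):
    m = logit.max(axis=-1, keepdims=True)
    return logit - m - numpy.log(
        numpy.exp(logit - m).sum(axis=-1, keepdims=True))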
def mean(self):
    alpha0 = expand_dims.expand_dims(self.alpha0, axis=-1)
    return self.alpha / alpha0
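# The mean of Dirichlet(alpha) is alpha / alpha0 with alpha0 = sum(alpha),
# as returned above. Quick empirical check (illustrative only):
import numpy

alpha = numpy.array([1.0, 2.0, 3.0])
assert numpy.allclose(
    numpy.random.dirichlet(alpha, size=200000).mean(axis=0),
    alpha / alpha.sum(), atol=1e-2)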
def stack(xs, axis=0):
    """Concatenate variables along a new axis.

    Args:
        xs (list of :class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variables to be concatenated. The variables must have the
            same shape.
        axis (int): The axis along which the arrays will be stacked. The
            ``axis`` parameter is acceptable when
            :math:`-ndim - 1 \\leq axis \\leq ndim`. (``ndim`` is the
            dimension of input variables). When :math:`axis < 0`, the result
            is the same as with :math:`ndim + 1 - |axis|`.

    Returns:
        ~chainer.Variable: Output variable. Let ``x_1, x_2, ..., x_n`` and
        ``y`` be the input variables and the output variable,
        ``y[:, ..., 0, ..., :]`` is ``x_1``, ``y[:, ..., 1, ..., :]`` is
        ``x_2`` and ``y[:, ..., n-1, ..., :]`` is ``x_n`` (The indexed axis
        indicates the ``axis``).

    .. admonition:: Example

        >>> x1 = np.arange(0, 12).reshape(3, 4)
        >>> x1.shape
        (3, 4)
        >>> x1
        array([[ 0,  1,  2,  3],
               [ 4,  5,  6,  7],
               [ 8,  9, 10, 11]])
        >>> x2 = np.arange(12, 24).reshape(3, 4)
        >>> x2.shape
        (3, 4)
        >>> x2
        array([[12, 13, 14, 15],
               [16, 17, 18, 19],
               [20, 21, 22, 23]])
        >>> y = F.stack([x1, x2], axis=0)
        >>> y.shape
        (2, 3, 4)
        >>> y.data
        array([[[ 0,  1,  2,  3],
                [ 4,  5,  6,  7],
                [ 8,  9, 10, 11]],
        <BLANKLINE>
               [[12, 13, 14, 15],
                [16, 17, 18, 19],
                [20, 21, 22, 23]]])
        >>> y = F.stack([x1, x2], axis=1)
        >>> y.shape
        (3, 2, 4)
        >>> y.data
        array([[[ 0,  1,  2,  3],
                [12, 13, 14, 15]],
        <BLANKLINE>
               [[ 4,  5,  6,  7],
                [16, 17, 18, 19]],
        <BLANKLINE>
               [[ 8,  9, 10, 11],
                [20, 21, 22, 23]]])
        >>> y = F.stack([x1, x2], axis=2)
        >>> y.shape
        (3, 4, 2)
        >>> y.data
        array([[[ 0, 12],
                [ 1, 13],
                [ 2, 14],
                [ 3, 15]],
        <BLANKLINE>
               [[ 4, 16],
                [ 5, 17],
                [ 6, 18],
                [ 7, 19]],
        <BLANKLINE>
               [[ 8, 20],
                [ 9, 21],
                [10, 22],
                [11, 23]]])
        >>> y = F.stack([x1, x2], axis=-1)
        >>> y.shape
        (3, 4, 2)

    """
    xs = [expand_dims.expand_dims(x, axis=axis) for x in xs]
    return concat.concat(xs, axis=axis)
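# The implementation above is just expand_dims followed by concat, i.e. the
# same contract as numpy.stack. A quick check of that equivalence
# (illustrative only):
import numpy
import chainer.functions as F

x1 = numpy.arange(12, dtype=numpy.float32).reshape(3, 4)
x2 = x1 + 12
assert numpy.array_equal(F.stack([x1, x2], axis=1).data,
                         numpy.stack([x1, x2], axis=1))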
def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

      -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is likelihood of a given label.
    And, :math:`p` is defined as

    .. math::

       p(y) = \\frac{\\exp(W_y^\\top x)}{
       \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values.
    If it is ``'mean'``, this function takes a mean of loss values.

    Args:
        x (~chainer.Variable): Batch of input vectors.
            Its shape should be :math:`(N, D)`.
        t (~chainer.Variable): Vector of ground truth labels.
            Its shape should be :math:`(N,)`. Each element :math:`v` should
            satisfy :math:`0 \\leq v < V` or be :math:`-1`, where :math:`V`
            is the number of label types.
        W (~chainer.Variable): Weight matrix.
            Its shape should be :math:`(V, D)`.
        samples (~chainer.Variable): Negative samples.
            Its shape should be :math:`(N, S)` where :math:`S` is the number
            of negative samples.
        reduce (str): Reduction option. Its value must be either ``'no'`` or
            ``'mean'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s).
            If ``reduce`` is ``'no'``, the output variable holds an array
            whose shape is :math:`(N,)`.
            If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models \
         With Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """
    batch_size = x.shape[0]

    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])

    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))

    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
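# A small usage sketch for the function above, with shapes taken from the
# docstring (x is (N, D), t is (N,), W is (V, D), samples is (N, S)).
# Illustrative only; in practice samples would come from a noise
# distribution over the vocabulary rather than a uniform draw.
import numpy
import chainer.functions as F

N, D, V, S = 4, 8, 100, 5
x = numpy.random.rand(N, D).astype(numpy.float32)
t = numpy.random.randint(0, V, size=N).astype(numpy.int32)
W = numpy.random.rand(V, D).astype(numpy.float32)
samples = numpy.random.randint(0, V, size=(N, S)).astype(numpy.int32)
loss = F.black_out(x, t, W, samples)  # scalar Variable ('mean' reduction)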