def predict(self, images, oversample=True):
    """Computes all the probabilities of given images.

    Args:
        images (iterable of PIL.Image or numpy.ndarray): Input images.
            When you specify a color image as a :class:`numpy.ndarray`,
            make sure that color order is RGB.
        oversample (bool): If ``True``, it averages results across
            center, corners, and mirrors. Otherwise, it uses only the
            center.

    Returns:
        ~chainer.Variable: Output that contains the class probabilities
        of given images.

    """
    x = concat_examples([prepare(img, size=(256, 256)) for img in images])
    if oversample:
        x = imgproc.oversample(x, crop_dims=(224, 224))
    else:
        x = x[:, :, 16:240, 16:240]
    # Use no_backprop_mode to reduce memory consumption
    with function.no_backprop_mode(), chainer.using_config('train', False):
        x = Variable(self.xp.asarray(x))
        y = self(x, layers=['prob'])['prob']
        if oversample:
            n = len(y) // 10
            y_shape = y.shape[1:]
            y = reshape(y, (n, 10) + y_shape)
            y = average(y, axis=1)
    return y
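# Usage sketch (added for illustration, not part of the original source):
# `predict` is the method exposed by Chainer's pretrained vision links such
# as `chainer.links.VGG16Layers`; the file name 'cat.jpg' is a hypothetical
# placeholder.
from PIL import Image
from chainer.links import VGG16Layers

model = VGG16Layers()  # loads the pretrained ImageNet weights (downloads on first use)
img = Image.open('cat.jpg')  # any RGB image
prob = model.predict([img], oversample=True)
print(prob.shape)  # (1, 1000): one probability vector over the ImageNet classes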
# Module-level imports assumed by the three loss functions below (they
# originate from chainer/functions/loss/vae.py):
import math

from chainer.functions.activation import softplus
from chainer.functions.math import average
from chainer.functions.math import exponential
from chainer.functions.math import sum


def gaussian_nll(x, mean, ln_var, reduce='sum'):
    """Computes the negative log-likelihood of a Gaussian distribution.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function computes the
    elementwise negative log-likelihood of :math:`x` on a Gaussian
    distribution :math:`N(\\mu, S)`,

    .. math::

        -\\log N(x; \\mu, \\sigma^2) =
        \\log\\left(\\sqrt{(2\\pi)^D |S|}\\right) +
        \\frac{1}{2}(x - \\mu)^\\top S^{-1}(x - \\mu),

    where :math:`D` is the dimension of :math:`x` and :math:`S` is a
    diagonal matrix where :math:`S_{ii} = \\sigma_i^2`.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If
    it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged
    respectively.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable.
        mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing the mean of a Gaussian distribution, :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing the logarithm of the variance of a Gaussian
            distribution, :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either ``'sum'``,
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable representing the negative log-likelihood. If
            ``reduce`` is ``'no'``, the output variable holds an array whose
            shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'"
            ' is given' % reduce)

    x_prec = exponential.exp(-ln_var)
    x_diff = x - mean
    x_power = (x_diff * x_diff) * x_prec * -0.5
    loss = (ln_var + math.log(2 * math.pi)) / 2 - x_power
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
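# Numeric sanity check (added for illustration): with mean 0 and unit variance
# (ln_var = 0), the elementwise NLL reduces to 0.5 * (log(2*pi) + x**2), so the
# public API `chainer.functions.gaussian_nll` can be cross-checked against a
# direct NumPy computation.
import numpy as np
import chainer.functions as F

x = np.array([[0.5, -1.0]], dtype=np.float32)
mean = np.zeros((1, 2), dtype=np.float32)
ln_var = np.zeros((1, 2), dtype=np.float32)  # log(sigma^2) = 0, i.e. sigma^2 = 1

nll = F.gaussian_nll(x, mean, ln_var)  # reduce='sum' by default
manual = 0.5 * (np.log(2 * np.pi) + x ** 2).sum()
print(float(nll.data), manual)  # the two values agree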
def gaussian_kl_divergence(mean, ln_var, reduce='sum'):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variables ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function calculates the
    elementwise KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`,

    .. math::

        D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that
    :math:`S_{ii} = \\sigma_i^2` and :math:`I` is an identity matrix.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If
    it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged
    respectively.

    Args:
        mean (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing the mean of the given Gaussian distribution,
            :math:`\\mu`.
        ln_var (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing the logarithm of the variance of the given Gaussian
            distribution, :math:`\\log(\\sigma^2)`.
        reduce (str): Reduction option. Its value must be either ``'sum'``,
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable representing the KL-divergence between the given
            Gaussian distribution and the standard Gaussian. If ``reduce``
            is ``'no'``, the output variable holds an array whose shape is
            the same as that of (hence both of) the input variables. If it
            is ``'sum'`` or ``'mean'``, the output variable holds a scalar
            value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'"
            ' is given' % reduce)

    var = exponential.exp(ln_var)
    mean_square = mean * mean
    loss = (mean_square + var - ln_var - 1) * 0.5
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
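# Numeric sanity check (added for illustration): the KL-divergence of the
# standard Gaussian from itself (mean = 0, ln_var = 0) is exactly zero, and
# the general elementwise closed form is
# 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1).
import numpy as np
import chainer.functions as F

mean = np.zeros((1, 3), dtype=np.float32)
ln_var = np.zeros((1, 3), dtype=np.float32)
print(float(F.gaussian_kl_divergence(mean, ln_var).data))  # 0.0

mean = np.array([[1.0, -2.0]], dtype=np.float32)
ln_var = np.log(np.array([[0.5, 2.0]], dtype=np.float32))
manual = 0.5 * (mean ** 2 + np.exp(ln_var) - ln_var - 1).sum()
print(float(F.gaussian_kl_divergence(mean, ln_var).data), manual)  # agree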
def bernoulli_nll(x, y, reduce='sum'):
    """Computes the negative log-likelihood of a Bernoulli distribution.

    This function calculates the negative log-likelihood of a Bernoulli
    distribution,

    .. math::

        -\\log B(x; p) = -\\sum_i \\{x_i \\log(p_i) +
        (1 - x_i)\\log(1 - p_i)\\},

    where :math:`p = \\sigma(y)`, :math:`\\sigma(\\cdot)` is a sigmoid
    function, and :math:`B(x; p)` is a Bernoulli distribution.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If
    it is ``'sum'`` or ``'mean'``, loss values are summed up or averaged
    respectively.

    .. note::

        As this function applies a sigmoid internally, you can pass the
        output of a fully-connected layer (i.e., :class:`Linear`) to this
        function directly.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable.
        y (:class:`~chainer.Variable` or :ref:`ndarray`): A variable
            representing the parameter of the Bernoulli distribution.
        reduce (str): Reduction option. Its value must be either ``'sum'``,
            ``'mean'`` or ``'no'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable representing the negative log-likelihood. If
            ``reduce`` is ``'no'``, the output variable holds an array whose
            shape is the same as that of (hence both of) the input variables.
            If it is ``'sum'`` or ``'mean'``, the output variable holds a
            scalar value.

    """
    if reduce not in ('sum', 'mean', 'no'):
        raise ValueError(
            "only 'sum', 'mean' and 'no' are valid for 'reduce', but '%s'"
            ' is given' % reduce)

    loss = softplus.softplus(y) - x * y
    if reduce == 'sum':
        return sum.sum(loss)
    elif reduce == 'mean':
        return average.average(loss)
    else:
        return loss
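# Numeric sanity check (added for illustration): since p = sigmoid(y), the
# loss softplus(y) - x*y equals the usual binary cross-entropy
# -sum(x*log(p) + (1-x)*log(1-p)), which can be verified directly in NumPy.
import numpy as np
import chainer.functions as F

y = np.array([[0.3, -1.2]], dtype=np.float32)  # pre-sigmoid logits
x = np.array([[1.0, 0.0]], dtype=np.float32)   # binary targets

nll = F.bernoulli_nll(x, y)  # reduce='sum' by default
p = 1.0 / (1.0 + np.exp(-y))
manual = -(x * np.log(p) + (1 - x) * np.log(1 - p)).sum()
print(float(nll.data), manual)  # the two values agree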
# Imports assumed by black_out below (the function originates from
# chainer/functions/loss/black_out.py):
from chainer.functions.array import broadcast
from chainer.functions.array import concat
from chainer.functions.array import expand_dims
from chainer.functions.array import reshape
from chainer.functions.connection import embed_id
from chainer.functions.math import average
from chainer.functions.math import exponential
from chainer.functions.math import logsumexp
from chainer.functions.math import matmul
from chainer.functions.math import sum as _sum


def black_out(x, t, W, samples, reduce='mean'):
    """BlackOut loss function.

    BlackOut loss function is defined as

    .. math::

        -\\log(p(t)) - \\sum_{s \\in S} \\log(1 - p(s)),

    where :math:`t` is the correct label, :math:`S` is a set of negative
    examples and :math:`p(\\cdot)` is the likelihood of a given label.
    Here, :math:`p` is defined as

    .. math::

        p(y) = \\frac{\\exp(W_y^\\top x)}{
        \\sum_{s \\in samples} \\exp(W_s^\\top x)}.

    The output is a variable whose value depends on the value of the option
    ``reduce``. If it is ``'no'``, it holds the elementwise loss values. If
    it is ``'mean'``, this function takes a mean of loss values.

    Args:
        x (~chainer.Variable): Batch of input vectors. Its shape should be
            :math:`(N, D)`.
        t (~chainer.Variable): Vector of ground truth labels. Its shape
            should be :math:`(N,)`. Each element :math:`v` should satisfy
            :math:`0 \\leq v < V` or be :math:`-1`, where :math:`V` is the
            number of label types.
        W (~chainer.Variable): Weight matrix. Its shape should be
            :math:`(V, D)`.
        samples (~chainer.Variable): Negative samples. Its shape should be
            :math:`(N, S)` where :math:`S` is the number of negative
            samples.
        reduce (str): Reduction option. Its value must be either ``'no'``
            or ``'mean'``. Otherwise, :class:`ValueError` is raised.

    Returns:
        ~chainer.Variable:
            A variable object holding loss value(s). If ``reduce`` is
            ``'no'``, the output variable holds an array whose shape is
            :math:`(N,)`. If it is ``'mean'``, it holds a scalar.

    See: `BlackOut: Speeding up Recurrent Neural Network Language Models \
With Very Large Vocabularies <https://arxiv.org/abs/1511.06909>`_

    .. seealso:: :class:`~chainer.links.BlackOut`.

    """
    if reduce not in ('mean', 'no'):
        raise ValueError(
            "only 'mean' and 'no' are valid for 'reduce', but '%s' is given"
            % reduce)

    batch_size = x.shape[0]
    neg_emb = embed_id.embed_id(samples, W)
    neg_y = matmul.matmul(neg_emb, x[:, :, None])
    neg_y = reshape.reshape(neg_y, neg_y.shape[:-1])
    pos_emb = expand_dims.expand_dims(embed_id.embed_id(t, W), 1)
    pos_y = matmul.matmul(pos_emb, x[:, :, None])
    pos_y = reshape.reshape(pos_y, pos_y.shape[:-1])

    logz = logsumexp.logsumexp(concat.concat([pos_y, neg_y]), axis=1)
    blogz, bneg_y = broadcast.broadcast(
        reshape.reshape(logz, (batch_size, 1)), neg_y)
    ny = exponential.log(1 - exponential.exp(bneg_y - blogz))
    py = reshape.reshape(pos_y, (batch_size,))
    loss = -(py - logz + _sum.sum(ny, axis=1))
    if reduce == 'mean':
        loss = average.average(loss)
    return loss
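# Usage sketch (added for illustration): calling the public functional form
# `chainer.functions.black_out` with the shapes from the docstring. The random
# data below is purely hypothetical; in practice `samples` comes from a
# sampler over the vocabulary (see chainer.links.BlackOut).
import numpy as np
import chainer.functions as F

N, D, V, S = 2, 4, 10, 5
x = np.random.randn(N, D).astype(np.float32)                 # input vectors, (N, D)
t = np.array([1, 3], dtype=np.int32)                         # ground-truth labels, (N,)
W = np.random.randn(V, D).astype(np.float32)                 # weight matrix, (V, D)
samples = np.random.randint(0, V, (N, S)).astype(np.int32)   # negative samples, (N, S)

loss = F.black_out(x, t, W, samples)  # scalar loss with reduce='mean'
print(loss.shape)  # ()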