def backward(self, indexes, grad_outputs):
    gy, = grad_outputs
    logit, x = self.get_retained_inputs()
    xp = backend.get_array_module(x)
    dlogit = x - 1. / (1. + exponential.exp(-logit))

    # extreme logit
    nan = xp.array(xp.nan).astype(dlogit.dtype)
    logit_isinf = xp.bitwise_or(self.logit_ispinf, self.logit_isminf)
    dlogit = where.where(logit_isinf, nan, dlogit)
    if self.binary_check:
        dlogit = where.where(self.invalid, nan, dlogit)

    return sum.sum_to(gy * dlogit, logit.shape), None
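# Illustrative NumPy check (not part of the Chainer source above) of the
# identity this backward relies on: for the Bernoulli log-likelihood
# log p(x | logit) = x * logit - log(1 + exp(logit)), the gradient with
# respect to logit is x - sigmoid(logit), which is what `dlogit` computes.
# All names below are local to this sketch.
import numpy as np

def _bernoulli_log_prob(logit, x):
    return x * logit - np.log1p(np.exp(logit))

logit, x, eps = 0.3, 1.0, 1e-6
numeric = (_bernoulli_log_prob(logit + eps, x)
           - _bernoulli_log_prob(logit - eps, x)) / (2 * eps)
analytic = x - 1. / (1. + np.exp(-logit))
assert np.isclose(numeric, analytic)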
def mean(self):
    mean = (self.alpha * self.scale / (self.alpha - 1))
    xp = mean.xp
    return where.where(
        self.alpha.data > 1, mean, xp.array(xp.inf, mean.dtype))
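# Hedged NumPy sketch of the masking pattern above: the Pareto mean
# alpha * scale / (alpha - 1) is finite only for alpha > 1; `where`
# substitutes inf elsewhere. Values here are illustrative only.
import numpy as np

alpha = np.array([0.5, 1.5, 3.0])
scale = np.array([1.0, 1.0, 2.0])
mean = np.where(alpha > 1, alpha * scale / (alpha - 1), np.inf)
# -> [inf, 3.0, 3.0]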
def _kl_pareto_pareto(dist1, dist2):
    kl = dist2.alpha * (dist1._log_scale - dist2._log_scale) \
        + dist1._log_alpha - dist2._log_alpha \
        + (dist2.alpha - dist1.alpha) / dist1.alpha
    xp = kl.xp
    return where.where(dist1.scale.data >= dist2.scale.data,
                       kl, xp.array(xp.inf, kl.dtype))
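# A quick Monte Carlo sanity check (illustrative, not Chainer code) of the
# closed form used above:
#   KL(P1 || P2) = a2*(log s1 - log s2) + log a1 - log a2 + (a2 - a1)/a1
# for Pareto distributions with shapes a1, a2 and scales s1 >= s2.
import numpy as np

a1, s1, a2, s2 = 3.0, 2.0, 2.0, 1.5
rng = np.random.default_rng(0)
x = s1 * (1.0 + rng.pareto(a1, size=200000))  # samples from P1
logp1 = np.log(a1) + a1 * np.log(s1) - (a1 + 1) * np.log(x)
logp2 = np.log(a2) + a2 * np.log(s2) - (a2 + 1) * np.log(x)
mc = np.mean(logp1 - logp2)
closed = (a2 * (np.log(s1) - np.log(s2))
          + np.log(a1) - np.log(a2) + (a2 - a1) / a1)
# mc and closed agree to roughly 1e-2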
def log_prob(self, x):
    x = chainer.as_variable(x)
    logp = ((self.a - 1) * exponential.log(x)
            + (self.b - 1) * exponential.log(1 - x)
            - _lbeta(self.a, self.b))
    xp = logp.xp
    return where.where(
        utils.force_array((x.array >= 0) & (x.array <= 1)),
        logp, xp.array(-xp.inf, logp.dtype))
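# Illustrative cross-check (assumes SciPy is available) of the Beta
# log-density above; `_lbeta` corresponds to scipy.special.betaln here.
import numpy as np
from scipy import special, stats

a, b, x = 2.0, 3.0, 0.25
logp = (a - 1) * np.log(x) + (b - 1) * np.log(1 - x) - special.betaln(a, b)
assert np.isclose(logp, stats.beta.logpdf(x, a, b))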
def log_prob(self, x):
    logp = self._log_lam - self.lam * x
    xp = logp.xp
    if isinstance(x, chainer.Variable):
        x = x.array
    inf = xp.full_like(logp.array, xp.inf)
    return where.where(xp.asarray(x >= 0), logp, xp.asarray(-inf))
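# Hedged NumPy/SciPy sketch of the exponential log-density above, with the
# same where-based masking of the x < 0 region. Illustrative only.
import numpy as np
from scipy import stats

lam = 2.0
x = np.array([-1.0, 0.5])
logp = np.where(x >= 0, np.log(lam) - lam * x, -np.inf)
assert np.allclose(logp, stats.expon.logpdf(x, scale=1 / lam))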
def log_prob(self, x):
    x = chainer.as_variable(x)
    logp = (self._log_alpha + self.alpha * self._log_scale
            - (self.alpha + 1) * exponential.log(x))
    xp = logp.xp
    return where.where(
        utils.force_array(x.data >= self.scale.data),
        logp, xp.array(-xp.inf, logp.dtype))
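# Illustrative SciPy cross-check of the Pareto log-density above:
# log p(x) = log(alpha) + alpha*log(scale) - (alpha + 1)*log(x), x >= scale.
import numpy as np
from scipy import stats

alpha, scale, x = 2.5, 1.5, 3.0
logp = np.log(alpha) + alpha * np.log(scale) - (alpha + 1) * np.log(x)
assert np.isclose(logp, stats.pareto.logpdf(x, alpha, scale=scale))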
def variance(self):
    var = self.scale ** 2 * self.alpha / (self.alpha - 1) ** 2 \
        / (self.alpha - 2)
    xp = var.xp
    return where.where(
        self.alpha.data > 2, var, xp.array(xp.inf, var.dtype))
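# Companion sketch to the mean example above: the Pareto variance
# scale^2 * alpha / ((alpha - 1)^2 * (alpha - 2)) is finite only for
# alpha > 2. Illustrative values.
import numpy as np

alpha, scale = np.array([1.5, 3.0]), np.array([1.0, 1.0])
var = np.where(alpha > 2,
               scale ** 2 * alpha / (alpha - 1) ** 2 / (alpha - 2),
               np.inf)
# -> [inf, 0.75]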
def log_prob(self, x):
    logp = exponential.log(self.lam) - self.lam * x
    xp = logp.xp
    if isinstance(x, chainer.Variable):
        x = x.array
    inf = xp.full_like(logp.array, xp.inf)
    return where.where(xp.asarray(x >= 0), logp, xp.asarray(-inf))
def _modified_xlogx(x):
    x = chainer.as_variable(x)
    xp = x.xp
    return ModifiedXLogX(exponential.log(
        where.where(utils.force_array(x.array > 0),
                    x, xp.ones_like(x.array)))).apply((x,))[0]
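# Illustrative NumPy analogue of _modified_xlogx above: compute x*log(x)
# under the convention 0*log(0) = 0 by replacing non-positive x with 1
# inside the log, exactly as the where-based substitution does.
import numpy as np

x = np.array([0.0, 0.5, 2.0])
xlogx = x * np.log(np.where(x > 0, x, 1.0))
# -> [0.0, -0.3466, 1.3863]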
def _kl_pareto_pareto(dist1, dist2):
    kl = dist2.alpha * (exponential.log(dist1.scale)
                        - exponential.log(dist2.scale)) \
        + exponential.log(dist1.alpha) - exponential.log(dist2.alpha) \
        + (dist2.alpha - dist1.alpha) / dist1.alpha
    xp = kl.xp
    return where.where(dist1.scale.data >= dist2.scale.data,
                       kl, xp.array(xp.inf, kl.dtype))
def log_prob(self, x):
    logp = - lgamma.lgamma(self.k) - self.k * exponential.log(self.theta) \
        + (self.k - 1) * exponential.log(x) - x / self.theta
    xp = logp.xp
    inf = xp.full_like(logp.array, xp.inf)
    if isinstance(x, chainer.Variable):
        x = x.array
    return where.where(xp.asarray(x >= 0), logp, xp.asarray(-inf))
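# Illustrative SciPy cross-check of the Gamma log-density above
# (shape k, scale theta); lgamma.lgamma corresponds to
# scipy.special.gammaln here.
import numpy as np
from scipy import special, stats

k, theta, x = 2.0, 1.5, 3.0
logp = (-special.gammaln(k) - k * np.log(theta)
        + (k - 1) * np.log(x) - x / theta)
assert np.isclose(logp, stats.gamma.logpdf(x, k, scale=theta))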
def _kl_pareto_pareto(dist1, dist2):
    kl = dist2.alpha * (exponential.log(dist1.scale)
                        - exponential.log(dist2.scale)) \
        + exponential.log(dist1.alpha) - exponential.log(dist2.alpha) \
        + (dist2.alpha - dist1.alpha) / dist1.alpha
    xp = kl.xp
    inf = xp.full_like(kl.array, xp.inf)
    return where.where(dist1.scale.data >= dist2.scale.data, kl, inf)
def log_prob(self, x):
    logp = (self.a - 1) * exponential.log(x) \
        + (self.b - 1) * exponential.log(1 - x) \
        - _lbeta(self.a, self.b)
    xp = logp.xp
    inf = xp.full_like(logp.array, xp.inf)
    if isinstance(x, chainer.Variable):
        x = x.array
    return where.where(xp.logical_and(x >= 0, x <= 1), logp, -inf)
def log_prob(self, x):
    x = chainer.as_variable(x)
    logp = exponential.log(self.alpha) \
        + self.alpha * exponential.log(self.scale) \
        - (self.alpha + 1) * exponential.log(x)
    xp = logp.xp
    inf = xp.full_like(logp.array, xp.inf)
    return where.where(xp.asarray(x.data >= self.scale.data),
                       logp, xp.asarray(-inf))
def _kl_uniform_uniform(dist1, dist2):
    xp = backend.get_array_module(dist1.low)
    is_inf = xp.logical_or(dist1.high.data > dist2.high.data,
                           dist1.low.data < dist2.low.data)
    kl = - exponential.log(dist1.high - dist1.low) \
        + exponential.log(dist2.high - dist2.low)
    inf = xp.array(xp.inf, dist1.high.dtype)
    return where.where(is_inf, inf, kl)
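# Hedged NumPy sketch of the uniform-uniform KL above: it is finite only
# when [low1, high1] is contained in [low2, high2], where it equals
# log(high2 - low2) - log(high1 - low1). Illustrative values.
import numpy as np

low1, high1, low2, high2 = 0.0, 1.0, -1.0, 2.0
kl = np.where(np.logical_or(high1 > high2, low1 < low2),
              np.inf,
              np.log(high2 - low2) - np.log(high1 - low1))
# -> log(3) ~= 1.0986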
def _kl_uniform_uniform(dist1, dist2):
    xp = cuda.get_array_module(dist1.low)
    is_inf = xp.logical_or(dist1.high.data > dist2.high.data,
                           dist1.low.data < dist2.low.data)
    kl = - exponential.log(dist1.high - dist1.low) \
        + exponential.log(dist2.high - dist2.low)
    inf = xp.full_like(dist1.high.data, numpy.inf)
    return where.where(is_inf, inf, kl)
def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    xp = backend.get_array_module(x)
    logp = broadcast.broadcast_to(-exponential.log(self.scale), x.shape)
    return where.where(
        utils.force_array(
            (x.data >= self.low.data) & (x.data <= self.high.data)),
        logp, xp.array(-xp.inf, logp.dtype))
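# Hedged sketch of the uniform log-density above: constant -log(scale)
# on [low, high] and -inf outside, with scale = high - low. Note the
# variants below differ on whether the upper bound is inclusive.
import numpy as np

low, high = 0.0, 2.0
x = np.array([-0.5, 1.0, 2.5])
logp = np.where((x >= low) & (x <= high), -np.log(high - low), -np.inf)
# -> [-inf, -log(2), -inf]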
def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    xp = cuda.get_array_module(x)
    logp = broadcast.broadcast_to(-exponential.log(self.scale), x.shape)
    return where.where(
        utils.force_array(
            (x.data >= self.low.data) & (x.data < self.high.data)),
        logp, xp.full_like(logp.array, -numpy.inf))
def backward(self, target_input_indexes, grad_outputs):
    gy, = grad_outputs
    xp = backend.get_array_module(gy)
    invx, = self.get_retained_outputs()
    mask = xp.tril(xp.ones((len(invx), len(invx)), dtype=bool))
    if not self._lower:
        mask = mask.T
    # Gradient is - x^-T (dx) x^-T
    invxT = chainer.functions.transpose(invx)
    gx = chainer.functions.matmul(
        chainer.functions.matmul(- invxT, gy), invxT)
    gx = where.where(mask, gx, xp.zeros_like(gx.array))
    return gx,
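# Illustrative NumPy check (not Chainer API) of the identity in the
# comment above: for Y = inv(X) and upstream gradient gY, the gradient
# with respect to X is -inv(X).T @ gY @ inv(X).T, masked to the lower
# triangle when X is lower-triangular.
import numpy as np

X = np.array([[2.0, 0.0, 0.0],
              [0.5, 1.5, 0.0],
              [0.3, -0.2, 1.0]])
gY = np.arange(9.0).reshape(3, 3)
invX = np.linalg.inv(X)
gX = np.tril(-invX.T @ gY @ invX.T)

# finite-difference check of one lower-triangular entry
eps = 1e-6
E = np.zeros_like(X)
E[2, 0] = eps
num = (gY * (np.linalg.inv(X + E) - np.linalg.inv(X - E)) / (2 * eps)).sum()
assert np.isclose(gX[2, 0], num)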
def backward(self, target_input_indexes, grad_outputs):
    gy, = grad_outputs
    xp = cuda.get_array_module(gy)
    invx, = self.get_retained_outputs()
    mask = xp.tril(xp.ones((len(invx), len(invx)), dtype=bool))
    if not self._lower:
        mask = mask.T
    # Gradient is - x^-T (dx) x^-T
    invxT = chainer.functions.transpose(invx)
    gx = chainer.functions.matmul(
        chainer.functions.matmul(- invxT, gy), invxT)
    gx = where.where(mask, gx, xp.zeros_like(gx.array))
    return gx,
def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    bl = broadcast.broadcast_to(self.low, x.shape)
    bh = broadcast.broadcast_to(self.high, x.shape)
    xp = cuda.get_array_module(x)
    logp = -exponential.log(bh - bl)
    return where.where(
        xp.asarray((x.data >= bl.data) & (x.data < bh.data)),
        logp,
        xp.asarray(-xp.ones_like(x.data) * numpy.inf, dtype=x.dtype))
def mean(self):
    mean = (self.alpha * self.scale / (self.alpha - 1))
    xp = mean.xp
    inf = xp.full_like(mean.array, xp.inf)
    return where.where(self.alpha.data > 1, mean, inf)
def variance(self):
    var = self.scale ** 2 * self.alpha / (self.alpha - 1) ** 2 \
        / (self.alpha - 2)
    xp = var.xp
    inf = xp.full_like(var.array, xp.inf)
    return where.where(self.alpha.data > 2, var, inf)