def average(x, axis=None, weights=None, keepdims=False):
    """Calculate weighted average of array elements over a given axis.

    Args:
        x (~chainer.Variable): Elements to sum.
        axis (None or int or tuple of int): Axis along which the method is
            performed. With the default (axis = None) it performs a mean
            over all the dimensions of the input array.
        weights (None or chainer.Variable): An array holding weights to
            calculate weighted average. If it is ``None``, all weights are
            assumed to be one.
            When ``axis`` is ``None``, ``weights`` must have the same shape
            as ``x``. And when ``axis`` is ``int``, it must be a 1-D array
            satisfying ``weights.shape == (x.shape[axis],)``.
        keepdims (bool): If ``True``, the specified axes are kept as axes
            of length one.

    Returns:
        ~chainer.Variable: Output variable.

    """
    if axis is None:
        pass
    elif isinstance(axis, tuple):
        axis = [a + x.ndim if a < 0 else a for a in axis]
        axis.sort()
        for a, b in six.moves.zip(axis, axis[1:]):
            if a == b:
                raise ValueError('duplicate value in \'axis\'')
        axis = tuple(axis)
    else:
        if axis < 0:
            axis += x.ndim
        axis = (axis,)

    if weights is not None:
        if axis is not None and len(axis) > 1:
            raise ValueError(
                'tuple axis is not supported when weights is given')
        divider = sum_mod.sum(weights)
        if axis is not None:
            w_shape = [d if i in axis else 1 for i, d in enumerate(x.shape)]
            weights = broadcast.broadcast_to(
                reshape.reshape(weights, w_shape), x.shape)
        x = x * weights
    else:
        if axis is None:
            divider = x.size
        else:
            divider = 1
            for a in axis:
                divider *= x.shape[a]

    x_sum = sum_mod.sum(x, axis, keepdims)
    if weights is not None:
        # We do not need to call broadcast when weights is None because
        # divider here is not a Variable but a scalar
        divider = broadcast.broadcast_to(divider, x_sum.shape)
    return x_sum / divider

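# A minimal usage sketch for the ``average`` function above, assuming it is
# the one exposed as ``chainer.functions.average`` (an assumption, not
# confirmed by this file): weighted mean over a single axis.
import numpy as np
import chainer.functions as F

x_demo = np.arange(12, dtype=np.float32).reshape(3, 4)
w_demo = np.array([1., 2., 3.], dtype=np.float32)   # weights.shape == (x.shape[0],)
y_demo = F.average(x_demo, axis=0, weights=w_demo)  # Variable of shape (4,)
# Equivalent NumPy check: (x_demo * w_demo[:, None]).sum(axis=0) / w_demo.sum()
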
def W_bar(self): """ Spectrally Normalized Weight """ if self.n % self.ncritic == 0: W_mat = self.W.reshape(self.W.shape[0], -1) xp = cuda.get_array_module(W_mat.data) U, s, V = xp.linalg.svd(W_mat.data, full_matrices=True) s = s / max(s) S_shape = min(W_mat.data.shape) half_shape = S_shape // 2 s[:half_shape] = xp.ones(half_shape) S = xp.zeros(W_mat.data.shape) S[:S_shape, :S_shape] = xp.diag(s) W_mat.data = xp.dot(U, xp.dot(S, V)) self.W.copydata(W_mat.reshape(self.W.shape)) self.n = 1 return self.W else: self.n += 1 W_mat = self.W.reshape(self.W.shape[0], -1) sigma, _u, _ = max_singular_value(W_mat, self.u, self.Ip) if self.factor: sigma = sigma / self.factor sigma = broadcast_to(sigma.reshape((1, 1, 1, 1)), self.W.shape) if chainer.config.train: # Update estimated 1st singular vector self.u[:] = _u if hasattr(self, 'gamma'): return broadcast_to(self.gamma, self.W.shape) * self.W / sigma else: return self.W / sigma
def _normalize(self, x):
    size = x.shape[1]
    mean = broadcast.broadcast_to(
        (sum.sum(x, axis=1) / size)[:, None], x.shape)
    std = broadcast.broadcast_to(
        sqrt.sqrt(sum.sum(square.square(x - mean), axis=1) / size)[:, None],
        x.shape) + self.eps
    return (x - mean) / std

def prob(self, x):
    x = chainer.as_variable(x)
    if self._is_gpu:
        valid = cuda.cupy.bitwise_or(x.array == 0, x.array == 1)
    else:
        valid = numpy.bitwise_or(x.array == 0, x.array == 1)
    ret = x * broadcast.broadcast_to(self.p, x.shape) \
        + (1 - x) * (1 - broadcast.broadcast_to(self.p, x.shape))
    return ret * valid

def W_bar(self):
    sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    self.u[:] = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def W_bar(self):
    """ Spectral Normalized Weight """
    sigma, _u, _ = max_sv.max_singular_value(self.W, self.u, self.Ip)
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    self.u = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def sample_n(self, n):
    if self._is_gpu:
        eps = cuda.cupy.random.laplace(
            size=(n,) + self.loc.shape).astype(numpy.float32)
    else:
        eps = numpy.random.laplace(
            size=(n,) + self.loc.shape).astype(numpy.float32)
    noise = broadcast.broadcast_to(self.scale, eps.shape) * eps
    noise += broadcast.broadcast_to(self.loc, eps.shape)
    return noise

def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    bl = broadcast.broadcast_to(self.low, x.shape)
    bh = broadcast.broadcast_to(self.high, x.shape)
    xp = cuda.get_array_module(x)
    logp = -exponential.log(bh - bl)
    return where.where(
        xp.asarray((x.data >= bl.data) & (x.data < bh.data)),
        logp,
        xp.asarray(-xp.ones_like(x.data) * numpy.inf, dtype=x.dtype))

def W_bar(self):
    """ Spectral Normalized Weight """
    xp = cuda.get_array_module(self.W.data)
    W_mat = self.W.reshape(self.W.shape[0], -1)
    sigma, _u, _ = max_sv.max_singular_value(W_mat, self.u, self.Ip)
    sigma = broadcast_to(sigma.reshape((1, 1, 1, 1)), self.W.shape)
    self.u = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def W_bar(self):
    # Spectral Normalized Weight
    W_mat = self.W.reshape(self.W.shape[0], -1)
    sigma, _u, _ = max_singular_value(W_mat, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1, 1, 1)), self.W.shape)
    if chainer.config.train:
        self.u[:] = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def sample_n(self, n):
    xp = cuda.get_array_module(self.mu)
    if xp is cuda.cupy:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape, dtype=self.mu.dtype)
    else:
        eps = xp.random.standard_normal(
            (n,) + self.mu.shape).astype(self.mu.dtype)
    noise = broadcast.broadcast_to(self.sigma, eps.shape) * eps
    noise += broadcast.broadcast_to(self.mu, eps.shape)
    return exponential.exp(noise)

def log_prob(self, x):
    scale_tril_inv = _batch_triangular_inv(
        self.scale_tril.reshape(-1, self.d, self.d))
    scale_tril_inv = scale_tril_inv.reshape(
        self.batch_shape + (self.d, self.d))
    bsti = broadcast.broadcast_to(scale_tril_inv, x.shape + (self.d,))
    bl = broadcast.broadcast_to(self.loc, x.shape)
    m = matmul.matmul(bsti, expand_dims.expand_dims(x - bl, axis=-1))
    m = matmul.matmul(swapaxes.swapaxes(m, -1, -2), m)
    m = squeeze.squeeze(m, axis=-1)
    m = squeeze.squeeze(m, axis=-1)
    logz = LOGPROBC * self.d - self._logdet(self.scale_tril)
    return broadcast.broadcast_to(logz, m.shape) - 0.5 * m

def W_bar(self):
    """ Spectral Normalized Weight """
    sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    if chainer.config.train:
        # Update estimated 1st singular vector
        self.u[:] = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def W_bar(self):
    """ Spectral Normalized Weight """
    W_mat = self.W.reshape(self.W.shape[0], -1)
    sigma, _u, _ = max_singular_value(W_mat, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(
        sigma.reshape([1] * len(self.W.shape)), self.W.shape)
    self.u[:] = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def W_bar(self):
    """ Spectrally normalized weight. """
    sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    self.u[:] = _u
    if hasattr(self, 'gamma'):
        # Scale the normalized weight by gamma, as in the other W_bar
        # variants; returning the broadcast gamma alone would skip the
        # normalization entirely.
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def W_bar(self):
    sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    self.u[:] = _u
    return self.W / sigma

def sample_n(self, n):
    xp = cuda.get_array_module(self.k)
    if xp is cuda.cupy:
        eps = xp.random.gamma(
            self.k.data, size=(n,) + self.batch_shape, dtype=self.k.dtype)
    else:
        eps = xp.random.gamma(
            self.k.data, size=(n,) + self.batch_shape).astype(self.k.dtype)
    noise = broadcast.broadcast_to(self.theta, eps.shape) * eps
    return noise

def average(x, axis=None, weights=None, keepdims=False):
    """Calculate weighted average of array elements over a given axis.

    Args:
        x (~chainer.Variable): Elements to sum.
        axis (None or int): Axis along which the method is performed.
            With the default (axis = None) it performs a mean over all the
            dimensions of the input array.
        weights (None or chainer.Variable): An array holding weights to
            calculate weighted average. If it is ``None``, all weights are
            assumed to be one.
            When ``axis`` is ``None``, ``weights`` must have the same shape
            as ``x``. And when ``axis`` is ``int``, it must be a 1-D array
            satisfying ``weights.shape == (x.shape[axis],)``.
        keepdims (bool): If ``True``, the specified axes are kept as axes
            of length one.

    Returns:
        ~chainer.Variable: Output variable.

    """
    if weights is not None:
        divider = sum_mod.sum(weights)
        if axis is not None:
            if axis < 0:
                axis += x.ndim
            w_shape = [d if i == axis else 1 for i, d in enumerate(x.shape)]
            weights = broadcast.broadcast_to(
                reshape.reshape(weights, w_shape), x.shape)
        x = x * weights
    else:
        if axis is None:
            divider = x.size
        else:
            divider = x.shape[axis]

    x_sum = sum_mod.sum(x, axis, keepdims)
    if weights is not None:
        # We do not need to call broadcast when weights is None because
        # divider here is not a Variable but a scalar
        divider = broadcast.broadcast_to(divider, x_sum.shape)
    return x_sum / divider

def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    xp = backend.get_array_module(x)
    logp = broadcast.broadcast_to(-exponential.log(self.scale), x.shape)
    return where.where(
        utils.force_array(
            (x.data >= self.low.data) & (x.data <= self.high.data)),
        logp, xp.array(-xp.inf, logp.dtype))

def W_bar(self):
    """ Spectral Normalized Weight """
    sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
    if self.factor:
        sigma = sigma / self.factor
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    if chainer.config.train:
        self.u[:] = _u
    return self.W / sigma

def W_bar(self):
    """ Spectral Normalized Weight """
    sigma, _u, _ = max_sv.max_singular_value(self.W, self.u, self.Ip)
    # broadcast_to
    # >>> x
    # array([0, 1, 2])
    # >>> y = F.broadcast_to(x, (3, 3))
    # >>> y.data
    # array([[0, 1, 2],
    #        [0, 1, 2],
    #        [0, 1, 2]])
    sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
    self.u = _u
    if hasattr(self, 'gamma'):
        return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
    else:
        return self.W / sigma

def log_prob(self, x):
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    xp = cuda.get_array_module(x)
    logp = broadcast.broadcast_to(-exponential.log(self.scale), x.shape)
    return where.where(
        utils.force_array(
            (x.data >= self.low.data) & (x.data < self.high.data)),
        logp, xp.full_like(logp.array, -numpy.inf))

def _log_det_jacobian(self, x, y):
    shape = x.shape
    scale = self.scale
    if isinstance(scale, numbers.Number):
        xp = cuda.get_array_module(x, y)
        result = exponential.log(basic_math.absolute(scale)) \
            * xp.ones(shape, dtype=x.dtype)
    else:
        result = exponential.log(basic_math.absolute(scale))
    if self.event_dim:
        result_size = result.shape[:-self.event_dim] + (-1,)
        result = sum_mod.sum(result.view(result_size), axis=-1)
        shape = shape[:-self.event_dim]
    return broadcast.broadcast_to(result, shape)

def bias(x, y, axis=1):
    """Elementwise summation with broadcasting.

    Computes an elementwise summation of two input variables, with the shape
    of the latter variable broadcasted to match the shape of the former.
    ``axis`` is the first axis of the first variable along which the second
    variable is applied.

    The term "broadcasting" here comes from Caffe's bias layer, so the
    "broadcasting" with the following arguments::

           x : 100 x 3 x 40 x 5 x 6
           y : 3 x 40
        axis : 1

    is equivalent to the following numpy broadcasting::

        x : 100 x 3 x 40 x 5 x 6
        y : (1 x) 3 x 40 x 1 x 1

    Note that the axis of ``x`` to which we apply ``y`` is specified by the
    argument ``axis``, whose meaning is different from numpy's ``axis``.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`):
            Input variable to be summed.
        y (:class:`~chainer.Variable` or :ref:`ndarray`):
            Input variable to sum, broadcasted.
        axis (int): The first axis of ``x`` along which ``y`` is applied.

    Returns:
        ~chainer.Variable: Output variable.

    """
    x_shape = x.shape
    y_shape = y.shape
    if chainer.is_debug():
        assert x_shape[axis:axis + len(y_shape)] == y_shape
    y1_shape = tuple([1] * axis + list(y_shape) +
                     [1] * (len(x_shape) - axis - len(y_shape)))
    y1 = reshape.reshape(y, y1_shape)
    y2 = broadcast.broadcast_to(y1, x_shape)
    return x + y2

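# Hedged usage sketch, assuming the ``bias`` function above is
# ``chainer.functions.bias``: ``y`` is broadcast against ``x`` starting at
# ``axis``, one bias value per channel here.
import numpy as np
import chainer.functions as F

x_demo = np.zeros((5, 3, 4, 4), dtype=np.float32)
y_demo = np.arange(3, dtype=np.float32)      # matches x_demo.shape[1:2]
z_demo = F.bias(x_demo, y_demo, axis=1)      # Variable of shape (5, 3, 4, 4)
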
def scale(x, y, axis=1):
    """Elementwise product with broadcasting.

    Computes an elementwise product of two input variables, with the shape
    of the latter variable broadcasted to match the shape of the former.
    ``axis`` is the first axis of the first variable along which the second
    variable is applied.

    The term "broadcasting" here comes from Caffe's scale layer, so the
    "broadcasting" with the following arguments::

           x : 100 x 3 x 40 x 60
           y : 3 x 40
        axis : 1

    is equivalent to the following numpy broadcasting::

        x : 100 x 3 x 40 x 60
        y :   1 x 3 x 40 x 1

    Note that the ``axis`` argument indicates to which axis of ``x`` we
    apply ``y``.

    Args:
        x (~chainer.Variable): Input variable to be scaled.
        y (~chainer.Variable): Input variable to scale, broadcasted.
        axis (int): The first axis of ``x`` along which ``y`` is applied.

    Returns:
        ~chainer.Variable: Output variable.

    """
    x_shape = x.shape
    y_shape = y.shape
    if chainer.is_debug():
        assert x_shape[axis:axis + len(y_shape)] == y_shape
    y1_shape = tuple([1] * axis + list(y_shape) +
                     [1] * (len(x_shape) - axis - len(y_shape)))
    y1 = reshape.reshape(y, y1_shape)
    y2 = broadcast.broadcast_to(y1, x_shape)
    return x * y2

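# The same pattern works for ``scale`` (assumed here to be
# ``chainer.functions.scale``), e.g. a per-channel multiplier:
import numpy as np
import chainer.functions as F

x_demo = np.ones((5, 3, 4, 4), dtype=np.float32)
g_demo = np.array([0.5, 1.0, 2.0], dtype=np.float32)
z_demo = F.scale(x_demo, g_demo, axis=1)     # each channel multiplied by g_demo
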
def prob(self, x):
    return PROBC / broadcast.broadcast_to(self.scale, x.shape) * \
        exponential.exp(
            - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2
            / broadcast.broadcast_to(self.scale, x.shape) ** 2)

def log_prob(self, x):
    return - broadcast.broadcast_to(self.log_scale, x.shape) \
        - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2 \
        / broadcast.broadcast_to(self.scale, x.shape) ** 2 + LOGPROBC

def survival_function(self, x):
    return 0.5 * (1. - erf.erf(
        (x - broadcast.broadcast_to(self.loc, x.shape))
        / (2 ** 0.5 * broadcast.broadcast_to(self.scale, x.shape))))

def icdf(self, x):
    return erfinv.erfinv(2. * chainer.as_variable(x) - 1.) \
        * (2 ** 0.5) * broadcast.broadcast_to(self.scale, x.shape) \
        + broadcast.broadcast_to(self.loc, x.shape)

def group_normalization(x, groups, gamma, beta, eps=1e-5):
    """Group normalization function.

    This function implements a "group normalization", which divides the
    channels into groups, computes the mean and variance within each group,
    normalizes by these statistics, and then scales and shifts the result.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Batch tensors.
            The first dimension of this value must be the size of the
            minibatch and the second dimension must be the number of
            channels. Moreover, this value must have one or more following
            dimensions, such as height and width.
        groups (int): The number of channel groups.
            This value must be a divisor of the number of channels.
        gamma (~chainer.Variable): Scaling parameter.
        beta (~chainer.Variable): Shifting parameter.
        eps (float): Epsilon value for numerical stability of normalization.

    Returns:
        ~chainer.Variable: The output variable which has the same shape
        as :math:`x`.

    See: `Group Normalization <https://arxiv.org/abs/1803.08494>`_
    """
    if x.ndim <= 2:
        raise ValueError('Input dimension must be greater than 2, '
                         'including batch size dimension '
                         '(first dimension).')

    if not isinstance(groups, int):
        raise TypeError('Argument: \'groups\' type must be (int).')

    xp = backend.get_array_module(x)

    batch_size, channels = x.shape[:2]
    original_shape = x.shape

    if channels % groups != 0:
        raise ValueError('Argument: \'groups\' must be a divisor '
                         'of the number of channels.')

    # By doing this reshaping, calling batch_normalization function becomes
    # equivalent to Group Normalization.
    # And redundant dimension is added in order to utilize ideep64/cuDNN.
    x = reshape.reshape(x, (1, batch_size * groups, -1, 1))

    with cuda.get_device_from_array(x.array):
        dummy_gamma = xp.ones(batch_size * groups).astype(xp.float32)
        dummy_beta = xp.zeros(batch_size * groups).astype(xp.float32)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        x = batch_normalization.batch_normalization(
            x, dummy_gamma, dummy_beta, eps=eps)

    x = reshape.reshape(x, original_shape)

    target_shape = [1, channels] + [1] * (x.ndim - 2)
    gamma_broadcast = broadcast.broadcast_to(
        reshape.reshape(gamma, target_shape), x.shape)
    beta_broadcast = broadcast.broadcast_to(
        reshape.reshape(beta, target_shape), x.shape)

    return x * gamma_broadcast + beta_broadcast

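# Minimal usage sketch, assuming the function above is exposed as
# ``chainer.functions.group_normalization``: normalize a (N, C, H, W) batch
# in channel groups, with per-channel gamma and beta.
import numpy as np
import chainer.functions as F

x_demo = np.random.randn(2, 6, 4, 4).astype(np.float32)
gamma_demo = np.ones(6, dtype=np.float32)    # per-channel scale
beta_demo = np.zeros(6, dtype=np.float32)    # per-channel shift
y_demo = F.group_normalization(x_demo, groups=3, gamma=gamma_demo,
                               beta=beta_demo)
# y_demo.shape == (2, 6, 4, 4); each group of 2 channels shares statistics
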
def __call__(self, embeddings, labels):
    """
    Args:
        embeddings (:class:`~chainer.Variable` or :class:`numpy.ndarray` \
            or :class:`cupy.ndarray`): \
            predicted embedding vectors
            (batch size, max embedding dimensions, height, width)
        labels (:class:`numpy.ndarray` or :class:`cupy.ndarray`): \
            instance segmentation ground truth
            each unique value has to be denoting one instance
            (batch size, height, width)
    Returns:
        :class:`tuple` of :class:`chainer.Variable`:
        - *Variance loss*: Variance loss multiplied by ``alpha``
        - *Distance loss*: Distance loss multiplied by ``beta``
        - *Regularization loss*: Regularization loss multiplied by ``gamma``
    """
    assert (self.max_embedding_dim == embeddings.shape[1])

    l_dist = 0.0
    count = 0
    xp = cuda.get_array_module(embeddings)

    emb = embeddings[None, :]
    emb = broadcast_to(emb, (emb.shape[1], emb.shape[1], emb.shape[2],
                             emb.shape[3], emb.shape[4]))
    ms = []
    for c in range(self.max_embedding_dim):
        # Create mask for instance
        mask = xp.expand_dims(labels == c + 1, 1)
        ms.append(mask)
    if hasattr(xp, 'stack'):
        ms = xp.stack(ms, 0)
    else:
        # Old numpy does not have numpy.stack.
        ms = xp.concatenate([xp.expand_dims(x, 0) for x in ms], 0)
    mns = c_sum(emb * ms, axis=(3, 4))
    mns = mns / xp.maximum(xp.sum(ms, (2, 3, 4))[:, :, None], 1)
    mns_exp = mns[:, :, :, None, None]

    # Calculate regularization term
    l_reg = c_sum(self.norm(mns, (1, 2)))
    l_reg = l_reg / (self.max_embedding_dim * embeddings.shape[0])

    # Calculate variance term
    l_var = self.norm((mns_exp - emb) * ms, 2)
    l_var = relu(l_var - self.delta_v) ** 2
    l_var = c_sum(l_var, (1, 2, 3))
    l_var = l_var / xp.maximum(xp.sum(ms, (1, 2, 3, 4)), 1)
    l_var = c_sum(l_var) / self.max_embedding_dim

    # Calculate distance loss
    for c_a in range(len(mns)):
        for c_b in range(c_a + 1, len(mns)):
            m_a = mns[c_a]
            m_b = mns[c_b]
            dist = self.norm(m_a - m_b, 1)  # N
            l_dist += c_sum((relu(2 * self.delta_d - dist)) ** 2)
            count += 1
    l_dist /= max(count * embeddings.shape[0], 1)
    rtn = self.alpha * l_var, self.beta * l_dist, self.gamma * l_reg
    return rtn

def log_prob(self, x):
    return - exponential.log(broadcast.broadcast_to(self.scale, x.shape)) \
        - 0.5 * (x - broadcast.broadcast_to(self.loc, x.shape)) ** 2 \
        / broadcast.broadcast_to(self.scale, x.shape) ** 2 + LOGPROBC

def icdf(self, x):
    return erfinv.erfinv(2. * x - 1.) \
        * (2 ** 0.5) * broadcast.broadcast_to(self.scale, x.shape) \
        + broadcast.broadcast_to(self.loc, x.shape)

def cdf(self, x):
    return 0.5 * (1. + erf.erf((
        x - broadcast.broadcast_to(self.loc, x.shape))
        / (2 ** 0.5 * broadcast.broadcast_to(self.scale, x.shape))))

def deformable_convolution_2d_sampler(x, offset, W, b=None, stride=1, pad=0):
    """Two-dimensional deformable convolution function using computed offset.

    This is an implementation of two-dimensional deformable convolution from
    `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_.

    It takes four variables: the input image ``x``, the offset image
    ``offset``, the filter weight ``W``, and the bias vector ``b``.

    Notation: here is the notation for the dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of the input and output
      channels, respectively.
    - :math:`h` and :math:`w` are the height and width of the input image,
      respectively.
    - :math:`k_H` and :math:`k_W` are the height and width of the filters,
      respectively.
    - :math:`s_Y` and :math:`s_X` are the strides of the filter.
    - :math:`p_H` and :math:`p_W` are the spatial padding sizes.

    The output size :math:`(h_O, w_O)` is determined by the following
    equations:

    .. math::

       h_O &= (h + 2p_H - k_H) / s_Y + 1,\\\\
       w_O &= (w + 2p_W - k_W) / s_X + 1.

    Args:
        x (~chainer.Variable): Input variable of shape :math:`(n, c_I, h, w)`.
        offset (~chainer.Variable): Offset variable of shape
            :math:`(n, 2 \\cdot k_H \\cdot k_W, h_O, w_O)`. The first
            :math:`k_H \\cdot k_W` index of the second axis corresponds to
            the offsets in the horizontal direction. The last
            :math:`k_H \\cdot k_W` index of the second axis corresponds to
            the offsets in the vertical direction.
        W (~chainer.Variable): Weight variable of shape
            :math:`(c_O, c_I, k_H, k_W)`.
        b (~chainer.Variable): Bias variable of length :math:`c_O` (optional).
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.

    Returns:
        ~chainer.Variable: Output variable.

    Deformable convolution adds 2D offsets to the regular grid sampling
    locations in the standard convolution. It enables free form deformation
    of the sampling grid.

    See `Jifeng Dai, Haozhi Qi, Yuwen Xiong, Yi Li, Guodong Zhang, Han Hu, \
    Yichen Wei. Deformable Convolutional Networks \
    <https://arxiv.org/abs/1703.06211>`_

    If the bias vector is given, then it is added to all spatial locations
    of the output of convolution.

    .. seealso:: :class:`~chainer.links.DeformableConvolution2D`

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (2, 3, 4, 7)).astype(np.float32)
        >>> offset = np.random.uniform(
        ...     0, 1, (2, 2 * 3 * 3, 2, 5)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (4, 3, 3, 3)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (4,)).astype(np.float32)
        >>> y = F.deformable_convolution_2d_sampler(x, offset, W, b)
        >>> y.shape
        (2, 4, 2, 5)

    """
    sy, sx = _pair(stride)
    ph, pw = _pair(pad)
    out_c, _, kh, kw = W.shape
    n, c, h, w = x.shape
    _, khkw2, out_h, out_w = offset.shape

    if khkw2 != 2 * kh * kw:
        raise ValueError(
            'The shape of the offset does not match the kernel size')

    grid = _offset2grid(offset, kh, kw, sy, sx, ph, pw, h, w)
    grid = grid.reshape(n, 2, kh * kw, out_h * out_w)
    x_pad = pad_module.pad(x, ((0, 0), (0, 0), (ph, ph), (pw, pw)),
                           'constant')
    x_st = spatial_transformer_sampler.spatial_transformer_sampler(
        x_pad, grid)

    x_st = x_st.transpose(0, 3, 1, 2).reshape(n * out_h * out_w, c * kh * kw)
    W = W.transpose(1, 2, 3, 0).reshape(c * kh * kw, out_c)
    y = matmul.matmul(x_st, W)

    y = y.reshape(n, out_h, out_w, out_c).transpose(0, 3, 1, 2)
    if b is not None:
        b = broadcast.broadcast_to(b[None, :, None, None], y.shape)
        y += b
    return y