def __init__(self, channels):
    super().__init__()
    self.channels = channels
    shape = (channels, channels)
    rotation_mat = np.linalg.qr(
        np.random.normal(size=shape))[0].astype("float32")
    # w_p: a permutation matrix
    # w_l: a lower triangular matrix with ones on the diagonal
    # w_u: an upper triangular matrix with zeros on the diagonal
    w_p, w_l, w_u = scipy.linalg.lu(rotation_mat)
    s = np.diag(w_u)  # diagonal of U, kept as a separate 1-D parameter
    u_mask = np.triu(np.ones_like(w_u), k=1)
    l_mask = np.tril(np.ones_like(w_u), k=-1)
    l_diag = np.eye(w_l.shape[0])
    w_u = w_u * u_mask
    with self.init_scope():
        self.w_u = chainer.Parameter(initializer=w_u, shape=w_u.shape)
        self.w_l = chainer.Parameter(initializer=w_l, shape=w_l.shape)
        self.s = chainer.Parameter(initializer=s, shape=s.shape)
    self.add_persistent("w_p", w_p)
    self.add_persistent("u_mask", u_mask)
    self.add_persistent("l_mask", l_mask)
    self.add_persistent("l_diag", l_diag)
    # W^(-1) is not a learnable parameter. At initialization W equals
    # rotation_mat, so its inverse can be computed directly and reshaped to
    # the (out, in, 1, 1) kernel shape expected by the 1x1 convolution.
    inv_rotation_mat = np.linalg.inv(rotation_mat).reshape(shape + (1, 1))
    self.inverse_conv = L.Convolution2D(
        channels, channels, ksize=1, stride=1, pad=0, nobias=True,
        initialW=inv_rotation_mat)
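# Hedged, standalone sketch (not part of the original snippet).  The LU-based
# parameterization above relies on scipy.linalg.lu returning factors with
# rotation_mat == w_p @ w_l @ w_u, where w_l already has a unit diagonal.
# With an illustrative channels = 4, this checks that the masked
# re-parameterization reconstructs the rotation matrix:
import numpy as np
import scipy.linalg

channels = 4
rotation_mat = np.linalg.qr(
    np.random.normal(size=(channels, channels)))[0].astype("float32")
w_p, w_l, w_u = scipy.linalg.lu(rotation_mat)

print(np.allclose(w_p @ w_l @ w_u, rotation_mat, atol=1e-5))  # True

s = np.diag(w_u)
u_mask = np.triu(np.ones_like(w_u), k=1)
l_mask = np.tril(np.ones_like(w_u), k=-1)
l_diag = np.eye(channels)
kernel = w_p @ (w_l * l_mask + l_diag) @ (w_u * u_mask + np.diag(s))
print(np.allclose(kernel, rotation_mat, atol=1e-5))  # True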
def _pairwise_distances_l2(embeddings):
    """Compute the 2D matrix of distances between all the embeddings.

    Distance is defined by the squared L2 norm: distance(x, y) := ||x - y||^2

    Args:
        embeddings: Variable with shape=(batch_size, embed_dim)

    Returns:
        pairwise_distances: Variable with shape=(batch_size, batch_size)
    """
    # Scale the embeddings by 0.5 to constrain the distances to [0.0, 1.0].
    embeddings = 0.5 * embeddings

    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    dot_product = F.matmul(embeddings, embeddings, transa=False, transb=True)

    # Get the squared L2 norm of each embedding. We can just take the diagonal
    # of `dot_product`. This also provides more numerical stability (the
    # diagonal of the result will be exactly 0).
    # shape (batch_size,)
    squared_norm = F.diagonal(dot_product)

    # Compute the pairwise distance matrix using:
    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)
    distances = (F.expand_dims(squared_norm, axis=0) - 2.0 * dot_product
                 + F.expand_dims(squared_norm, axis=1))

    # Because of floating-point errors, some distances might fall slightly
    # outside [0.0, 1.0], so clip them back into that range.
    distances = F.clip(distances, 0.0, 1.0)

    return distances
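# Hedged usage sketch for _pairwise_distances_l2 (not from the original code).
# It assumes the embeddings are L2-normalized, which is the setting in which
# the 0.5 scaling above bounds every pairwise distance by 1.0.
import numpy as np
import chainer.functions as F

rng = np.random.RandomState(0)
emb = rng.normal(size=(8, 16)).astype(np.float32)
emb /= np.linalg.norm(emb, axis=1, keepdims=True)

dist = _pairwise_distances_l2(emb)
print(dist.shape)                                           # (8, 8)
print(np.allclose(F.diagonal(dist).array, 0.0, atol=1e-5))  # self-distances ~ 0
print(np.allclose(dist.array, dist.array.T, atol=1e-5))     # symmetric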
def _kdeparts(self, input_obs, input_ins):
    """
    Multivariate Kernel Density Estimation (KDE) with Gaussian kernels on the
    given random variables.

    INPUT:
        input_obs - Variable of input observation random variables to estimate density
        input_ins - Variable of input data instance to calculate the probability value
    OUTPUT:
        const  - Constant term in the Gaussian KDE expression
        energy - Expressions in the exponential to calculate Gaussian KDE
                 (energy wrt. every obs. point)
    """
    [n, d] = input_obs.shape

    # Compute Kernel Bandwidth Matrix based on Silverman's Rule of Thumb
    silverman_factor = np.power(n * (d + 2.0) / 4.0, -1. / (d + 4))
    input_centered = input_obs - F.mean(input_obs, axis=0, keepdims=True)
    data_covariance = F.matmul(F.transpose(input_centered), input_centered) / n
    kernel_bw = F.diagonal(data_covariance) * (silverman_factor ** 2) * np.eye(d, d)
    const = 1 / (n * ((2 * np.pi) ** (d / 2)) * F.sqrt(F.det(kernel_bw)))

    # Compute energy expressions in the exponent for every observation point
    diff = input_obs - input_ins
    energy = -0.5 * F.diagonal(
        F.matmul(F.matmul(diff, F.inv(kernel_bw)), F.transpose(diff)))

    return const, energy
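# Hedged usage sketch (not from the original code).  The two return values are
# presumably meant to be combined as density = const * sum(exp(energy)), which
# is the standard multivariate Gaussian-KDE expression given the normalizing
# constant and per-observation energies computed above.  `model` stands in for
# an instance of the (unnamed) class that defines _kdeparts.
import numpy as np
import chainer.functions as F

rng = np.random.RandomState(0)
obs = rng.normal(size=(50, 3)).astype(np.float32)   # n observations, d = 3
ins = rng.normal(size=(1, 3)).astype(np.float32)    # one query instance

const, energy = model._kdeparts(obs, ins)

# const already contains the 1/n factor, so the density at `ins` is the
# constant times the sum of the per-observation kernel terms (assumption).
density = const * F.sum(F.exp(energy))
print(float(density.array))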
def regularize_diag_off_diag_dip(cov_mu, lambda_od, lambda_d):
    """Compute on and off diagonal regularizers for DIP-VAE models.

    Penalize deviations of covariance_matrix from the identity matrix. Uses
    different weights for the deviations of the diagonal and off diagonal
    entries.

    Args:
        cov_mu: [num_latent, num_latent] covariance matrix to regularize.
        lambda_od: Weight of penalty for off diagonal elements.
        lambda_d: Weight of penalty for diagonal elements.

    Returns:
        dip_regularizer: Regularized deviation from diagonal of covariance_matrix.
    """
    xp = cov_mu.xp
    cov_mu_diag = F.diagonal(cov_mu)
    cov_mu_off_diag = cov_mu - cov_mu_diag * xp.eye(cov_mu.shape[0])
    dip_regularizer = lambda_od * F.sum(cov_mu_off_diag ** 2) \
        + lambda_d * F.sum((cov_mu_diag - 1) ** 2)
    return dip_regularizer
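# Hedged usage sketch (not from the original code).  One way to obtain cov_mu,
# as in DIP-VAE-I, is the covariance of the encoder means over a batch; that
# construction and the lambda values below are illustrative assumptions.
import numpy as np
import chainer
import chainer.functions as F

rng = np.random.RandomState(0)
mu = chainer.Variable(rng.normal(size=(64, 10)).astype(np.float32))

mu_centered = mu - F.mean(mu, axis=0, keepdims=True)
cov_mu = F.matmul(F.transpose(mu_centered), mu_centered) / mu.shape[0]

loss = regularize_diag_off_diag_dip(cov_mu, lambda_od=10.0, lambda_d=100.0)
print(float(loss.array))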
def calcAttentionLoss(self, A):
    if self.I is None:
        self.setIdentityMatrix(A)

    # rewrite to use comprehension like
    # F.sum([... for a in A])
    '''
    loss = 0
    for a in A:
        # |A^T A - I|_F^2 = |B|_F^2 = tr(BB^T) = sum of diag(BB^T)
        b = F.matmul(a.T, a) - self.I
        loss += F.sum(F.diagonal(F.matmul(b, b.T)))
    '''
    losses = []
    for a in A:
        # |A^T A - I|_F^2 = |B|_F^2 = tr(BB^T) = sum of diag(BB^T)
        b = F.matmul(a.T, a) - self.I
        losses.append(F.sum(F.diagonal(F.matmul(b, b.T))))
    return F.average(F.vstack(losses))
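# Hedged check (not from the original code) of the identity behind the comment
# above: tr(B B^T) = |B|_F^2, i.e. the same penalty could also be computed as
# a plain sum of squared entries.  The 5x5 matrix stands in for a.T @ a - I.
import numpy as np
import chainer.functions as F

rng = np.random.RandomState(0)
b = rng.normal(size=(5, 5)).astype(np.float32)

frobenius_sq = F.sum(F.diagonal(F.matmul(b, b.T)))  # tr(B B^T), as in the loss
direct = F.sum(b * b)                               # sum of squared entries
print(np.allclose(frobenius_sq.array, direct.array, atol=1e-4))  # True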
def f(x):
    x = functions.diagonal(x, *self.args)
    return x * x
def check_backward(self, x_data, y_grad):
    gradient_check.check_backward(
        lambda x: functions.diagonal(x, *self.args),
        x_data, y_grad, dtype=numpy.float64)
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.diagonal(x, *self.args)
    testing.assert_allclose(y.data, self.y_expected)
def f(x):
    return functions.diagonal(x, *self.args)
def W(self):
    kernel = self.w_p @ (self.w_l * self.l_mask + self.l_diag) @ (
        self.w_u * self.u_mask + cf.diag(self.s))
    return cf.reshape(kernel, kernel.shape + (1, 1))
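# Hedged illustration (not from the original code) of why cf.diag is used in
# the kernel above: chainer.functions.diag builds the diagonal matrix from the
# 1-D parameter s, while chainer.functions.diagonal (used elsewhere in these
# snippets) goes the other way and extracts the diagonal of a matrix.
import numpy as np
import chainer.functions as F

s = np.array([1.0, 2.0, 3.0], dtype=np.float32)

diag_matrix = F.diag(s)              # 1-D vector -> 3x3 diagonal matrix
recovered = F.diagonal(diag_matrix)  # 2-D matrix -> its 1-D diagonal

print(diag_matrix.array)
print(np.allclose(recovered.array, s))  # True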
def forward(self, inputs, device):
    x, = inputs
    return functions.diagonal(x, *self.args),