def __init__(self, channels):
        super().__init__()
        self.channels = channels
        shape = (channels, channels)
        rotation_mat = np.linalg.qr(
            np.random.normal(size=shape))[0].astype("float32")
        # w_p: a permutation matrix
        # w_l: a lower triangular matrix with ones on the diagonal
        # w_u: an upper triangular matrix; its diagonal is extracted into s
        #      and masked to zero below
        w_p, w_l, w_u = scipy.linalg.lu(rotation_mat)
        s = cf.diagonal(w_u)
        u_mask = np.triu(np.ones_like(w_u), k=1)
        l_mask = np.tril(np.ones_like(w_u), k=-1)
        l_diag = np.eye(w_l.shape[0])
        w_u = w_u * u_mask

        with self.init_scope():
            self.w_u = chainer.Parameter(initializer=w_u, shape=w_u.shape)
            self.w_l = chainer.Parameter(initializer=w_l, shape=w_l.shape)
            self.s = chainer.Parameter(initializer=s, shape=s.shape)
            self.add_persistent("w_p", w_p)
            self.add_persistent("u_mask", u_mask)
            self.add_persistent("l_mask", l_mask)
            self.add_persistent("l_diag", l_diag)

        # W^(-1) is not a learnable parameter. At initialization the kernel
        # equals the QR rotation matrix, so invert that directly and reshape
        # it to the (out, in, kh, kw) layout expected by Convolution2D.
        inv_rotation_mat = np.linalg.inv(rotation_mat)
        self.inverse_conv = L.Convolution2D(
            channels,
            channels,
            ksize=1,
            stride=1,
            pad=0,
            nobias=True,
            initialW=inv_rotation_mat.reshape((channels, channels, 1, 1)))
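A minimal standalone NumPy/SciPy sketch (names mirror the code above, but nothing here belongs to the class) showing why the masked LU factors plus the separately stored diagonal s reconstruct the initial rotation matrix:

import numpy as np
import scipy.linalg

channels = 4
rotation_mat = np.linalg.qr(np.random.normal(size=(channels, channels)))[0]

# P @ L @ U with L unit-diagonal and U carrying the diagonal values
w_p, w_l, w_u = scipy.linalg.lu(rotation_mat)
s = np.diag(w_u)                             # diagonal of U, kept separately
u_mask = np.triu(np.ones_like(w_u), k=1)     # strictly upper triangular mask
l_mask = np.tril(np.ones_like(w_u), k=-1)    # strictly lower triangular mask

# P (L_strict + I) (U_strict + diag(s)) == P L U == rotation_mat
reconstructed = w_p @ (w_l * l_mask + np.eye(channels)) @ (w_u * u_mask + np.diag(s))
assert np.allclose(reconstructed, rotation_mat)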
Example #2
def _pairwise_distances_l2(embeddings):
    """Compute the 2D matrix of distances between all the embeddings.

    Distance is the squared L2 distance: distance(x, y) := ||x - y||^2

    Args:
        embeddings: Variable with shape=(batch_size, embed_dim)

    Returns:
        pairwise_distances: Variable with shape=(batch_size, batch_size)
    """
    # Scale embeddings by 0.5 so that distances stay within [0.0, 1.0].
    embeddings = 0.5 * embeddings

    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    dot_product = F.matmul(embeddings, embeddings, transa=False, transb=True)

    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
    # This also improves numerical stability: the diagonal of the resulting
    # distance matrix will be exactly 0.
    # shape (batch_size,)
    squared_norm = F.diagonal(dot_product)

    # Compute the pairwise distance matrix as we have:
    # ||a - b||^2 = ||a||^2  - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)
    distances = (F.expand_dims(squared_norm, axis=0)
                 - 2.0 * dot_product
                 + F.expand_dims(squared_norm, axis=1))

    # Because of floating-point errors, some entries may fall slightly outside
    # [0.0, 1.0], so clamp them to that range.
    distances = F.clip(distances, 0.0, 1.0)

    return distances
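A small usage sketch (shapes and data are illustrative; assumes the function above is in scope): with unit-norm embeddings the 0.5 scaling keeps every distance in [0.0, 1.0], and the self-distances on the diagonal are exactly zero.

import numpy as np

batch_size, embed_dim = 8, 16
embeddings = np.random.randn(batch_size, embed_dim).astype(np.float32)
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)  # unit L2 norm

distances = _pairwise_distances_l2(embeddings)
assert distances.shape == (batch_size, batch_size)
assert np.allclose(np.diag(distances.array), 0.0)  # self-distances are zero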
Example #3
    def _kdeparts(self, input_obs, input_ins):
        """ Multivariate Kernel Density Estimation (KDE) with Gaussian kernels on the given random variables.
            INPUT:
                input_obs - Variable of observed data points used to estimate the density
                input_ins - Variable of the data instance at which to evaluate the density
            OUTPUT:
                const - constant factor of the Gaussian KDE expression
                energy - exponent terms of the Gaussian KDE (one per observation point)
        """
        [n, d] = input_obs.shape

        # Compute Kernel Bandwidth Matrix based on Silverman's Rule of Thumb
        silverman_factor = np.power(n * (d + 2.0) / 4.0, -1. / (d + 4))
        input_centered = input_obs - F.mean(input_obs, axis=0, keepdims=True)
        data_covariance = F.matmul(F.transpose(input_centered), input_centered) / n
        kernel_bw = F.diagonal(data_covariance) * (silverman_factor ** 2) * np.eye(d, d)
        const = 1 / (n * ((2 * np.pi) ** (d/2)) * F.sqrt(F.det(kernel_bw)))

        # Compute energy expressions in the exponent for every observation point
        diff = input_obs - input_ins
        energy = -0.5 * F.diagonal(F.matmul(F.matmul(diff, F.inv(kernel_bw)), F.transpose(diff)))

        return const, energy
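A hedged sketch of how the two parts are typically combined (the `model` object and the shapes below are illustrative; `_kdeparts` is a method of the surrounding class): the KDE value at the query instance is the constant times the sum of exponentiated energies.

import numpy as np
import chainer.functions as F

obs = np.random.randn(100, 3).astype(np.float32)   # n = 100 observations, d = 3 dims
query = np.random.randn(1, 3).astype(np.float32)   # single query instance

const, energy = model._kdeparts(obs, query)
# p(query) = const * sum_i exp(energy_i), the usual Gaussian KDE form
density = const * F.sum(F.exp(energy))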
Example #4
def regularize_diag_off_diag_dip(cov_mu, lambda_od, lambda_d):
    """Compute on and off diagonal regularizers for DIP-VAE models.
    Penalize deviations of covariance_matrix from the identity matrix. Uses
    different weights for the deviations of the diagonal and off diagonal entries.
    Args:
        cov_mu: Covariance matrix of the mean vectors, shape
            [num_latent, num_latent], to regularize.
        lambda_od: Weight of the penalty on off-diagonal elements.
        lambda_d: Weight of the penalty on diagonal elements.
    Returns:
        dip_regularizer: Penalty on the deviation of covariance_matrix from the identity.
    """
    xp = cov_mu.xp
    cov_mu_diag = F.diagonal(cov_mu)
    cov_mu_off_diag = cov_mu - cov_mu_diag * xp.eye(cov_mu.shape[0])
    dip_regularizer = lambda_od * F.sum(cov_mu_off_diag ** 2) \
        + lambda_d * F.sum((cov_mu_diag - 1) ** 2)
    return dip_regularizer
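A hedged usage sketch (the batch of encoder means `mu` and the weight values are illustrative): build the covariance of the mean vectors over the batch and pass it in, as the DIP-VAE objective does.

import numpy as np
import chainer.functions as F

batch_size, num_latent = 64, 10
mu = np.random.randn(batch_size, num_latent).astype(np.float32)  # encoder means

# covariance of the mean vectors over the batch
mu_centered = mu - F.mean(mu, axis=0, keepdims=True)
cov_mu = F.matmul(F.transpose(mu_centered), mu_centered) / batch_size

dip_loss = regularize_diag_off_diag_dip(cov_mu, lambda_od=10.0, lambda_d=100.0)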
Example #5
    def calcAttentionLoss(self, A):
        if self.I is None:
            self.setIdentityMatrix(A)

        # TODO: rewrite using a comprehension (see the sketch after this method), e.g.
        # F.sum([... for a in A])
        '''
        loss = 0
        for a in A:
            # ||A^T A - I||_F^2 = ||B||_F^2 = tr(BB^T) = sum(diag(BB^T))
            b = F.matmul(a.T, a)-self.I
            loss += F.sum(F.diagonal(F.matmul(b,b.T)))
        '''
        losses = []
        for a in A:
            # ||A^T A - I||_F^2 = ||B||_F^2 = tr(BB^T) = sum(diag(BB^T))
            b = F.matmul(a.T, a) - self.I
            losses.append(F.sum(F.diagonal(F.matmul(b, b.T))))
        return F.average(F.vstack(losses))
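As the TODO comment above suggests, the loop can be collapsed into a comprehension; a hedged sketch of that equivalent form (same names and identity matrix self.I as in the method):

losses = [F.sum(F.diagonal(F.matmul(b, b.T)))
          for b in (F.matmul(a.T, a) - self.I for a in A)]
loss = F.average(F.stack(losses))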
Example #6
 def f(x):
     x = functions.diagonal(x, *self.args)
     return x * x
Example #7
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.diagonal(x, *self.args),
         x_data,
         y_grad,
         dtype=numpy.float64)
Example #8
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.diagonal(x, *self.args)
     testing.assert_allclose(y.data, self.y_expected)
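For context, a self-contained sketch of what a typical setup for these test helpers might look like (the shapes and arguments are illustrative, not the test class's actual fixtures); chainer.functions.diagonal follows numpy.diagonal's (offset, axis1, axis2) arguments:

import numpy
import chainer
from chainer import functions, testing

x_data = numpy.random.randn(3, 4, 5).astype(numpy.float32)
args = (1, 0, 2)                      # offset=1, axis1=0, axis2=2
y_expected = x_data.diagonal(*args)   # NumPy reference result

y = functions.diagonal(chainer.Variable(x_data), *args)
testing.assert_allclose(y.data, y_expected)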
Example #9
 def f(x):
     return functions.diagonal(x, *self.args)
Example #10
 def W(self):
     kernel = self.w_p @ (self.w_l * self.l_mask + self.l_diag) @ (
         self.w_u * self.u_mask + cf.diagonal(self.s))
     return cf.reshape(kernel, kernel.shape + (1, 1))
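A brief note on this parameterization (hedged, not part of the snippet above): the permutation and the unit-diagonal triangular factors contribute nothing to |det W|, so the log-determinant that flow models need reduces to the entries of s, e.g.:

import chainer.functions as F

log_det = F.sum(F.log(F.absolute(self.s)))  # log|det W| = sum_i log|s_i|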
Example #11
 def forward(self, inputs, device):
     x, = inputs
     return functions.diagonal(x, *self.args),
Example #12
 def f(x):
     return functions.diagonal(x, *self.args)
Example #13
 def check_backward(self, x_data, y_grad):
     gradient_check.check_backward(
         lambda x: functions.diagonal(x, *self.args),
         x_data, y_grad, dtype=numpy.float64)
Example #14
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.diagonal(x, *self.args)
     testing.assert_allclose(y.data, self.y_expected)
Example #15
 def f(x):
     x = functions.diagonal(x, *self.args)
     return x * x
Example #16
 def forward(self, inputs, device):
     x, = inputs
     return functions.diagonal(x, *self.args),