Example #1
def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
    """
    `sample_conditional` will return a sample from the conditional distribution.
    In most cases this means calculating the conditional mean m and variance v and then
    returning m + sqrt(v) * eps, with eps ~ N(0, 1).
    However, for some combinations of Mok and Mof more efficient sampling routines exist.
    The dispatcher will make sure that we use the most efficient one.

    :return: samples, mean, cov
        samples has shape [num_samples, N, P] or [N, P] if num_samples is None
        mean and cov as for conditional()
    """
    if full_cov and full_output_cov:
        raise NotImplementedError("The combination of both full_cov and full_output_cov is not "
                                  "implemented for sample_conditional.")

    logger.debug("sample conditional: InducingFeature Kernel")
    mean, cov = conditional(Xnew, feat, kern, f, q_sqrt=q_sqrt, white=white,
                            full_cov=full_cov, full_output_cov=full_output_cov)
    if full_cov:
        # mean: N x P
        # cov: P x N x N
        mean = tf.matrix_transpose(mean)  # now P x N
        samples = _sample_mvn(mean, cov, 'full', num_samples=num_samples)  # (S x) P x N
        samples = tf.matrix_transpose(samples)  # now (S x) N x P

    else:
        cov_structure = "full" if full_output_cov else "diag"
        samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples)  # [(S,), N, P]

    return samples, mean, cov
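The docstring above describes drawing samples as m + sqrt(v) * eps with eps ~ N(0, 1). A minimal NumPy sketch of that reparameterization for the diagonal-covariance case (illustrative only; sample_diag_mvn is a hypothetical helper, not GPflow's actual _sample_mvn):

import numpy as np

def sample_diag_mvn(mean, var, num_samples=None, rng=None):
    # mean, var: arrays of shape [N, P]; var holds the marginal (diagonal) variances.
    rng = np.random.default_rng() if rng is None else rng
    shape = mean.shape if num_samples is None else (num_samples,) + mean.shape
    eps = rng.standard_normal(shape)   # eps ~ N(0, 1)
    return mean + np.sqrt(var) * eps   # reparameterization: m + sqrt(v) * eps

mean = np.zeros((4, 2))
var = np.ones((4, 2))
print(sample_diag_mvn(mean, var, num_samples=3).shape)  # (3, 4, 2)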
Example #2
 def testNonBatchMatrixDynamicallyDefined(self):
     matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
     expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
     with self.test_session():
         matrix_ph = tf.placeholder(tf.int32)
         transposed = tf.matrix_transpose(matrix_ph)
         self.assertAllEqual(expected_transposed, transposed.eval(feed_dict={matrix_ph: matrix}))
Example #3
def _quadrature_expectation(p, obj1, feature1, obj2, feature2, num_gauss_hermite_points):
    """
    Handling of quadrature expectations for Markov Gaussians (useful for time series)
    Fallback method for missing analytic expectations wrt Markov Gaussians
    Nota Bene: obj1 is always associated with x_n, whereas obj2 always with x_{n+1}
               if one requires e.g. <x_{n+1} K_{x_n, Z}>_p(x_{n:n+1}), compute the
               transpose and then transpose the result of the expectation
    """
    num_gauss_hermite_points = 40 if num_gauss_hermite_points is None else num_gauss_hermite_points

    warnings.warn("Quadrature is used to calculate the expectation. This means that "
                  "an analytical implementations is not available for the given combination.")

    if obj2 is None:
        eval_func = lambda x: get_eval_func(obj1, feature1)(x)
        mu, cov = p.mu[:-1], p.cov[0, :-1]  # cross covariances are not needed
    elif obj1 is None:
        eval_func = lambda x: get_eval_func(obj2, feature2)(x)
        mu, cov = p.mu[1:], p.cov[0, 1:]  # cross covariances are not needed
    else:
        eval_func = lambda x: (get_eval_func(obj1, feature1, np.s_[:, :, None])(tf.split(x, 2, 1)[0]) *
                               get_eval_func(obj2, feature2, np.s_[:, None, :])(tf.split(x, 2, 1)[1]))
        mu = tf.concat((p.mu[:-1, :], p.mu[1:, :]), 1)  # Nx2D
        cov_top = tf.concat((p.cov[0, :-1, :, :], p.cov[1, :-1, :, :]), 2)  # NxDx2D
        cov_bottom = tf.concat((tf.matrix_transpose(p.cov[1, :-1, :, :]), p.cov[0, 1:, :, :]), 2)
        cov = tf.concat((cov_top, cov_bottom), 1)  # Nx2Dx2D

    return mvnquad(eval_func, mu, cov, num_gauss_hermite_points)
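The else-branch above stitches together the joint distribution of (x_n, x_{n+1}): the means are concatenated and the 2D x 2D covariance is assembled block-wise, with tf.matrix_transpose supplying the lower-left cross-covariance block. A NumPy sketch of the same block structure for a single time step (values are arbitrary):

import numpy as np

D = 2
cov_nn = np.eye(D)               # Cov[x_n]           (plays the role of p.cov[0, :-1])
cov_next = 2.0 * np.eye(D)       # Cov[x_{n+1}]       (plays the role of p.cov[0, 1:])
cov_cross = 0.5 * np.eye(D)      # Cov[x_n, x_{n+1}]  (plays the role of p.cov[1, :-1])

top = np.concatenate([cov_nn, cov_cross], axis=1)         # D x 2D
bottom = np.concatenate([cov_cross.T, cov_next], axis=1)  # D x 2D
joint = np.concatenate([top, bottom], axis=0)             # 2D x 2D

assert np.allclose(joint, joint.T)  # a valid joint covariance must be symmetric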
Example #4
 def testNonBatchMatrix(self):
   matrix = [[1, 2, 3], [4, 5, 6]]  # Shape (2, 3)
   expected_transposed = [[1, 4], [2, 5], [3, 6]]  # Shape (3, 2)
   with self.test_session():
     transposed = tf.matrix_transpose(matrix)
     self.assertEqual((3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
Example #5
def _sample_conditional(Xnew, X, kern, f, *, q_sqrt=None, white=False, full_cov=False, full_output_cov=False, num_samples=None):
    if full_cov and full_output_cov:
        raise NotImplementedError("The combination of both full_cov and full_output_cov is not "
                                  "implemented for sample_conditional.")

    logger.debug("sample conditional: Kernel")
    if full_output_cov:
        raise NotImplementedError("full_output_cov is not implemented")

    mean, cov = conditional(Xnew, X, kern, f, q_sqrt=q_sqrt, white=white, full_cov=full_cov)
    if full_cov:
        mean = tf.matrix_transpose(mean)
    cov_structure = "full" if full_cov else "diag"
    samples = _sample_mvn(mean, cov, cov_structure, num_samples=num_samples)
    if full_cov:
        samples = tf.matrix_transpose(samples)
    return samples, mean, cov
Example #6
 def _dot(self, slist1, slist2, tf_embs):
     """
     Simple dot product between two vectors of embeddings.
     This returns a matrix of positive real numbers.
     """
     matlist1 = tf.gather(tf_embs, slist1, name='matlist1')
     matlist2 = tf.matrix_transpose(tf.gather(tf_embs, slist2, name='matlist2'))
     return tf.batch_matmul(matlist1, matlist2)
Example #7
def _expectation(p, kern, feat, mean, none, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <K_{Z, x_n} m(x_n)>_p(x_n)
    or the equivalent for MarkovGaussian

    :return: NxMxQ
    """
    return tf.matrix_transpose(expectation(p, mean, (kern, feat), nghp=nghp))
Example #8
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,} :: Linear kernel
    or the equivalent for MarkovGaussian

    :return: NxDxM
    """
    return tf.matrix_transpose(expectation(p, (kern, feat), mean))
Example #9
def _conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False):
    """
    Most efficient routine to project L independent latent GPs through a mixing matrix W.
    The mixing matrix is a member of the `SeparateMixedMok` and has shape P x L.

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: L x M x M
    - Kuf: L x M x N
    - Kff: L x N or L x N x N

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` for a detailed explanation of
      conditional in the single-output case.
    - See the multi-output notebook for more information about the multi-output framework.

    """
    logger.debug("conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")
    independent_cond = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
    gmu, gvar = independent_cond(Xnew, feat, kern, f, full_cov=full_cov, q_sqrt=q_sqrt,
                                 full_output_cov=False, white=white)  # N x L, L x N x N or N x L

    gmu = tf.matrix_transpose(gmu)  # L x N
    if not full_cov:
        gvar = tf.matrix_transpose(gvar)  # L x N (x N)

    Wgmu = tf.tensordot(gmu, kern.W, [[0], [1]])  # N x P

    if full_output_cov:
        Wt_expanded = tf.matrix_transpose(kern.W)[:, None, :]  # L x 1 x P
        if full_cov:
            Wt_expanded = tf.expand_dims(Wt_expanded, axis=-1)  # L x 1 x P x 1

        gvarW = tf.expand_dims(gvar, axis=2) * Wt_expanded  # L x N x P (x N)
        WgvarW = tf.tensordot(gvarW, kern.W, [[0], [1]])  # N x P (x N) x P
    else:
        if not full_cov:
            WgvarW = tf.tensordot(gvar, kern.W ** 2, [[0], [1]])  # N x P
        else:
            WgvarW = tf.tensordot(kern.W ** 2, gvar, [[1], [0]])  # P x N (x N)

    return Wgmu, WgvarW
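For the marginal branch (full_output_cov=False, full_cov=False), mixing L independent latent GPs through W gives mean W g and variance sum_l W[p, l]^2 var[g_l]. A NumPy sketch of those two tensordot contractions with toy shapes (W, gmu and gvar stand in for kern.W and the independent-GP outputs above):

import numpy as np

L, N, P = 3, 5, 2
rng = np.random.default_rng(0)
W = rng.standard_normal((P, L))       # mixing matrix, P x L
gmu = rng.standard_normal((L, N))     # latent means, L x N
gvar = rng.random((L, N))             # latent marginal variances, L x N

Wgmu = np.tensordot(gmu, W, axes=[[0], [1]])           # N x P, mean of f = W g
WgvarW = np.tensordot(gvar, W ** 2, axes=[[0], [1]])   # N x P, marginal variance of f

# Element-wise check: var[f_p(x_n)] = sum_l W[p, l]^2 * var[g_l(x_n)]
p, n = 1, 2
assert np.isclose(WgvarW[n, p], np.sum(W[p] ** 2 * gvar[:, n]))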
Example #10
    def _inverse(self, y, z, reuse):
        hh, ww, nc = gs(y)[1:]
        if y.dtype == tf.float32:
            nptype = np.float32
        else:
            nptype = np.float64
        rotation = tf.get_variable(
            "1x1_conv_weight",
            dtype=y.dtype,
            initializer=random_rotation_matrix(nc).astype(nptype),
            trainable=True)
        rotation = (rotation -
                    tf.matrix_transpose(rotation)) / 2.0  # make skew symmetric
        rotation = matrix_exponential(rotation,
                                      name="MatrixExpFor1x1Convolution")
        _rot = tf.cast(tf.matrix_transpose(rotation), y.dtype)
        kernel = tf.reshape(_rot, shape=[1, 1, nc, nc])
        x = self.conv(y, kernel)

        return x
Example #11
 def testBatchMatrixDynamicallyDefined(self):
     matrix_0 = [[1, 2, 3], [4, 5, 6]]
     matrix_0_t = [[1, 4], [2, 5], [3, 6]]
     matrix_1 = [[11, 22, 33], [44, 55, 66]]
     matrix_1_t = [[11, 44], [22, 55], [33, 66]]
     batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
     expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
     with self.test_session():
         batch_matrix_ph = tf.placeholder(tf.int32)
         transposed = tf.matrix_transpose(batch_matrix_ph)
         self.assertAllEqual(expected_transposed, transposed.eval(feed_dict={batch_matrix_ph: batch_matrix}))
Example #12
    def left_grad(U, S, V, dU, dV):
        U, V = (V, U)
        dU, dV = (dV, dU)
        D = tf.matmul(dU, tf.matrix_diag(1 / (s + 1e-8)))
        US = tf.matmul(U, S)

        grad = tf.matmul(D, V, transpose_b=True)\
              +tf.matmul(tf.matmul(U,tf.matrix_diag(tf.matrix_diag_part(-tf.matmul(U,D,transpose_a=True)))), V, transpose_b=True)\
              +tf.matmul(2*tf.matmul(US, msym(KT*(tf.matmul(V,-tf.matmul(V,tf.matmul(D,US,transpose_a=True)),transpose_a=True)))),V,transpose_b=True)
        grad = tf.matrix_transpose(grad)
        return grad
Example #13
 def testBatchMatrix(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
   expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
   with self.test_session():
     transposed = tf.matrix_transpose(batch_matrix)
     self.assertEqual((2, 3, 2), transposed.get_shape())
     self.assertAllEqual(expected_transposed, transposed.eval())
Example #14
    def make_attention_mat(self, x1, x2):
        # x1  [batch_size, vec_dim, sentence_length, 1]
        # tf.matrix_transpose(x2) [batch_size, vec_dim, 1, sentence_length]

        # Broadcasting produces a [sentence_length_0, sentence_length_1] matrix
        # x1 - tf.matrix_transpose(x2)  [batch_size, vec_dim, sentence_length, sentence_length]
        # euclidean [batch_size, sentence_length, sentence_length]
        euclidean = tf.sqrt(
            tf.reduce_sum(tf.square(x1 - tf.matrix_transpose(x2)), axis=1) +
            self.eclipse)
        return 1 / (1 + euclidean)
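The comments above rely on broadcasting: x1 has shape [batch, vec_dim, s1, 1] and tf.matrix_transpose(x2) has shape [batch, vec_dim, 1, s2], so their difference is the full pairwise tensor. A NumPy sketch of the same shape arithmetic, with np.swapaxes standing in for tf.matrix_transpose:

import numpy as np

batch, vec_dim, s1, s2 = 2, 4, 5, 6
rng = np.random.default_rng(0)
x1 = rng.standard_normal((batch, vec_dim, s1, 1))
x2 = rng.standard_normal((batch, vec_dim, s2, 1))

x2_t = np.swapaxes(x2, -1, -2)                  # [batch, vec_dim, 1, s2]
diff = x1 - x2_t                                # broadcasts to [batch, vec_dim, s1, s2]
euclidean = np.sqrt(np.sum(diff ** 2, axis=1))  # [batch, s1, s2]
attention = 1.0 / (1.0 + euclidean)
print(attention.shape)  # (2, 5, 6)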
Example #15
    def _scaled_square_dist(self, X, X2):
        """
        Returns ((X - X2ᵀ)/lengthscales)².
        Due to the implementation and floating-point imprecision, the
        result may actually be very slightly negative for entries very
        close to each other.
        """
        X = X / self.lengthscales
        Xs = tf.reduce_sum(tf.square(X), axis=-1, keepdims=True)

        if X2 is None:
            dist = -2 * tf.matmul(X, X, transpose_b=True)
            dist += Xs + tf.matrix_transpose(Xs)
            return dist

        X2 = X2 / self.lengthscales
        X2s = tf.reduce_sum(tf.square(X2), axis=-1, keepdims=True)
        dist = -2 * tf.matmul(X, X2, transpose_b=True)
        dist += Xs + tf.matrix_transpose(X2s)
        return dist
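The computation expands ||x - x'||^2 as x.x - 2 x.x' + x'.x'; the transpose broadcasts the row norms of X2 across columns. A NumPy check of that expansion (lengthscale scaling omitted, toy data):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 3))
X2 = rng.standard_normal((7, 3))

Xs = np.sum(X ** 2, axis=-1, keepdims=True)     # [5, 1]
X2s = np.sum(X2 ** 2, axis=-1, keepdims=True)   # [7, 1]
dist = -2.0 * (X @ X2.T) + Xs + X2s.T           # [5, 7], squared distances

brute = np.sum((X[:, None, :] - X2[None, :, :]) ** 2, axis=-1)
assert np.allclose(dist, brute)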
Example #16
    def recursive_kernel(self, points1, points2, depth):
        if depth == 1:
            mag_sqr1 = tf.reduce_sum(points1**2, 1, keep_dims=True)
            mag_sqr2 = tf.reduce_sum(points2**2, 1, keep_dims=True)
            point_prod = tf.matmul(points1, points2,
                                   transpose_b=True)  # points1 @ points2.T
        else:
            mag_sqr1 = tf.expand_dims(
                self.diag_recursive_kernel(points1, depth - 1), 1)
            mag_sqr2 = tf.expand_dims(
                self.diag_recursive_kernel(points2, depth - 1), 1)
            point_prod = self.recursive_kernel(points1, points2, depth - 1)

        mag_prod = tf.sqrt(mag_sqr1) * tf.matrix_transpose(tf.sqrt(mag_sqr2))
        cos_angles = (
            2 * point_prod) / (tf.sqrt(1 + 2 * mag_sqr1) *
                               tf.matrix_transpose(tf.sqrt(1 + 2 * mag_sqr2)))

        return (((mag_prod**self.degree) / np.pi) *
                self.angular_func(cos_angles))
Example #17
    def _scaled_square_dist(self, X, X2):
        """
        Returns ((X - X2ᵀ)/lengthscales)².
        Due to the implementation and floating-point imprecision, the
        result may actually be very slightly negative for entries very
        close to each other.
        """
        X = X / self.lengthscales
        Xs = tf.reduce_sum(tf.square(X), axis=-1, keepdims=True)

        if X2 is None:
            dist = -2 * tf.matmul(X, X, transpose_b=True)
            dist += Xs + tf.matrix_transpose(Xs)
            return dist

        X2 = X2 / self.lengthscales
        X2s = tf.reduce_sum(tf.square(X2), axis=-1, keepdims=True)
        dist = -2 * tf.matmul(X, X2, transpose_b=True)
        dist += Xs + tf.matrix_transpose(X2s)
        return dist
Example #18
    def forward_pass(self, embed, weights_input, biases_input, weights_output):
        """

        :param embed:
        :param weights:
        :param biases:
        :return:
        """
        """
        =======================================================

        Implement the forward pass described in
        "A Fast and Accurate Dependency Parser using Neural Networks"(2014)

        =======================================================
        """

        print('forward pass inputs')
        print(embed, weights_input, biases_input, weights_output)

        layer1 = tf.add(tf.matmul(weights_input, tf.matrix_transpose(embed)),
                        biases_input)
        layer1 = tf.math.pow(layer1, tf.fill(tf.shape(layer1), 3.0))
        # tanh activation function
        #         layer1 = tf.math.tanh(layer1, name = 'tanh_activation')

        # sigmoid activation function
        #         layer1 = tf.math.sigmoid(layer1, name = 'sigmoid_activation')

        # relu activation function
        #         layer1 = tf.nn.relu(layer1, name = 'relu_activation')

        print('layer1')
        print(layer1)

        p = tf.matrix_transpose(tf.matmul(weights_output, layer1))

        print('p')
        print(p)

        return p
Example #19
        def CNN_layer(variable_scope, x1, x2, d):
            # x1, x2 = [batch, d, s, 1]
            with tf.variable_scope(variable_scope):
                if model_type == "ABCNN1" or model_type == "ABCNN3":
                    with tf.name_scope("att_mat"):
                        aW = tf.get_variable(name="aW",
                                             shape=(s, d),
                                             initializer=tf.contrib.layers.xavier_initializer(),
                                             regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg))

                        # [batch, s, s]
                        att_mat = make_attention_mat(x1, x2)

                        # [batch, s, s] * [s,d] => [batch, s, d]
                        # matrix transpose => [batch, d, s]
                        # expand dims => [batch, d, s, 1]
                        x1_a = tf.expand_dims(tf.matrix_transpose(tf.einsum("ijk,kl->ijl", att_mat, aW)), -1)
                        x2_a = tf.expand_dims(tf.matrix_transpose(
                            tf.einsum("ijk,kl->ijl", tf.matrix_transpose(att_mat), aW)), -1)

                        # [batch, d, s, 2]
                        x1 = tf.concat([x1, x1_a], axis=3)
                        x2 = tf.concat([x2, x2_a], axis=3)

                left_conv = convolution(name_scope="left", x=pad_for_wide_conv(x1), d=d)
                right_conv = convolution(name_scope="right", x=pad_for_wide_conv(x2), d=d)

                left_attention, right_attention = None, None

                if model_type == "ABCNN2" or model_type == "ABCNN3":
                    # [batch, s+w-1, s+w-1]
                    att_mat = make_attention_mat(left_conv, right_conv)
                    # [batch, s+w-1], [batch, s+w-1]
                    left_attention, right_attention = tf.reduce_sum(att_mat, axis=2), tf.reduce_sum(att_mat, axis=1)

                left_wp = w_pool(variable_scope="left", x=left_conv, attention=left_attention)
                left_ap = all_pool(variable_scope="left", x=left_conv)
                right_wp = w_pool(variable_scope="right", x=right_conv, attention=right_attention)
                right_ap = all_pool(variable_scope="right", x=right_conv)

                return left_wp, left_ap, right_wp, right_ap
Example #20
def scNBMF_model(G,
                 C,
                 k,
                 variable_idx,
                 sample_idx,
                 T_,
                 y_,
                 psi,
                 penalty_type,
                 lambda_for_l1,
                 eps=1e-8):
    '''
    scNBMF model
    
    G: Number of genes
    C: Number of cells
    variable_idx: Gene index
    sample_idx: Cell index
    T_: Total counts or read depth
    y_: Count expression matrix
    psi: Dispersion parameters computed by edgeR
    penalty_type: 1 means an L1 penalty; any other value means an L2 penalty
    lambda_for_l1: The coefficient of the L1 or L2 penalty

    return:
    LL : loss function for the model
    '''
    W = tf.Variable(np.random.randn(G, k), name='weights')

    H = tf.Variable(np.random.randn(k, C), name='PCs')

    S = tf.Variable(np.array([0.]), name='Scaling')

    W_ = tf.gather(W, variable_idx)
    psi_ = tf.gather(psi, variable_idx)

    H_ = tf.gather(tf.matrix_transpose(H), sample_idx)
    eta_ = tf.reduce_sum(W_ * H_, 1)

    mu_ = tf.exp(eta_ + S + tf.log(T_))

    LL = tf.reduce_sum(y_ * tf.log(mu_ + eps) -
                       (y_ + psi_) * tf.log(mu_ + psi_ + eps))

    if penalty_type == 1:
        Wpenalty = get_weight(W, lambda_for_l1)
    else:
        Wpenalty = get_weight2(W, lambda_for_l1)

    beta = 1
    LL = tf.reduce_mean(LL + beta * Wpenalty)

    return LL
Example #21
 def kronecker_vec(self, X, m, n):
     leading_dim = tf.shape(X)[:-2]
     blocks = []
     for i in range(n):
         blocks.append([])
         for j in range(m):
             idx = i * m + j
             block = tf.matrix_transpose(
                 tf.reshape(X[..., idx, :],
                            tf.concat([leading_dim, [n, m]], 0)))
             blocks[-1].append(block)
     return tf.concat([tf.concat(b, -2) for b in blocks], -1)
Example #22
def generator(inputs, is_training=True):
    feat, _ = inputs
    embedding = tf.get_variable(name='embedding',
                                shape=[FLAGS.vocab_size, FLAGS.emb_dim],
                                initializer=tf.random_uniform_initializer(
                                    -0.08, 0.08))
    softmax_w = tf.matrix_transpose(embedding)
    softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

    batch_size = tf.shape(feat)[0]
    cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.mem_dim)
    if is_training:
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, FLAGS.keep_prob,
                                             FLAGS.keep_prob)
    zero_state = cell.zero_state(batch_size, tf.float32)

    sequence, logits, log_probs, rnn_outs = [], [], [], []

    _, state = cell(feat, zero_state)
    state_bl = state
    tf.get_variable_scope().reuse_variables()
    for t in range(FLAGS.max_caption_length):
        if t == 0:
            rnn_inp = tf.zeros([batch_size], tf.int32) + FLAGS.start_id
        rnn_inp = tf.nn.embedding_lookup(embedding, rnn_inp)
        rnn_out, state = cell(rnn_inp, state)
        rnn_outs.append(rnn_out)
        logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b)
        categorical = tf.contrib.distributions.Categorical(logits=logit)
        fake = categorical.sample()
        log_prob = categorical.log_prob(fake)
        sequence.append(fake)
        log_probs.append(log_prob)
        logits.append(logit)
        rnn_inp = fake
    sequence = tf.stack(sequence, axis=1)
    log_probs = tf.stack(log_probs, axis=1)
    logits = tf.stack(logits, axis=1)

    baseline = []
    state = state_bl
    for t in range(FLAGS.max_caption_length):
        if t == 0:
            rnn_inp = tf.zeros([batch_size], tf.int32) + FLAGS.start_id
        rnn_inp = tf.nn.embedding_lookup(embedding, rnn_inp)
        rnn_out, state = cell(rnn_inp, state)
        logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b)
        fake = tf.argmax(logit, axis=1, output_type=tf.int32)
        baseline.append(fake)
        rnn_inp = fake
    baseline = tf.stack(baseline, axis=1)

    return sequence, logits, log_probs, baseline
Example #23
def get_matrix_tree(r, A):
    L = tf.reduce_sum(A, 1)
    L = tf.matrix_diag(L)
    L = L - A

    r_diag = tf.matrix_diag(r)
    LL = L + r_diag

    LL_inv = tf.matrix_inverse(LL)  #batch_l, doc_l, doc_l
    LL_inv_diag_ = tf.matrix_diag_part(LL_inv)

    d0 = tf.multiply(r, LL_inv_diag_)

    LL_inv_diag = tf.expand_dims(LL_inv_diag_, 2)

    tmp1 = tf.multiply(A, tf.matrix_transpose(LL_inv_diag))
    tmp2 = tf.multiply(A, tf.matrix_transpose(LL_inv))

    d = tmp1 - tmp2
    d = tf.concat([tf.expand_dims(d0, [1]), d], 1)
    return d
Example #24
def _expectation(p, lin_kern, feat1, rbf_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_lin_{.,.} :: Linear kernel
        - K_rbf_{.,.} :: RBF kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    return tf.matrix_transpose(expectation(p, (rbf_kern, feat2), (lin_kern, feat1)))
Example #25
def _expectation(p, lin_kern, feat1, rbf_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_lin_{.,.} :: Linear kernel
        - K_rbf_{.,.} :: RBF kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    return tf.matrix_transpose(expectation(p, (rbf_kern, feat2), (lin_kern, feat1)))
Example #26
    def __affine_image(self,imgs,r,t):
        # The Tensor [imgs].format is [NHWC]
        r = tf.matrix_inverse(r)
        r = tf.matrix_transpose(r)

        rm = tf.reshape(tf.pad(r, [[0, 0], [0, 0], [0, 1]], mode='CONSTANT'), [-1, 6])
        rm = tf.pad(rm, [[0, 0], [0, 2]], mode='CONSTANT')

        tm = tf.contrib.image.translations_to_projective_transforms(tf.reshape(t, [-1, 2]))
        rtm = tf.contrib.image.compose_transforms(rm, tm)

        return tf.contrib.image.transform(imgs, rtm, "BILINEAR")
Example #27
    def forward_pass_parallel(self, embed, weights_input, biases_input,
                              weights_input2, biases_input2, weights_input3,
                              biases_input3, weights_output):
        """

        :param embed:
        :param weights:
        :param biases:
        :return:
        """
        """
        =======================================================

        two layer forward pass function
        =======================================================
        """
        print('forward pass inputs')

        print(embed, weights_input, biases_input, weights_input2,
              biases_input2, weights_input3, biases_input3, weights_output)

        embed1, embed2, embed3 = tf.split(embed, [
            Config.embedding_size * Config.n_Tokens1, Config.embedding_size *
            Config.n_Tokens2, Config.embedding_size * Config.n_Tokens3
        ], 1)
        print(embed1, embed2, embed3)

        layer1 = tf.add(tf.matmul(weights_input, tf.transpose(embed1)),
                        biases_input)
        layer1 = tf.math.pow(layer1, 3.0)
        #layer1 = tf.math.tanh(layer1, name = 'tanh_activation1')

        layer2 = tf.add(tf.matmul(weights_input2, tf.transpose(embed2)),
                        biases_input2)
        layer2 = tf.math.pow(layer2, 3.0)
        #layer2 = tf.math.tanh(layer2, name = 'tanh_activation2')

        layer3 = tf.add(tf.matmul(weights_input3, tf.transpose(embed3)),
                        biases_input3)
        layer3 = tf.math.pow(layer3, 3.0)
        #layer3 = tf.math.tanh(layer3 , name = 'tanh_activation3')

        print(layer1, layer2, layer3)
        layer123 = layer1 + layer2 + layer3  #tf.concat([layer1, layer2,layer3], 0)
        print('--------')
        print('layer123', layer123)
        print('--------')
        print('weights_output', weights_output)
        p = tf.matrix_transpose(tf.matmul(weights_output, layer123))
        print('--------')
        print('p', p)

        return p
Example #28
    def LSTM_layer(self, variables_scope, x1, x2):
        with tf.variable_scope(variables_scope) as scope:
            #reconstruct squeeze data
            x1 = self.squeeze_data(x1)
            x2 = self.squeeze_data(x2)

            #input data to birnn
            L_rnn = self.birnn_x1(x1)
            R_rnn = self.birnn_x1(x2, reuse=True)
            _length = len(L_rnn)
            L_rnn = tf.transpose(L_rnn, [1, 0, 2])
            R_rnn = tf.transpose(R_rnn, [1, 0, 2])
            expend_L_rnn = tf.expand_dims(L_rnn, -1)
            trans_L_rnn = tf.transpose(expend_L_rnn, [0, 2, 1, 3])

            expend_R_rnn = tf.expand_dims(R_rnn, -1)
            trans_R_rnn = tf.transpose(expend_R_rnn, [0, 2, 1, 3])

            #attention matrix
            with tf.name_scope("rnn_att_mat"):
                aW = tf.get_variable(name="aW",
                                     shape=(_length, self.lstm_cell * 2))
                att_mat = self.att_mat(trans_L_rnn, trans_R_rnn)
                x1_a = tf.transpose(
                    tf.matrix_transpose(tf.einsum("ijk,kl->ijl", att_mat, aW)),
                    [0, 2, 1])
                x2_a = tf.transpose(
                    tf.matrix_transpose(
                        tf.einsum("ijk,kl->ijl",
                                  tf.transpose(att_mat, [0, 2, 1]), aW)),
                    [0, 2, 1])
            # attention layer
            L_att = self.Attention_1(L_rnn, )
            R_att = self.Attention_1(R_rnn, reuse=True)
            with tf.variable_scope("att2att"):
                L_att_2 = self.Attention_2(x1_a)
                R_att_2 = self.Attention_2(x2_a, reuse=True)

            return L_att, R_att, L_att_2, R_att_2
        pass
Example #29
def batch_bilinear(x, weights_w, weights_h):
	# x: [ batch_size, channels, height, width ]
	x_shape = x.get_shape().as_list()

	# coords_w: [ batch_size, channels, width ]
	# coords_h: [ batch_size, channels, height ]
	coords_w = weights_to_coords(weights_w)
	coords_h = weights_to_coords(weights_h)
	coords_w = tf.identity(coords_w, name='coords_w')
	coords_h = tf.identity(coords_h, name='coords_h')
	tf.add_to_collection('70f92c137c01d89c6477c5ef22411bfe', [coords_w, coords_h])

	# idx__ : [ batch_size, channels, _, 2 ], 2 = (#batch, #channel)
	mesh = tf.meshgrid(tf.range(x_shape[1]), tf.range(x_shape[0]))
	idx = tf.expand_dims(tf.stack([mesh[1], mesh[0]],-1), 2)
	idx_h = tf.tile(idx, [1, 1, x_shape[2], 1])
	idx_w = tf.tile(idx, [1, 1, x_shape[3], 1])

	coords_0_ = tf.concat([idx_h, tf.expand_dims(tf.cast(tf.floor(coords_h), tf.int32), -1)], -1)
	coords_1_ = tf.concat([idx_h, tf.expand_dims(tf.cast(tf.ceil(coords_h), tf.int32), -1)], -1)
	coords__0 = tf.concat([idx_w, tf.expand_dims(tf.cast(tf.floor(coords_w), tf.int32), -1)], -1)
	coords__1 = tf.concat([idx_w, tf.expand_dims(tf.cast(tf.ceil(coords_w), tf.int32), -1)], -1)

	vals_0_ = tf.matrix_transpose(tf.gather_nd(x, coords_0_))
	vals_1_ = tf.matrix_transpose(tf.gather_nd(x, coords_1_))

	vals_00 = tf.gather_nd(vals_0_, coords__0)
	vals_01 = tf.gather_nd(vals_0_, coords__1)
	vals_10 = tf.gather_nd(vals_1_, coords__0)
	vals_11 = tf.gather_nd(vals_1_, coords__1)

	coords_x = tf.expand_dims(coords_w - tf.floor(coords_w), 3)
	coords_y = tf.expand_dims(coords_h - tf.floor(coords_h), 2)

	vals = vals_00 + \
				 (vals_10 - vals_00) * coords_x + \
				 (vals_01 - vals_00) * coords_y + \
				 (vals_11 + vals_00 - vals_10 - vals_01) * coords_x * coords_y

	return vals
Example #30
    def __net_load_constants(self, variable_scope_name):
        ###define some constants
        view_mat_for_normal_init = tf.constant_initializer(self.mat_for_normal)
        view_mat_for_normal = tf.get_variable(
            name="view_mat_for_normal",
            dtype=tf.float32,
            shape=self.mat_for_normal.shape,
            trainable=False,
            initializer=view_mat_for_normal_init)
        view_mat_for_normal_t = tf.matrix_transpose(view_mat_for_normal)
        view_mat_model_init = tf.constant_initializer(self.mat_model)
        view_mat_model = tf.get_variable(name="view_mat_model",
                                         dtype=tf.float32,
                                         shape=self.mat_model.shape,
                                         trainable=False,
                                         initializer=view_mat_model_init)
        view_mat_model_t = tf.matrix_transpose(view_mat_model)
        cam_pos_init = tf.constant_initializer(self.cam_pos)
        cam_pos = tf.get_variable(name="cam_pos",
                                  dtype=tf.float32,
                                  shape=self.cam_pos.shape,
                                  trainable=False,
                                  initializer=cam_pos_init)  #shape=[3]
        self.endPoints[variable_scope_name + "cam_pos"] = cam_pos

        light_normals_init = tf.constant_initializer(self.light_normals)
        light_normals = tf.get_variable(name="light_normals",
                                        dtype=tf.float32,
                                        shape=self.light_normals.shape,
                                        trainable=False,
                                        initializer=light_normals_init)
        light_poses_init = tf.constant_initializer(self.light_poses)
        light_poses = tf.get_variable(name="light_poses",
                                      dtype=tf.float32,
                                      shape=self.light_poses.shape,
                                      trainable=False,
                                      initializer=light_poses_init)
        self.endPoints[variable_scope_name + "light_poses"] = light_poses

        return view_mat_for_normal_t, view_mat_model_t, light_normals, light_poses, cam_pos
Example #31
    def __init__(self, hparams, iterator, cv=None):
        self.hparams = hparams
        self.iterator = iterator

        #To compute RNN vectors, we need W and rnn_cell and dynamic_rnn
        self.W = tf.get_variable(
            'embeddings', shape=[self.hparams.size_vocab, self.hparams.d])

        txt1_vectors = tf.nn.embedding_lookup(self.W, self.iterator.txt1)

        rnn_cell = rnn.BasicLSTMCell(self.hparams.d, self.hparams.forget_bias)
        with tf.variable_scope('rnn'):
            _, state_txt1 = tf.nn.dynamic_rnn(
                cell=rnn_cell,
                inputs=txt1_vectors,
                sequence_length=self.iterator.len_txt1,
                dtype=tf.float32)
        vec_txt1 = state_txt1.h
        self.vec_txt1 = vec_txt1

        if cv is not None:
            self.M = tf.Variable(tf.eye(self.hparams.d), name='M')
            txt2_vectors = tf.nn.embedding_lookup(self.W, self.iterator.txt2)
            with tf.variable_scope('rnn', reuse=True):
                _, state_txt2 = tf.nn.dynamic_rnn(
                    cell=rnn_cell,
                    inputs=txt2_vectors,
                    sequence_length=self.iterator.len_txt2,
                    dtype=tf.float32)
            vec_txt2 = state_txt2.h
            self.saver = tf.train.Saver(tf.global_variables())

            self.WC = tf.get_variable('candidate_vectors',
                                      shape=[cv.shape[0], cv.shape[1]])
            self.WC_assign = tf.assign(self.WC, cv)

            self.candidate_vectors = tf.nn.embedding_lookup(
                self.WC, self.iterator.indexes)

            #Concatenate bs x 1 x d with bs x NC x d; Result bs x NC+1 x d
            self.gt_with_candidate_vectors = tf.concat([
                tf.reshape(vec_txt2, [-1, 1, self.hparams.d]),
                self.candidate_vectors
            ], 1)
            scores = tf.matmul(
                tf.reshape(tf.matmul(vec_txt1, self.M),
                           [-1, 1, self.hparams.d]),
                tf.matrix_transpose(self.gt_with_candidate_vectors))

            self.scores = tf.reshape(scores, [tf.shape(vec_txt1)[0], -1])
        else:
            self.saver = tf.train.Saver(tf.global_variables())
Example #32
def unpack_smm(theta_smm, name='unpack_theta_smm'):
    # extract point-estimates for Student-t mixture components

    with tf.name_scope(name):
        mu, L_k_raw = theta_smm

        # make sure that L is a valid Cholesky decomposition and compute scaling matrix
        with tf.name_scope('compute_prec'):
            L_k = tf.linalg.LinearOperatorLowerTriangular(L_k_raw, name='to_triL').to_dense()
            L_k = tf.matrix_set_diag(L_k, tf.nn.softplus(tf.matrix_diag_part(L_k), name='softplus_diag'), name='L')
            Sigma = tf.matmul(L_k, tf.matrix_transpose(L_k), name='precision')

        return tf.tuple((mu, Sigma), name='theta_smm_unpacked')
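The softplus on the diagonal above is what makes L a valid Cholesky factor, so Sigma = L L^T is symmetric positive definite. A NumPy sketch of the same construction (toy values, no TensorFlow):

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

rng = np.random.default_rng(0)
D = 3
L_raw = np.tril(rng.standard_normal((D, D)))    # unconstrained lower-triangular values
L = L_raw.copy()
np.fill_diagonal(L, softplus(np.diag(L_raw)))   # softplus makes the diagonal positive
Sigma = L @ L.T                                 # Sigma = L L^T, symmetric positive definite

assert np.allclose(Sigma, Sigma.T)
np.linalg.cholesky(Sigma)                       # succeeds only for a positive-definite matrix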
Example #33
    def prior_fn(latent_dimension):
        cov_init = util.positive_definate_initializer([10] +
                                                      [latent_dimension] * 2)
        eigvals = tf.self_adjoint_eig(
            tf.divide(cov_init + tf.matrix_transpose(cov_init),
                      2.,
                      name='symmetrised'))[0]
        cov_init = tf.Print(cov_init, [cov_init])

        return parameterized_distributions.gmm.GMM(10,
                                                   latent_dimension,
                                                   cov_init=cov_init,
                                                   trainable=True).model
Example #34
 def __call__(
     self,
     x_1,
     x_2,
     reuse=False,
 ):
     with tf.variable_scope(self.name) as scope:
         if reuse:
             scope.reuse_variables()
         euclidean = tf.sqrt(
             tf.reduce_sum(tf.square(x_1 - tf.matrix_transpose(x_2)),
                           axis=1))
         return 1 / (1 + euclidean)
Example #35
    def get_pdist2(self, X, Y):

        if X.shape.ndims == 1:
            X = X[None, :]
        if Y.shape.ndims == 1:
            Y = Y[None, :]
        assert X.shape[1] == Y.shape[1]
        pdist2 = tf.reduce_sum(tf.square(X), axis=1, keep_dims=True)
        pdist2 -= 2.0 * tf.matmul(X, Y, transpose_b=True)
        pdist2 += tf.matrix_transpose(
            tf.reduce_sum(tf.square(Y), axis=1, keep_dims=True))
        self.pdist2 = pdist2
        return pdist2
Example #36
def mult_mod(M,N,left_right):
    tensor_shape = M.shape
    dims = N.shape 
    if left_right == 'r':
        #M tensor of size (batch_size, n, m)
        #N tensor of size (m, p)
        n = tensor_shape[1].value
        m = dims[0]
        p = dims[1]
        y = tf.reshape(tf.reshape(M, [-1, m]) @ N, [-1, n, p])
    elif left_right == 'l':
        #M tensor of size (batch_size, n, m)
        #N tensor of size (p, n)
        m = tensor_shape[2].value
        p = dims[0]
        n = dims[1]        
        MT = tf.matrix_transpose(M)
        NT = tf.matrix_transpose(N)
        MTNT = tf.reshape(tf.reshape(MT, [-1, n]) @ NT, [-1, m, p])
        y = tf.matrix_transpose(MTNT)
    
    return(y)
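The 'l' branch above avoids a batched matmul by using (N M)^T = M^T N^T: transpose, multiply with a single flattened matmul, transpose back. A NumPy sketch of that identity in the batched setting (np.swapaxes stands in for tf.matrix_transpose):

import numpy as np

rng = np.random.default_rng(0)
batch, n, m, p = 4, 3, 5, 2
M = rng.standard_normal((batch, n, m))   # batched matrix
N = rng.standard_normal((p, n))          # single matrix applied from the left

# Left-multiply every batch slice, y[b] = N @ M[b], via (N M)^T = M^T N^T:
MT = np.swapaxes(M, -1, -2)                           # (batch, m, n)
MTNT = (MT.reshape(-1, n) @ N.T).reshape(batch, m, p)
y = np.swapaxes(MTNT, -1, -2)                         # (batch, p, m)

assert np.allclose(y, np.einsum('pn,bnm->bpm', N, M))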
Example #37
 def testBatchMatrixDynamicallyDefined(self):
   matrix_0 = [[1, 2, 3], [4, 5, 6]]
   matrix_0_t = [[1, 4], [2, 5], [3, 6]]
   matrix_1 = [[11, 22, 33], [44, 55, 66]]
   matrix_1_t = [[11, 44], [22, 55], [33, 66]]
   batch_matrix = [matrix_0, matrix_1]  # Shape (2, 2, 3)
   expected_transposed = [matrix_0_t, matrix_1_t]  # Shape (2, 3, 2)
   with self.test_session():
     batch_matrix_ph = tf.placeholder(tf.int32)
     transposed = tf.matrix_transpose(batch_matrix_ph)
     self.assertAllEqual(
         expected_transposed,
         transposed.eval(feed_dict={batch_matrix_ph: batch_matrix}))
Example #38
    def __init__(self, tensor_x, tensor_y):
        """
        :param tensor_x: (..., channel_x, sample)
        :param tensor_y: (..., channel_y, sample)
        """
        self.x = tensor_x
        self.y = tensor_y

        self.mean_x = tf.reduce_mean(tensor_x, axis=-1, keepdims=True)
        self.mean_y = tf.reduce_mean(tensor_y, axis=-1, keepdims=True)

        x_ = self.x - self.mean_x
        y_ = self.y - self.mean_y

        s_xx = tf.matmul(x_, tf.matrix_transpose(x_))
        s_yy = tf.matmul(y_, tf.matrix_transpose(y_))
        s_xy = tf.matmul(x_, tf.matrix_transpose(y_))
        s_yx = tf.matmul(y_, tf.matrix_transpose(x_))

        self.M = tf.linalg.inv(s_xx) @ s_xy @ tf.linalg.inv(s_yy) @ s_yx

        self.rho = tf.linalg.trace(self.M)  # \sum\rho^2
Example #39
def AffineTransformLayer(imgs, r, t):
    r = tf.matrix_inverse(r)
    r = tf.matrix_transpose(r)

    rm = tf.reshape(tf.pad(r, [[0, 0], [0, 0], [0, 1]], mode='CONSTANT'),
                    [-1, 6])
    rm = tf.pad(rm, [[0, 0], [0, 2]], mode='CONSTANT')

    tm = tf.contrib.image.translations_to_projective_transforms(
        tf.reshape(t, [-1, 2]))
    rtm = tf.contrib.image.compose_transforms(rm, tm)

    return tf.contrib.image.transform(imgs, rtm, "BILINEAR")
Example #40
def gram(layer, factor):
    """ Get style with gram matrix.
    layer with shape(batch, height, weight, channels) of activations.
    """
    shape = tf.shape(layer)
    num_images = shape[0]
    num_filters = shape[3]
    size = tf.size(layer)
    filters = tf.reshape(layer, tf.stack([num_images, -1, num_filters]))
    grams = tf.matmul(tf.matrix_transpose(filters), filters) / tf.to_float(
        size / factor)  # FLAGS.batch_size)

    return grams
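The Gram computation flattens the spatial dimensions and takes filters^T . filters per image; the result is a [batch, channels, channels] matrix that is symmetric by construction. A NumPy restatement with toy shapes (np.swapaxes stands in for tf.matrix_transpose):

import numpy as np

rng = np.random.default_rng(0)
batch, h, w, c = 2, 8, 8, 3
layer = rng.standard_normal((batch, h, w, c))
factor = 1.0

filters = layer.reshape(batch, -1, c)                       # [batch, h*w, channels]
grams = np.matmul(np.swapaxes(filters, -1, -2), filters)    # [batch, channels, channels]
grams = grams / (layer.size / factor)                       # same normalization as above

assert np.allclose(grams, np.swapaxes(grams, -1, -2))       # Gram matrices are symmetric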
Example #41
def compute_adjacency_matrix(hidden_features, inputs_data_label, num_task):
    new_hidden_features = change_datastruct(hidden_features, num_task)
    new_inputs_data_label = change_datastruct(inputs_data_label, num_task)
    adjacency_matrixs = []
    for i in range(num_task):
        dist_matrix = -compute_pairwise_dist_tf(new_hidden_features[i])
        sign_matrix = 2 * tf.matmul(
            new_inputs_data_label[i],
            tf.matrix_transpose(new_inputs_data_label[i])) - 1
        adjacency_matrix = tf.exp(dist_matrix) * sign_matrix
        adjacency_matrixs.append(adjacency_matrix)
    adjacency_matrixs = tf.stack(adjacency_matrixs)
    return adjacency_matrixs
Example #42
def gp_conditional(z, fz, x, full_cov, kernel, Kzz_chol=None):
    '''
    GP conditional distribution f(x) | f(z) == fz
    :param z: shape [n_z, n_covariates]
    :param fz: shape [n_particles, n_z]
    :param x: shape [n_x, n_covariates]
    :return: a distribution with shape [n_particles, n_x]
    '''
    n_z = int(z.shape[0])
    n_particles = tf.shape(fz)[0]

    if Kzz_chol is None:
        Kzz_chol = tf.cholesky(kernel(z, z))

    # Mean[fx|fz] = Kxz @ inv(Kzz) @ fz; Cov[fx|z] = Kxx - Kxz @ inv(Kzz) @ Kzx
    # With ill-conditioned Kzz, the inverse is often asymmetric, which
    # breaks further cholesky decomposition. We compute a symmetric one.
    Kzz_chol_inv = tf.matrix_triangular_solve(Kzz_chol, tf.eye(n_z))
    Kzz_inv = tf.matmul(tf.transpose(Kzz_chol_inv), Kzz_chol_inv)
    Kxz = kernel(x, z)  # [n_x, n_z]
    Kxziz = tf.matmul(Kxz, Kzz_inv)
    mean_fx_given_fz = tf.matmul(fz, tf.matrix_transpose(Kxziz))

    if full_cov:
        cov_fx_given_fz = kernel(x, x) - tf.matmul(Kxziz, tf.transpose(Kxz))
        cov_fx_given_fz = tf.tile(
            tf.expand_dims(tf.cholesky(cov_fx_given_fz), 0),
            [n_particles, 1, 1])
        fx_given_fz = zs.distributions.MultivariateNormalCholesky(
            mean_fx_given_fz, cov_fx_given_fz)
    else:
        # diag(AA^T) = sum(A**2, axis=-1)
        var = kernel.Kdiag(x) - \
            tf.reduce_sum(tf.matmul(
                Kxz, tf.matrix_transpose(Kzz_chol_inv)) ** 2, axis=-1)
        std = tf.sqrt(var)
        fx_given_fz = zs.distributions.Normal(
            mean=mean_fx_given_fz, std=std, group_ndims=1)
    return fx_given_fz
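The comment explains why the inverse of Kzz is assembled from its Cholesky factor: Kzz^{-1} = L^{-T} L^{-1} is exactly symmetric by construction, whereas directly inverting an ill-conditioned Kzz can break a later Cholesky. A NumPy check of that identity (toy SPD matrix in place of kernel(z, z)):

import numpy as np

rng = np.random.default_rng(0)
n_z = 5
A = rng.standard_normal((n_z, n_z))
Kzz = A @ A.T + n_z * np.eye(n_z)        # SPD stand-in for kernel(z, z) plus jitter

Kzz_chol = np.linalg.cholesky(Kzz)
Kzz_chol_inv = np.linalg.solve(Kzz_chol, np.eye(n_z))   # L^{-1} via a triangular system
Kzz_inv = Kzz_chol_inv.T @ Kzz_chol_inv                 # K^{-1} = L^{-T} L^{-1}, symmetric by construction

assert np.allclose(Kzz_inv, Kzz_inv.T)
assert np.allclose(Kzz_inv @ Kzz, np.eye(n_z))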
Example #43
  def create(self,
             fixed_embeddings,
             linked_embeddings,
             context_tensor_arrays,
             attention_tensor,
             during_training,
             stride=None):
    """Requires |stride|; otherwise see base class."""
    check.NotNone(stride,
                  'BiaffineDigraphNetwork requires "stride" and must be called '
                  'in the bulk feature extractor component.')

    # TODO(googleuser): Add dropout during training.
    del during_training

    # Retrieve (possibly averaged) weights.
    weights_arc = self._component.get_variable('weights_arc')
    weights_source = self._component.get_variable('weights_source')
    root = self._component.get_variable('root')

    # Extract the source and target token activations.  Use |stride| to collapse
    # batch and beam into a single dimension.
    sources = network_units.lookup_named_tensor('sources', linked_embeddings)
    targets = network_units.lookup_named_tensor('targets', linked_embeddings)
    source_tokens_bxnxs = tf.reshape(sources.tensor,
                                     [stride, -1, self._source_dim])
    target_tokens_bxnxt = tf.reshape(targets.tensor,
                                     [stride, -1, self._target_dim])
    num_tokens = tf.shape(source_tokens_bxnxs)[1]

    # Compute the arc, source, and root potentials.
    arcs_bxnxn = digraph_ops.ArcPotentialsFromTokens(
        source_tokens_bxnxs, target_tokens_bxnxt, weights_arc)
    sources_bxnxn = digraph_ops.ArcSourcePotentialsFromTokens(
        source_tokens_bxnxs, weights_source)
    roots_bxn = digraph_ops.RootPotentialsFromTokens(
        root, target_tokens_bxnxt, weights_arc, weights_source)

    # Combine them into a single matrix with the roots on the diagonal.
    adjacency_bxnxn = digraph_ops.CombineArcAndRootPotentials(
        arcs_bxnxn + sources_bxnxn, roots_bxn)

    # The adjacency matrix currently has sources on rows and targets on columns,
    # but we want targets on rows so that maximizing within a row corresponds to
    # selecting sources for a given target.
    adjacency_bxnxn = tf.matrix_transpose(adjacency_bxnxn)

    return [tf.reshape(adjacency_bxnxn, [-1, num_tokens])]
Example #44
  def _updated_mat(self, mat, v, diag):
    # Get dense matrix defined by its square root, which is an update of `mat`:
    # A = (mat + v D v^T) (mat + v D v^T)^T
    # D is the diagonal matrix with `diag` on the diagonal.

    # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
    if diag is None:
      diag_vt = tf.matrix_transpose(v)
    else:
      diag_mat = tf.matrix_diag(diag)
      diag_vt = tf.matmul(diag_mat, v, adjoint_b=True)

    v_diag_vt = tf.matmul(v, diag_vt)
    sqrt = mat + v_diag_vt
    a = tf.matmul(sqrt, sqrt, adjoint_b=True)
    return a.eval()
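A NumPy restatement of the formula in the comment, A = (mat + v D v^T)(mat + v D v^T)^T, for the dense case (no TensorFlow operators, arbitrary toy values):

import numpy as np

rng = np.random.default_rng(0)
n, r = 4, 2
mat = rng.standard_normal((n, n))
v = rng.standard_normal((n, r))
diag = rng.standard_normal(r)

sqrt = mat + v @ np.diag(diag) @ v.T    # the updated "square root" factor
a = sqrt @ sqrt.T                       # A = (mat + v D v^T)(mat + v D v^T)^T

assert np.allclose(a, a.T)              # A is symmetric (and PSD) by construction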
Example #45
def _conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False):
    """
    Multi-output GP with independent GP priors.
    Number of latent processes equals the number of outputs (L = P).

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: P x M x M
    - Kuf: P x M x N
    - Kff: P x N or P x N x N

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` for a detailed explanation of
      conditional in the single-output case.
    - See the multi-output notebook for more information about the multi-output framework.
    - See above for the parameters and the return value.
    """

    logger.debug("conditional: object, SharedIndependentMof, SeparateIndependentMok, object")
    # Following are: P x M x M  -  P x M x N  -  P x N(x N)
    Kmms = Kuu(feat, kern, jitter=settings.numerics.jitter_level)  # P x M x M
    Kmns = Kuf(feat, kern, Xnew)  # P x M x N
    kern_list = kern.kernels if isinstance(kern, Combination) else [kern.kern] * len(feat.feat_list)
    Knns = tf.stack([k.K(Xnew) if full_cov else k.Kdiag(Xnew) for k in kern_list], axis=0)
    fs = tf.transpose(f)[:, :, None]  # P x M x 1
    # P x 1 x M x M  or  P x M x 1
    q_sqrts = tf.transpose(q_sqrt)[:, :, None] if q_sqrt.shape.ndims == 2 else q_sqrt[:, None, :, :]

    def single_gp_conditional(t):
        Kmm, Kmn, Knn, f, q_sqrt = t
        return base_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=white)

    rmu, rvar = tf.map_fn(single_gp_conditional,
                          (Kmms, Kmns, Knns, fs, q_sqrts),
                          (settings.float_type, settings.float_type))  # P x N x 1, P x 1 x N x N or P x N x 1

    fmu = tf.matrix_transpose(rmu[:, :, 0])  # N x P

    if full_cov:
        fvar = rvar[:, 0, :, :]  # P x N x N
    else:
        fvar = tf.transpose(rvar[..., 0])  # N x P

    return fmu, _expand_independent_outputs(fvar, full_cov, full_output_cov)
Example #46
 def _arccosine(self, slist1, slist2, tf_embs):
     """
     Uses an arccosine kernel of degree 0 to calculate
     the similarity matrix between two vectors of embeddings. 
     This is just cosine similarity projected into the [0,1] interval.
     """
     dot = self._dot(slist1, slist2, tf_embs)
     # This calculation corresponds to an arc-cosine with 
     # degree 0. It can be interpreted as cosine
     # similarity but projected into a [0,1] interval.
     # TODO: arc-cosine with degree 1.
     tf_pi = tf.constant(np.pi, dtype=tf.float64)
     tf_norms = tf.constant(self.norms, dtype=tf.float64, name='norms')
     normlist1 = tf.gather(tf_norms, slist1, name='normlist1')
     normlist2 = tf.matrix_transpose(tf.gather(tf_norms, slist2, name='normlist2'))
     norms = tf.batch_matmul(normlist1, normlist2)
     cosine = tf.clip_by_value(tf.truediv(dot, norms), -1, 1)
     angle = tf.acos(cosine)
     angle = tf.select(tf.is_nan(angle), tf.ones_like(angle) * tf_pi, angle)
     return 1 - (angle / tf_pi)
Example #47
    def K(self, X, X2=None, presliced=False):
        if not presliced:
            X, X2 = self._slice(X, X2)

        X_denominator = tf.sqrt(self._weighted_product(X))
        if X2 is None:
            X2 = X
            X2_denominator = X_denominator
        else:
            X2_denominator = tf.sqrt(self._weighted_product(X2))

        numerator = self._weighted_product(X, X2)
        X_denominator = tf.expand_dims(X_denominator, -1)
        X2_denominator = tf.matrix_transpose(tf.expand_dims(X2_denominator, -1))
        cos_theta = numerator / X_denominator / X2_denominator
        jitter = 1e-15
        theta = tf.acos(jitter + (1 - 2 * jitter) * cos_theta)

        return self.variance * (1. / np.pi) * self._J(theta) * \
               X_denominator ** self.order * \
               X2_denominator ** self.order
Example #48
 def _validate_correlationness(self, x):
   if not self.validate_args:
     return x
   checks = [
       tf.assert_less_equal(
           tf.cast(-1., dtype=x.dtype.base_dtype),
           x,
           message='Correlations must be >= -1.'),
       tf.assert_less_equal(
           x,
           tf.cast(1., x.dtype.base_dtype),
           message='Correlations must be <= 1.'),
       tf.assert_near(
           tf.matrix_diag_part(x),
           tf.cast(1., x.dtype.base_dtype),
           message='Self-correlations must be = 1.'),
       tf.assert_near(
           x, tf.matrix_transpose(x),
           message='Correlation matrices must be symmetric')
   ]
   with tf.control_dependencies(checks):
     return tf.identity(x)
Example #49
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <(\Sum_i K1_i_{Z1, x_n}) (\Sum_j K2_j_{x_n, Z2})>_p(x_n)
        - \Sum_i K1_i_{.,.}, \Sum_j K2_j_{.,.} :: Sum kernels

    :return: NxM1xM2
    """
    crossexps = []

    if kern1 == kern2 and feat1 == feat2:  # avoid duplicate computation by using transposes
        for i, k1 in enumerate(kern1.kernels):
            crossexps.append(expectation(p, (k1, feat1), (k1, feat1), nghp=nghp))

            for k2 in kern1.kernels[:i]:
                eKK = expectation(p, (k1, feat1), (k2, feat2), nghp=nghp)
                eKK += tf.matrix_transpose(eKK)
                crossexps.append(eKK)
    else:
        for k1, k2 in it.product(kern1.kernels, kern2.kernels):
            crossexps.append(expectation(p, (k1, feat1), (k2, feat2), nghp=nghp))

    return functools.reduce(tf.add, crossexps)
Example #50
def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
  """Returns a uniformly random `Tensor` of "correlation-like" matrices.

  A "correlation-like" matrix is a symmetric square matrix with all entries
  between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
  the ones that are positive semi-definite are exactly the correlation
  matrices.

  Args:
    num_rows: Python `int` dimension of the correlation-like matrices.
    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
      batch to return.
    dtype: `dtype` of the `Tensor` to return.
    seed: Random seed.

  Returns:
    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
      and dtype `dtype`.  Each entry is in [-1, 1], and each matrix
      along the bottom two dimensions is symmetric and has 1s on the
      main diagonal.
  """
  num_entries = num_rows * (num_rows + 1) // 2
  ones = tf.ones(shape=[num_entries], dtype=dtype)
  # It seems wasteful to generate random values for the diagonal since
  # I am going to throw them away, but `fill_triangular` fills the
  # diagonal, so I probably need them.
  # It's not impossible that it would be more efficient to just fill
  # the whole matrix with random values instead of messing with
  # `fill_triangular`.  Then would need to filter almost half out with
  # `matrix_band_part`.
  unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
  tril = util.fill_triangular(unifs)
  symmetric = tril + tf.matrix_transpose(tril)
  diagonal_ones = tf.ones(
      shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
      dtype=dtype)
  return tf.matrix_set_diag(symmetric, diagonal_ones)
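The last two lines of the function symmetrize a randomly filled lower triangle and then force 1s onto the diagonal. A NumPy sketch of that step (np.tril replaces fill_triangular, np.fill_diagonal replaces tf.matrix_set_diag):

import numpy as np

rng = np.random.default_rng(0)
num_rows = 4
tril = np.tril(rng.uniform(-1.0, 1.0, size=(num_rows, num_rows)))  # random lower triangle

symmetric = tril + tril.T         # symmetrize, like tril + tf.matrix_transpose(tril)
np.fill_diagonal(symmetric, 1.0)  # overwrite the diagonal with 1s

assert np.allclose(symmetric, symmetric.T)
assert np.allclose(np.diag(symmetric), 1.0)
assert np.all(np.abs(symmetric) <= 1.0)   # entries stay in [-1, 1]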
Example #51
def lanczos_bidiag(operator,
                   k,
                   orthogonalize=True,
                   starting_vector=None,
                   name="lanczos_bidiag"):
  """Computes a Lanczos bidiagonalization for a linear operator.

  Computes matrices `U` of shape `[m, k+1]`, `V` of shape `[n, k]` and lower
  bidiagonal matrix `B` of shape `[k+1, k]`, that satisfy the equations
  `A * V = U * B` and `A' * U[:, :-1] = V * B[:-1, :]'`.

  The columns of `U` are orthonormal and form a basis for the Krylov subspace
  `K(A*A', U[:,0])`.

  The columns of `V` are orthonormal and form a basis for the Krylov subspace
  `K(A'*A, A' U[:,0])`.

  Args:
    operator: An object representing a linear operator with attributes:
      - shape: Either a list of integers or a 1-D `Tensor` of type `int32` of
        length 2. `shape[0]` is the dimension of the domain of the operator,
        `shape[1]` is the dimension of the co-domain of the operator. In other
        words, if operator represents an M x N matrix A, `shape` must contain
        `[M, N]`.
      - dtype: The datatype of input to and output from `apply` and
        `apply_adjoint`.
      - apply: Callable object taking a vector `x` as input and returning a
        vector with the result of applying the operator to `x`, i.e. if
       `operator` represents matrix `A`, `apply` should return `A * x`.
      - apply_adjoint: Callable object taking a vector `x` as input and
        returning a vector with the result of applying the adjoint operator
        to `x`, i.e. if `operator` represents matrix `A`, `apply_adjoint` should
        return `conj(transpose(A)) * x`.
    k: An integer or a scalar Tensor of type `int32`. Determines the maximum
      number of steps to run. If an invariant subspace is found, the algorithm
      may terminate before `k` steps have been run.
    orthogonalize: If `True`, perform full orthogonalization. If `False` no
      orthogonalization is performed.
    starting_vector: If not null, must be a `Tensor` of shape `[n]`.
    name: A name scope for the operation.

  Returns:
    output: A namedtuple representing a Lanczos bidiagonalization of
      `operator` with attributes:
      u: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[0], k_actual+1]`, where `k_actual` is the number of
        steps run.
      v: A rank-2 `Tensor` of type `operator.dtype` and shape
        `[operator.shape[1], k_actual]`, where `k_actual` is the number of steps
        run.
      alpha: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
      beta: A rank-1 `Tensor` of type `operator.dtype` and shape `[k]`.
  """

  def tarray(size, dtype, name):
    return tf.TensorArray(
        dtype=dtype,
        size=size,
        tensor_array_name=name,
        clear_after_read=False)

  # Reads a row-vector at location i in tarray and returns it as a
  # column-vector.
  def read_colvec(tarray, i):
    return tf.expand_dims(tarray.read(i), -1)

  # Writes a column-vector as a row-vector at location i in tarray.
  def write_colvec(tarray, colvec, i):
    return tarray.write(i, tf.squeeze(colvec))

  # Ephemeral class holding Lanczos bidiagonalization state:
  #   u = left Lanczos vectors
  #   v = right Lanczos vectors
  #   alpha = diagonal of B_k.
  #   beta = subdiagonal of B_k.
  # Notice that we store the left and right Lanczos vectors as the _rows_
  # of u and v. This is done because tensors are stored row-major and
  # TensorArray only supports packing along dimension 0.
  lanzcos_bidiag_state = collections.namedtuple("LanczosBidiagState",
                                                ["u", "v", "alpha", "beta"])

  def update_state(old, i, u, v, alpha, beta):
    return lanzcos_bidiag_state(
        write_colvec(old.u, u, i + 1),
        write_colvec(old.v, v, i),
        old.alpha.write(i, alpha),
        old.beta.write(i, beta))

  def gram_schmidt_step(j, basis, v):
    """Makes v orthogonal to the j'th vector in basis."""
    v_shape = v.get_shape()
    basis_vec = read_colvec(basis, j)
    v -= tf.batch_matmul(basis_vec, v, adj_x=True) * basis_vec
    v.set_shape(v_shape)
    return j + 1, basis, v

  def orthogonalize_once(i, basis, v):
    j = tf.constant(0, dtype=tf.int32)
    _, _, v = tf.while_loop(lambda j, basis, v: j < i, gram_schmidt_step,
                            [j, basis, v])
    return util.l2normalize(v)

  # Iterated modified Gram-Schmidt orthogonalization adapted from PROPACK.
  # TODO(rmlarsen): This is possibly the slowest implementation of
  # iterated Gram-Schmidt orthogonalization since the abacus. Move to C++.
  def orthogonalize_(i, basis, v):
    v_norm = util.l2norm(v)
    v_new, v_new_norm = orthogonalize_once(i, basis, v)
    # If the norm decreases more than 1/sqrt(2), run a second
    # round of MGS. See proof in:
    #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
    #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
    return tf.cond(v_new_norm < 0.7071 * v_norm,
                   lambda: orthogonalize_once(i, basis, v),
                   lambda: (v_new, v_new_norm))

  def stopping_criterion(i, _):
    # TODO(rmlarsen): Stop if an invariant subspace is detected.
    return i < k

  def lanczos_bidiag_step(i, ls):
    """Extends the Lanczos bidiagonalization ls by one step."""
    u = read_colvec(ls.u, i)
    r = operator.apply_adjoint(u)
    # The shape inference doesn't work across cond, save and reapply the shape.
    r_shape = r.get_shape()
    r = tf.cond(
        i > 0,
        lambda: r - ls.beta.read(i - 1) * read_colvec(ls.v, i - 1),
        lambda: r)
    r.set_shape(r_shape)
    if orthogonalize:
      v, alpha = orthogonalize_(i - 1, ls.v, r)
    else:
      v, alpha = util.l2normalize(r)
    p = operator.apply(v) - alpha * u
    if orthogonalize:
      u, beta = orthogonalize_(i, ls.u, p)
    else:
      u, beta = util.l2normalize(p)

    return i + 1, update_state(ls, i, u, v, alpha, beta)

  with tf.name_scope(name):
    dtype = operator.dtype
    if starting_vector is None:
      starting_vector = tf.random_uniform(
          operator.shape[:1], -1, 1, dtype=dtype)
    u0, _ = util.l2normalize(starting_vector)
    ls = lanzcos_bidiag_state(
        u=write_colvec(tarray(k + 1, dtype, "u"), u0, 0),
        v=tarray(k, dtype, "v"),
        alpha=tarray(k, dtype, "alpha"),
        beta=tarray(k, dtype, "beta"))
    i = tf.constant(0, dtype=tf.int32)
    _, ls = tf.while_loop(stopping_criterion, lanczos_bidiag_step, [i, ls])
    return lanzcos_bidiag_state(
        tf.matrix_transpose(ls.u.pack()),
        tf.matrix_transpose(ls.v.pack()), ls.alpha.pack(), ls.beta.pack())
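# A minimal NumPy sketch, not part of the routine above, of how the returned `alpha` and
# `beta` define the (k+1) x k lower-bidiagonal matrix B_k of the Golub-Kahan recurrence
# A v_i = alpha_i u_i + beta_i u_{i+1}. The arrays `alpha` and `beta` below are made-up
# stand-ins for the evaluated outputs.
import numpy as np

def bidiag_matrix(alpha, beta):
  """Places alpha on the main diagonal and beta on the first subdiagonal."""
  k = len(alpha)
  b = np.zeros((k + 1, k))
  b[np.arange(k), np.arange(k)] = alpha
  b[np.arange(1, k + 1), np.arange(k)] = beta
  return b

alpha = np.array([2.0, 1.5, 1.1])
beta = np.array([0.7, 0.4, 0.2])
b_k = bidiag_matrix(alpha, beta)  # shape (4, 3)
# With u of shape [n, k+1] and v of shape [m, k] as returned above, the recurrence
# implies a @ v ~= u @ b_k up to floating-point error (and the small corrections
# introduced when `orthogonalize=True`).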
Example #52
0
def gen_decoder(hparams,
                inputs,
                targets,
                targets_present,
                encoding_state,
                is_training,
                is_validating,
                reuse=None):
  """Define the Decoder graph. The Decoder will now impute tokens that
      have been masked from the input sequence.
  """
  config = get_config()
  gen_decoder_rnn_size = hparams.gen_rnn_size

  if FLAGS.seq2seq_share_embedding:
    with tf.variable_scope('decoder/rnn', reuse=True):
      embedding = tf.get_variable('embedding',
                                  [FLAGS.vocab_size, gen_decoder_rnn_size])

  with tf.variable_scope('decoder', reuse=reuse):
    # Neural architecture search cell.
    cell = custom_cell.Alien(config.hidden_size)

    if is_training:
      [h2h_masks, _, _,
       output_mask] = variational_dropout.generate_variational_dropout_masks(
           hparams, config.keep_prob)
    else:
      output_mask = None

    cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers)

    # Hidden encoder states.
    hidden_vector_encodings = encoding_state[0]

    # Carry forward the final state tuple from the encoder.
    # State tuples.
    state_gen = encoding_state[1]

    if FLAGS.attention_option is not None:
      (attention_keys, attention_values, _,
       attention_construct_fn) = attention_utils.prepare_attention(
           hidden_vector_encodings,
           FLAGS.attention_option,
           num_units=gen_decoder_rnn_size,
           reuse=reuse)

    with tf.variable_scope('rnn'):
      sequence, logits, log_probs = [], [], []

      if not FLAGS.seq2seq_share_embedding:
        embedding = tf.get_variable('embedding',
                                    [FLAGS.vocab_size, gen_decoder_rnn_size])
      softmax_w = tf.matrix_transpose(embedding)
      softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

      rnn_inputs = tf.nn.embedding_lookup(embedding, inputs)

      if is_training and FLAGS.keep_prob < 1:
        rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob)

      for t in xrange(FLAGS.sequence_length):
        if t > 0:
          tf.get_variable_scope().reuse_variables()

        # Input to the Decoder.
        if t == 0:
          # Always provide the real input at t = 0.
          rnn_inp = rnn_inputs[:, t]

        # If the input is present, read in the input at t.
        # If the input is not present, read in the previously generated.
        else:
          real_rnn_inp = rnn_inputs[:, t]
          fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)

          # While validating, the decoder should be operating in the teacher
          # forcing regime.  Also, if we're just training with cross_entropy
          # use teacher forcing.
          if is_validating or (is_training and
                               FLAGS.gen_training_strategy == 'cross_entropy'):
            rnn_inp = real_rnn_inp
          else:
            rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp,
                               fake_rnn_inp)

        if is_training:
          state_gen = list(state_gen)
          for layer_num, per_layer_state in enumerate(state_gen):
            per_layer_state = LSTMTuple(
                per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num])
            state_gen[layer_num] = per_layer_state

        # RNN.
        rnn_out, state_gen = cell_gen(rnn_inp, state_gen)

        if is_training:
          rnn_out = output_mask * rnn_out

        if FLAGS.attention_option is not None:
          rnn_out = attention_construct_fn(rnn_out, attention_keys,
                                           attention_values)
        #   # TODO(liamfedus): Assert not "monotonic" attention_type.
        #   # TODO(liamfedus): FLAGS.attention_type.
        #   context_state = revised_attention_utils._empty_state()
        #   rnn_out, context_state = attention_construct_fn(
        #       rnn_out, attention_keys, attention_values, context_state, t)
        logit = tf.matmul(rnn_out, softmax_w) + softmax_b

        # Output for Decoder.
        # If input is present:   Return real at t+1.
        # If input is not present:  Return fake for t+1.
        real = targets[:, t]

        categorical = tf.contrib.distributions.Categorical(logits=logit)
        fake = categorical.sample()
        log_prob = categorical.log_prob(fake)

        output = tf.where(targets_present[:, t], real, fake)

        # Add to lists.
        sequence.append(output)
        log_probs.append(log_prob)
        logits.append(logit)

  return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack(
      log_probs, axis=1))
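# A small NumPy sketch, not part of the model above, of the weight-tying trick the decoder
# uses: the output projection reuses the input embedding, so the softmax weights are just
# the transpose of `embedding` (cf. tf.matrix_transpose above). All sizes and arrays here
# are made-up stand-ins.
import numpy as np

vocab_size, rnn_size, batch_size = 7, 4, 3
embedding = np.random.randn(vocab_size, rnn_size).astype(np.float32)
softmax_w = embedding.T                               # rnn_size x vocab_size
softmax_b = np.zeros(vocab_size, dtype=np.float32)

rnn_out = np.random.randn(batch_size, rnn_size).astype(np.float32)
logit = rnn_out @ softmax_w + softmax_b               # batch_size x vocab_size
assert logit.shape == (batch_size, vocab_size)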
Example #53
0
  def testMultivariateFromScalarBatchScalarEvent(self):
    with self.test_session() as sess:
      shift = np.array([-1, 0, 1], dtype=np.float32)
      scale = la.LinearOperatorTriL(
          [[[-1., 0, 0],
            [2, 1, 0],
            [3, 2, 1]],
           [[2, 0, 0],
            [3, -2, 0],
            [4, 3, 2]]],
          is_non_singular=True,
          is_positive_definite=False)

      # Overriding shapes must be compatible w/bijector; most bijectors are
      # batch_shape agnostic and only care about event_ndims.
      # In the case of `Affine`, if we got it wrong then it would fire an
      # exception due to incompatible dimensions.
      fake_mvn = ds.TransformedDistribution(
          distribution=ds.Normal(mu=0., sigma=1.),
          bijector=bs.AffineLinearOperator(shift, scale),
          batch_shape=scale.batch_shape,               # [2]
          event_shape=[scale.domain_dimension.value],  # [3]
          validate_args=True)

      # Note: Affine elided this tile.
      actual_mean = np.tile(shift, [2, 1])
      # Since LinOp.apply doesn't support `adjoint_b` or composition,
      # we cannot do: scale.apply(scale, adjoint_b=True).eval()
      actual_cov = scale.apply(tf.matrix_transpose(scale.to_dense())).eval()

      actual_mvn = ds.MultivariateNormalFull(mu=actual_mean, sigma=actual_cov)

      # Ensure sample works by checking first, second moments.
      n = 5e3
      y = fake_mvn.sample(int(n), seed=0)
      sample_mean = tf.reduce_mean(y, 0)
      centered_y = tf.transpose(y - sample_mean, [1, 2, 0])
      sample_cov = tf.matmul(centered_y, centered_y, transpose_b=True) / n
      [sample_mean_, sample_cov_] = sess.run([sample_mean, sample_cov])
      self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
      self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

      # Ensure all other functions work as intended.
      x = fake_mvn.sample(5, seed=0).eval()
      self.assertAllEqual([5, 2, 3], x.shape)
      self.assertAllEqual(actual_mvn.get_event_shape(),
                          fake_mvn.get_event_shape())
      self.assertAllEqual(actual_mvn.event_shape().eval(),
                          fake_mvn.event_shape().eval())
      self.assertAllEqual(actual_mvn.get_batch_shape(),
                          fake_mvn.get_batch_shape())
      self.assertAllEqual(actual_mvn.batch_shape().eval(),
                          fake_mvn.batch_shape().eval())
      self.assertAllClose(actual_mvn.log_prob(x).eval(),
                          fake_mvn.log_prob(x).eval(),
                          atol=0., rtol=1e-7)
      self.assertAllClose(actual_mvn.prob(x).eval(),
                          fake_mvn.prob(x).eval(),
                          atol=0., rtol=1e-6)
      self.assertAllClose(actual_mvn.entropy().eval(),
                          fake_mvn.entropy().eval(),
                          atol=0., rtol=1e-6)
      for unsupported_fn in (fake_mvn.log_cdf,
                             fake_mvn.cdf,
                             fake_mvn.survival_function,
                             fake_mvn.log_survival_function):
        with self.assertRaisesRegexp(
            NotImplementedError, "not implemented when overriding event_shape"):
          unsupported_fn(x)
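# A NumPy sketch, illustrative only, of the covariance the test above constructs: for an
# affine transform y = shift + scale @ z with z ~ N(0, I), cov(y) = scale @ scale^T, which
# is what scale.apply(tf.matrix_transpose(scale.to_dense())) computes batch-wise.
# `scale_dense` below stands in for scale.to_dense().eval().
import numpy as np

scale_dense = np.array([[[-1., 0, 0], [2, 1, 0], [3, 2, 1]],
                        [[2., 0, 0], [3, -2, 0], [4, 3, 2]]])
# Batched matmul against the transpose of the last two axes mirrors tf.matrix_transpose.
actual_cov = scale_dense @ np.swapaxes(scale_dense, -1, -2)      # shape [2, 3, 3]
assert np.allclose(actual_cov, np.swapaxes(actual_cov, -1, -2))  # each slice is symmetric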
Example #54
0
def uncertain_conditional(Xnew_mu, Xnew_var, feat, kern, q_mu, q_sqrt, *,
                          mean_function=None, full_output_cov=False, full_cov=False, white=False):
    """
    Calculates the conditional for uncertain inputs Xnew, p(Xnew) = N(Xnew_mu, Xnew_var).
    See ``conditional`` documentation for further reference.

    :param Xnew_mu: mean of the inputs, size N x Din
    :param Xnew_var: covariance matrix of the inputs, size N x Din x Din
    :param feat: gpflow.InducingFeature object, only InducingPoints is supported
    :param kern: gpflow kernel or ekernel object.
    :param q_mu: mean of the inducing points, size M x Dout
    :param q_sqrt: Cholesky of the covariance matrix of the inducing points, size Dout x M x M
    :param full_output_cov: boolean whether to compute covariance between output dimensions.
                            Influences the shape of return value ``fvar``. Default is False
    :param white: boolean whether to use the whitened representation. Default is False.

    :return fmean, fvar: mean and covariance of the conditional, size ``fmean`` is N x Dout,
            size ``fvar`` depends on ``full_output_cov``: if True ``f_var`` is N x Dout x Dout,
            if False then ``f_var`` is N x Dout
    """

    # TODO(VD): Tensorflow 1.7 doesn't support broadcasting in ``tf.matmul`` and
    # ``tf.matrix_triangular_solve``. This is reported in issue 216.
    # As a temporary workaround, we are using ``tf.einsum`` for the matrix
    # multiplications and tiling in the triangular solves.
    # The code that should be used once the bug is resolved is added in comments.

    if not isinstance(feat, InducingPoints):
        raise NotImplementedError

    if full_cov:
        # TODO(VD): ``full_cov`` True would return a ``fvar`` of shape N x N x D x D,
        # encoding the covariance between input datapoints as well.
        # This is not implemented as this feature is only used for plotting purposes.
        raise NotImplementedError

    pXnew = Gaussian(Xnew_mu, Xnew_var)

    num_data = tf.shape(Xnew_mu)[0]  # number of new inputs (N)
    num_ind = tf.shape(q_mu)[0]  # number of inducing points (M)
    num_func = tf.shape(q_mu)[1]  # output dimension (D)

    q_sqrt_r = tf.matrix_band_part(q_sqrt, -1, 0)  # D x M x M

    eKuf = tf.transpose(expectation(pXnew, (kern, feat)))  # M x N (psi1)
    Kuu = feat.Kuu(kern, jitter=settings.numerics.jitter_level)  # M x M
    Luu = tf.cholesky(Kuu)  # M x M

    if not white:
        q_mu = tf.matrix_triangular_solve(Luu, q_mu, lower=True)
        Luu_tiled = tf.tile(Luu[None, :, :], [num_func, 1, 1])  # remove line once issue 216 is fixed
        q_sqrt_r = tf.matrix_triangular_solve(Luu_tiled, q_sqrt_r, lower=True)

    Li_eKuf = tf.matrix_triangular_solve(Luu, eKuf, lower=True)  # M x N
    fmean = tf.matmul(Li_eKuf, q_mu, transpose_a=True)

    eKff = expectation(pXnew, kern)  # N (psi0)
    eKuffu = expectation(pXnew, (kern, feat), (kern, feat))  # N x M x M (psi2)
    Luu_tiled = tf.tile(Luu[None, :, :], [num_data, 1, 1])  # remove this line, once issue 216 is fixed
    Li_eKuffu = tf.matrix_triangular_solve(Luu_tiled, eKuffu, lower=True)
    Li_eKuffu_Lit = tf.matrix_triangular_solve(Luu_tiled, tf.matrix_transpose(Li_eKuffu), lower=True)  # N x M x M
    cov = tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)  # D x M x M

    if mean_function is None or isinstance(mean_function, mean_functions.Zero):
        e_related_to_mean = tf.zeros((num_data, num_func, num_func), dtype=settings.float_type)
    else:
        # Update mean: \mu(x) + m(x)
        fmean = fmean + expectation(pXnew, mean_function)

        # Calculate: m(x) m(x)^T + m(x) \mu(x)^T + \mu(x) m(x)^T,
        # where m(x) is the mean_function and \mu(x) is fmean
        e_mean_mean = expectation(pXnew, mean_function, mean_function)  # N x D x D
        Lit_q_mu = tf.matrix_triangular_solve(Luu, q_mu, adjoint=True)
        e_mean_Kuf = expectation(pXnew, mean_function, (kern, feat))  # N x D x M
        # einsum isn't able to infer the rank of e_mean_Kuf, hence we explicitly set the rank of the tensor:
        e_mean_Kuf = tf.reshape(e_mean_Kuf, [num_data, num_func, num_ind])
        e_fmean_mean = tf.einsum("nqm,mz->nqz", e_mean_Kuf, Lit_q_mu)  # N x D x D
        e_related_to_mean = e_fmean_mean + tf.matrix_transpose(e_fmean_mean) + e_mean_mean

    if full_output_cov:
        fvar = (
                tf.matrix_diag(tf.tile((eKff - tf.trace(Li_eKuffu_Lit))[:, None], [1, num_func])) +
                tf.matrix_diag(tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov)) +
                # tf.matrix_diag(tf.trace(tf.matmul(Li_eKuffu_Lit, cov))) +
                tf.einsum("ig,nij,jh->ngh", q_mu, Li_eKuffu_Lit, q_mu) -
                # tf.matmul(q_mu, tf.matmul(Li_eKuffu_Lit, q_mu), transpose_a=True) -
                fmean[:, :, None] * fmean[:, None, :] +
                e_related_to_mean
        )
    else:
        fvar = (
                (eKff - tf.trace(Li_eKuffu_Lit))[:, None] +
                tf.einsum("nij,dji->nd", Li_eKuffu_Lit, cov) +
                tf.einsum("ig,nij,jg->ng", q_mu, Li_eKuffu_Lit, q_mu) -
                fmean ** 2 +
                tf.matrix_diag_part(e_related_to_mean)
        )

    return fmean, fvar
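# A NumPy sketch, illustrative only, of two building blocks used above: keeping only the
# lower triangle of `q_sqrt` (the analogue of tf.matrix_band_part(q_sqrt, -1, 0)) and
# forming the per-output covariance cov = L @ L^T. The sizes D and M are made-up examples.
import numpy as np

D, M = 2, 3
q_sqrt = np.random.randn(D, M, M)
q_sqrt_r = np.tril(q_sqrt)                         # D x M x M lower-triangular factors
cov = q_sqrt_r @ np.swapaxes(q_sqrt_r, -1, -2)     # cf. tf.matmul(q_sqrt_r, q_sqrt_r, transpose_b=True)
assert np.allclose(cov, np.swapaxes(cov, -1, -2))  # each D-slice is a symmetric PSD matrix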
Example #55
0
def generator(hparams,
              inputs,
              targets,
              targets_present,
              is_training,
              is_validating,
              reuse=None):
  """Define the Generator graph.

    G will now impute tokens that have been masked from the input sequence.
  """
  tf.logging.info(
      'Unidirectional generative model is not a useful model for this MaskGAN '
      'because future context is needed.  Use only for debugging purposes.')
  config = get_config()
  config.keep_prob = [hparams.gen_nas_keep_prob_0, hparams.gen_nas_keep_prob_1]
  configs.print_config(config)

  init_scale = config.init_scale
  initializer = tf.random_uniform_initializer(-init_scale, init_scale)

  with tf.variable_scope('gen', reuse=reuse, initializer=initializer):
    # Neural architecture search cell.
    cell = custom_cell.Alien(config.hidden_size)

    if is_training:
      [h2h_masks, _, _,
       output_mask] = variational_dropout.generate_variational_dropout_masks(
           hparams, config.keep_prob)
    else:
      output_mask = None

    cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers)
    initial_state = cell_gen.zero_state(FLAGS.batch_size, tf.float32)

    with tf.variable_scope('rnn'):
      sequence, logits, log_probs = [], [], []
      embedding = tf.get_variable('embedding',
                                  [FLAGS.vocab_size, hparams.gen_rnn_size])
      softmax_w = tf.matrix_transpose(embedding)
      softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

      rnn_inputs = tf.nn.embedding_lookup(embedding, inputs)

      if is_training and FLAGS.keep_prob < 1:
        rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob)

      for t in xrange(FLAGS.sequence_length):
        if t > 0:
          tf.get_variable_scope().reuse_variables()

        # Input to the model is the first token to provide context.  The
        # model will then predict token t > 0.
        if t == 0:
          # Always provide the real input at t = 0.
          state_gen = initial_state
          rnn_inp = rnn_inputs[:, t]

        # If the input is present, read in the input at t.
        # If the input is not present, read in the previously generated.
        else:
          real_rnn_inp = rnn_inputs[:, t]
          fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)

          # While validating, the decoder should be operating in the teacher
          # forcing regime.  Also, if we're just training with cross_entropy
          # use teacher forcing.
          if is_validating or (is_training and
                               FLAGS.gen_training_strategy == 'cross_entropy'):
            rnn_inp = real_rnn_inp
          else:
            rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp,
                               fake_rnn_inp)

        if is_training:
          state_gen = list(state_gen)
          for layer_num, per_layer_state in enumerate(state_gen):
            per_layer_state = LSTMTuple(
                per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num])
            state_gen[layer_num] = per_layer_state

        # RNN.
        rnn_out, state_gen = cell_gen(rnn_inp, state_gen)

        if is_training:
          rnn_out = output_mask * rnn_out

        logit = tf.matmul(rnn_out, softmax_w) + softmax_b

        # Real sample.
        real = targets[:, t]

        categorical = tf.contrib.distributions.Categorical(logits=logit)
        fake = categorical.sample()
        log_prob = categorical.log_prob(fake)

        # Output for Generator will either be generated or the input.
        #
        # If present:   Return real.
        # If not present:  Return fake.
        output = tf.where(targets_present[:, t], real, fake)

        # Add to lists.
        sequence.append(output)
        log_probs.append(log_prob)
        logits.append(logit)

      # Produce the RNN state had the model operated only
      # over real data.
      real_state_gen = initial_state
      for t in xrange(FLAGS.sequence_length):
        tf.get_variable_scope().reuse_variables()

        rnn_inp = rnn_inputs[:, t]

        # RNN.
        rnn_out, real_state_gen = cell_gen(rnn_inp, real_state_gen)

      final_state = real_state_gen

  return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack(
      log_probs, axis=1), initial_state, final_state)
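# A NumPy sketch, not from the code above, of the per-timestep input selection used by the
# generator: when the token at t-1 was present we feed the real embedding, otherwise the
# embedding of the previously sampled (fake) token. np.where plays the role of tf.where and
# needs an explicit broadcast of the boolean mask; all arrays here are made-up stand-ins.
import numpy as np

batch_size, emb_size = 4, 3
present = np.array([True, False, True, False])     # targets_present[:, t - 1]
real_rnn_inp = np.ones((batch_size, emb_size))     # embedding of the real token at t
fake_rnn_inp = np.zeros((batch_size, emb_size))    # embedding of the sampled token
rnn_inp = np.where(present[:, None], real_rnn_inp, fake_rnn_inp)
# Rows 0 and 2 come from real_rnn_inp, rows 1 and 3 from fake_rnn_inp.
assert rnn_inp[0, 0] == 1.0 and rnn_inp[1, 0] == 0.0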
Example #56
0
  def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self):
    vector = [1, 2, 3]
    with self.test_session():
      with self.assertRaisesRegexp(ValueError, "should be a "):
        tf.matrix_transpose(vector)
Example #57
0
  def _maybe_adjoint(self, x, adjoint):
    if adjoint:
      return tf.matrix_transpose(x)
    else:
      return x
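# A minimal NumPy sketch, not from the code above, of what the helper toggles: for real
# matrices the adjoint is just the transpose of the last two axes (which is what
# tf.matrix_transpose computes); for complex matrices the adjoint would also conjugate.
import numpy as np

def maybe_adjoint(x, adjoint):
  return np.conj(np.swapaxes(x, -1, -2)) if adjoint else x

x = np.array([[1.0, 2.0], [3.0, 4.0]])
assert np.allclose(maybe_adjoint(x, True), x.T)  # real case: adjoint == transpose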
Example #58
0
  def __init__(self,
               loc=None,
               covariance_matrix=None,
               validate_args=False,
               allow_nan_stats=True,
               name="MultivariateNormalFullCovariance"):
    """Construct Multivariate Normal distribution on `R^k`.

    The `batch_shape` is the broadcast shape between `loc` and
    `covariance_matrix` arguments.

    The `event_shape` is given by last dimension of the matrix implied by
    `covariance_matrix`. The last dimension of `loc` (if provided) must
    broadcast with this.

    A non-batch `covariance_matrix` is a `k x k` symmetric positive definite
    matrix.  In other words, it is (real) symmetric with all eigenvalues
    strictly positive.

    Additional leading dimensions (if any) will index batches.

    Args:
      loc: Floating-point `Tensor`. If this is set to `None`, `loc` is
        implicitly `0`. When specified, may have shape `[B1, ..., Bb, k]` where
        `b >= 0` and `k` is the event size.
      covariance_matrix: Floating-point, symmetric positive definite `Tensor` of
        same `dtype` as `loc`.  The strict upper triangle of `covariance_matrix`
        is ignored, so if `covariance_matrix` is not symmetric no error will be
        raised (unless `validate_args is True`).  `covariance_matrix` has shape
        `[B1, ..., Bb, k, k]` where `b >= 0` and `k` is the event size.
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`,
        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
        indicate the result is undefined. When `False`, an exception is raised
        if one or more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if neither `loc` nor `covariance_matrix` are specified.
    """
    parameters = dict(locals())

    # Convert the covariance_matrix up to a scale_tril and call MVNTriL.
    with tf.name_scope(name) as name:
      with tf.name_scope("init", values=[loc, covariance_matrix]):
        dtype = dtype_util.common_dtype([loc, covariance_matrix], tf.float32)
        loc = loc if loc is None else tf.convert_to_tensor(
            loc, name="loc", dtype=dtype)
        if covariance_matrix is None:
          scale_tril = None
        else:
          covariance_matrix = tf.convert_to_tensor(
              covariance_matrix, name="covariance_matrix", dtype=dtype)
          if validate_args:
            covariance_matrix = control_flow_ops.with_dependencies([
                tf.assert_near(
                    covariance_matrix,
                    tf.matrix_transpose(covariance_matrix),
                    message="Matrix was not symmetric")
            ], covariance_matrix)
          # No need to validate that covariance_matrix is non-singular.
          # LinearOperatorLowerTriangular has an assert_non_singular method that
          # is called by the Bijector.
          # However, cholesky() ignores the upper triangular part, so we do need
          # to separately assert symmetric.
          scale_tril = tf.cholesky(covariance_matrix)
        super(MultivariateNormalFullCovariance, self).__init__(
            loc=loc,
            scale_tril=scale_tril,
            validate_args=validate_args,
            allow_nan_stats=allow_nan_stats,
            name=name)
    self._parameters = parameters
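# A NumPy sketch, illustrative only, of the validation and factorization performed above:
# a covariance matrix must equal its own transpose, and its (lower-triangular) Cholesky
# factor becomes the scale_tril of the underlying MVN-TriL parameterization. The 2 x 2
# matrix is a made-up example.
import numpy as np

covariance_matrix = np.array([[4.0, 1.0],
                              [1.0, 3.0]])
# cf. tf.assert_near(covariance_matrix, tf.matrix_transpose(covariance_matrix))
assert np.allclose(covariance_matrix, covariance_matrix.T)
scale_tril = np.linalg.cholesky(covariance_matrix)     # lower triangular, cov = L @ L^T
assert np.allclose(scale_tril @ scale_tril.T, covariance_matrix)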
Example #59
0
def gen_decoder(hparams,
                inputs,
                targets,
                targets_present,
                encoding_state,
                is_training,
                is_validating,
                reuse=None):
  """Define the Decoder graph. The Decoder will now impute tokens that
      have been masked from the input sequence.
  """
  gen_decoder_rnn_size = hparams.gen_rnn_size

  targets = tf.Print(targets, [targets], message='targets', summarize=50)
  if FLAGS.seq2seq_share_embedding:
    with tf.variable_scope('decoder/rnn', reuse=True):
      embedding = tf.get_variable('embedding',
                                  [FLAGS.vocab_size, hparams.gen_rnn_size])

  with tf.variable_scope('decoder', reuse=reuse):

    def lstm_cell():
      return tf.contrib.rnn.BasicLSTMCell(
          gen_decoder_rnn_size,
          forget_bias=0.0,
          state_is_tuple=True,
          reuse=reuse)

    attn_cell = lstm_cell
    if is_training and hparams.gen_vd_keep_prob < 1:

      def attn_cell():
        return variational_dropout.VariationalDropoutWrapper(
            lstm_cell(), FLAGS.batch_size, hparams.gen_rnn_size,
            hparams.gen_vd_keep_prob, hparams.gen_vd_keep_prob)

    cell_gen = tf.contrib.rnn.MultiRNNCell(
        [attn_cell() for _ in range(hparams.gen_num_layers)],
        state_is_tuple=True)

    # Hidden encoder states.
    hidden_vector_encodings = encoding_state[0]

    # Carry forward the final state tuple from the encoder.
    # State tuples.
    state_gen = encoding_state[1]

    if FLAGS.attention_option is not None:
      (attention_keys, attention_values, _,
       attention_construct_fn) = attention_utils.prepare_attention(
           hidden_vector_encodings,
           FLAGS.attention_option,
           num_units=gen_decoder_rnn_size,
           reuse=reuse)

    def make_mask(keep_prob, units):
      random_tensor = keep_prob
      # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
      random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units]))
      return tf.floor(random_tensor) / keep_prob

    if is_training:
      output_mask = make_mask(hparams.gen_vd_keep_prob, hparams.gen_rnn_size)

    with tf.variable_scope('rnn'):
      sequence, logits, log_probs = [], [], []

      if not FLAGS.seq2seq_share_embedding:
        embedding = tf.get_variable('embedding',
                                    [FLAGS.vocab_size, hparams.gen_rnn_size])
      softmax_w = tf.matrix_transpose(embedding)
      softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

      rnn_inputs = tf.nn.embedding_lookup(embedding, inputs)
      # TODO(adai): Perhaps append IMDB labels placeholder to input at
      # each time point.

      rnn_outs = []

      fake = None
      for t in xrange(FLAGS.sequence_length):
        if t > 0:
          tf.get_variable_scope().reuse_variables()

        # Input to the Decoder.
        if t == 0:
          # Always provide the real input at t = 0.
          rnn_inp = rnn_inputs[:, t]

        # If the input is present, read in the input at t.
        # If the input is not present, read in the previously generated.
        else:
          real_rnn_inp = rnn_inputs[:, t]

          # While validating, the decoder should be operating in the teacher
          # forcing regime.  Also, if we're just training with cross_entropy
          # use teacher forcing.
          if is_validating or FLAGS.gen_training_strategy == 'cross_entropy':
            rnn_inp = real_rnn_inp
          else:
            fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake)
            rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp,
                               fake_rnn_inp)

        # RNN.
        rnn_out, state_gen = cell_gen(rnn_inp, state_gen)

        if FLAGS.attention_option is not None:
          rnn_out = attention_construct_fn(rnn_out, attention_keys,
                                           attention_values)
        if is_training:
          rnn_out *= output_mask

        rnn_outs.append(rnn_out)
        if FLAGS.gen_training_strategy != 'cross_entropy':
          logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b)

          # Output for Decoder.
          # If input is present:   Return real at t+1.
          # If input is not present:  Return fake for t+1.
          real = targets[:, t]

          categorical = tf.contrib.distributions.Categorical(logits=logit)
          if FLAGS.use_gen_mode:
            fake = categorical.mode()
          else:
            fake = categorical.sample()
          log_prob = categorical.log_prob(fake)
          output = tf.where(targets_present[:, t], real, fake)

        else:
          real = targets[:, t]
          logit = tf.zeros(tf.stack([FLAGS.batch_size, FLAGS.vocab_size]))
          log_prob = tf.zeros(tf.stack([FLAGS.batch_size]))
          output = real

        # Add to lists.
        sequence.append(output)
        log_probs.append(log_prob)
        logits.append(logit)

      if FLAGS.gen_training_strategy == 'cross_entropy':
        logits = tf.nn.bias_add(
            tf.matmul(
                tf.reshape(tf.stack(rnn_outs, 1), [-1, gen_decoder_rnn_size]),
                softmax_w), softmax_b)
        logits = tf.reshape(logits,
                            [-1, FLAGS.sequence_length, FLAGS.vocab_size])
      else:
        logits = tf.stack(logits, axis=1)

  return (tf.stack(sequence, axis=1), logits, tf.stack(log_probs, axis=1))
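# A NumPy sketch, not from the code above, of the make_mask helper: an inverted-dropout
# mask that is 0 with probability (1 - keep_prob) and 1 / keep_prob otherwise, so that
# E[mask * x] == x. All sizes below are made-up examples.
import numpy as np

def make_mask(keep_prob, batch_size, units, rng=np.random):
  random_tensor = keep_prob + rng.uniform(size=(batch_size, units))
  # floor() gives 0. on [keep_prob, 1.0) and 1. on [1.0, 1.0 + keep_prob)
  return np.floor(random_tensor) / keep_prob

mask = make_mask(0.75, batch_size=2, units=5)
assert set(np.unique(mask)).issubset({0.0, 1.0 / 0.75})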