예제 #1
0
def gauss_kl_diag(q_mu, q_sqrt, K, num_latent):
    """
    KL divergence KL[q || p] between a diagonal-covariance Gaussian and a
    zero-mean Gaussian prior:

          q(x) = N(q_mu, q_sqrt^2),    p(x) = N(0, K)

    There are num_latent independent distributions, one per column of q_mu
    and q_sqrt.

    q_mu is a matrix whose columns are the means of q.

    q_sqrt is a matrix whose columns hold the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (the
        number of columns of q_mu and q_sqrt).

    Returns a scalar tensor: the sum of the KL terms over all columns.
    """
    chol_K = tf.cholesky(K)
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)
    num_rows = tf.shape(q_sqrt)[0]

    # K^{-1} = L^{-T} L^{-1}, assembled from two triangular solves. Only its
    # diagonal is used because the covariance of q is diagonal.
    chol_K_inv = tf.matrix_triangular_solve(
        chol_K, eye(tf.shape(chol_K)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(
        tf.transpose(chol_K), chol_K_inv, lower=False)
    K_inv_diag = tf.expand_dims(tf.diag_part(K_inv), 1)

    mahalanobis = 0.5 * tf.reduce_sum(tf.square(whitened_mu))
    prior_logdet = num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(chol_K))))
    constant = -0.5 * tf.cast(num_rows * num_latent, tf.float64)
    q_logdet = -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))
    trace = 0.5 * tf.reduce_sum(K_inv_diag * tf.square(q_sqrt))

    return mahalanobis + prior_logdet + constant + q_logdet + trace
예제 #2
0
파일: sgpr.py 프로젝트: ShuaiW/GPflow
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.

        Returns a scalar tensor (every term is a full reduction or a scalar).
        """
        noise_var = self.likelihood.variance
        noise_std = tf.sqrt(noise_var)

        num_inducing = tf.shape(self.Z)[0]
        y_shape = tf.shape(self.Y)
        num_data, output_dim = y_shape[0], y_shape[1]
        # Element count of Y as a float, shared by the two constant terms.
        num_elements = tf.cast(num_data * output_dim, tf.float64)

        residual = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        # Small diagonal jitter keeps the Cholesky factorisation stable.
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        chol_Kuu = tf.cholesky(Kuu)

        # Intermediate matrices, scaled by the noise standard deviation.
        A = tf.matrix_triangular_solve(chol_Kuu, Kuf, lower=True) / noise_std
        AAT = tf.matmul(A, tf.transpose(A))
        chol_B = tf.cholesky(AAT + eye(num_inducing))
        c = tf.matrix_triangular_solve(
            chol_B, tf.matmul(A, residual), lower=True) / noise_std

        # Individual terms of the log marginal bound.
        gauss_const = -0.5 * num_elements * np.log(2*np.pi)
        logdet_B = -tf.cast(output_dim, tf.float64) * tf.reduce_sum(
            tf.log(tf.diag_part(chol_B)))
        logdet_noise = -0.5 * num_elements * tf.log(noise_var)
        data_fit = -0.5 * tf.reduce_sum(tf.square(residual)) / noise_var
        projected_fit = 0.5 * tf.reduce_sum(tf.square(c))
        trace_term = -0.5 * (tf.reduce_sum(Kdiag) / noise_var
                             - tf.reduce_sum(tf.diag_part(AAT)))

        return (gauss_const + logdet_B + logdet_noise + data_fit
                + projected_fit + trace_term)
예제 #3
0
def gauss_kl(q_mu, q_sqrt, K, num_latent):
    """
    KL divergence KL[q || p] between full-covariance Gaussians and a shared
    zero-mean prior:

          q(x) = N(q_mu, q_sqrt^2),    p(x) = N(0, K)

    There are num_latent independent distributions, indexed by the columns
    of q_mu and by the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor; q_sqrt[:, :, d] is a lower triangular square-root
        matrix of the covariance of the d-th distribution.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal
        to the columns of q_mu and the last dim of q_sqrt).

    Returns a scalar tensor: the KL summed over all distributions.
    """
    chol_K = tf.cholesky(K)
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)

    # Terms that do not depend on the individual q covariances.
    total = 0.5 * tf.reduce_sum(tf.square(whitened_mu))  # Mahalanobis
    total += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(chol_K))))  # prior log-det
    total += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)

    for latent in range(num_latent):
        # Keep only the lower triangle of this latent's square root.
        chol_q = tf.batch_matrix_band_part(q_sqrt[:, :, latent], -1, 0)
        q_logdet = -0.5*tf.reduce_sum(tf.log(tf.square(tf.diag_part(chol_q))))
        total += q_logdet
        whitened_q = tf.matrix_triangular_solve(chol_K, chol_q, lower=True)
        total += 0.5 * tf.reduce_sum(tf.square(whitened_q))  # trace
    return total
예제 #4
0
    def compute_upper_bound(self):
        """
        Build the upper-bound value as ``const + logdet + quad``.

        The noise variance used in the quadratic term is inflated by the
        trace of ``Kff - Qff`` (the "trace bound" attributed to Titsias'
        presentation in the original comment). Presumably this bounds the
        log marginal likelihood from above; confirm against the caller.
        """
        noise_var = self.likelihood.variance
        num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)

        Kdiag = self.kern.Kdiag(self.X)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        Kuf = self.feature.Kuf(self.kern, self.X)
        KufKfu = tf.matmul(Kuf, Kuf, transpose_b=True)

        chol_Kuu = tf.cholesky(Kuu)
        chol_B = tf.cholesky(Kuu + noise_var ** -1.0 * KufKfu)

        # trace(Kff) - trace(Qff), with Qff = Kfu Kuu^{-1} Kuf evaluated
        # through the whitened cross-covariance. An alternative (unused)
        # bound on the max eigenvalue would be max_j sum_i |Kff - Qff|_ij.
        whitened_Kuf = tf.matrix_triangular_solve(chol_Kuu, Kuf, lower=True)
        trace_gap = tf.reduce_sum(Kdiag) - tf.reduce_sum(whitened_Kuf ** 2.0)
        inflated_noise = noise_var + trace_gap

        const = -0.5 * num_data * tf.log(2 * np.pi * noise_var)
        logdet = (tf.reduce_sum(tf.log(tf.diag_part(chol_Kuu)))
                  - tf.reduce_sum(tf.log(tf.diag_part(chol_B))))

        chol_C = tf.cholesky(Kuu + inflated_noise ** -1.0 * KufKfu)
        scaled_KufY = inflated_noise ** -1.0 * tf.matmul(Kuf, self.Y)
        v = tf.matrix_triangular_solve(chol_C, scaled_KufY, lower=True)
        quad = -0.5 * inflated_noise ** -1.0 * tf.reduce_sum(self.Y ** 2.0) + 0.5 * tf.reduce_sum(v ** 2.0)

        return const + logdet + quad
예제 #5
0
파일: maxKL.py 프로젝트: blutooth/dgp
def gauss_kl(min_q_mu, q_sq,K):
    """
    Compute the KL divergence from

          q(x) = N(-min_q_mu, q_sq)
    to
          p(x) = N(0, K)

    min_q_mu is the negated mean of q, as a rank-2 tensor (it is transposed
        and used as a matrix right-hand side below).

    q_sq is the covariance of q; it is Cholesky factorised here, so it must
        be positive definite.

    K is a positive definite matrix: the covariance of p.

    Returns 0.5 * (mu^T K^-1 mu + tr(K^-1 q_sq) - dim + log|K| - log|q_sq|).
    The quadratic term comes from h.Mul, so the result has whatever shape
    h.Mul produces (presumably 1 x 1 for a single-column mean; confirm).
    """
    q_mu = -1 * min_q_mu

    chol_K = tf.cholesky(K)
    chol_q = tf.cholesky(q_sq)

    # Quadratic (Mahalanobis) term: mu^T K^-1 mu.
    KL = h.Mul(tf.transpose(q_mu), tf.cholesky_solve(chol_K, q_mu))
    # Trace term: tr(K^-1 q_sq).
    KL += tf.trace(tf.cholesky_solve(chol_K, q_sq))
    # Dimensionality constant.
    KL -= h.get_dim(K, 0)
    # log|K| - log|q_sq|, each read off the diagonal of a Cholesky factor.
    KL += tf.reduce_sum(2*tf.log(tf.diag_part(chol_K))
                        - 2*tf.log(tf.diag_part(chol_q)))
    return KL/2
예제 #6
0
파일: helper.py 프로젝트: blutooth/gp
def log_det(Z):
    """
    Return the log-determinant of Z computed from its Cholesky factor.

    Z is first symmetrised as (Z + Z^T) / 2, then factorised; the result is
    twice the sum of the logs of the factor's diagonal. The symmetrised
    matrix must be positive definite for the factorisation to succeed.
    """
    Z = (Z + tf.transpose(Z)) / 2
    chol = tf.cholesky(Z)
    return 2 * tf.reduce_sum(tf.log(tf.diag_part(chol)))
예제 #7
0
def multivariate_gaussian_log_density(x, mu,
                                      Sigma=None, L=None,
                                      prec=None, L_prec=None):
    """
    Log density of a single vector x under a multivariate Gaussian
    x ~ N(mu, Sigma).

    The distribution may be parameterised by the covariance matrix Sigma or
    its Cholesky factor L (more efficient if available), or by the precision
    matrix prec or its Cholesky factor L_prec. The latter is useful when
    representing a Gaussian in its natural parameterisation. The explicit
    mean mu is still required (not the natural parameter prec*mu).

    When several parameterisations are supplied, the covariance form (L or
    Sigma) takes priority, then prec, then L_prec.

    x must have shape (n,) or (n, 1) as reported by extract_shape.

    Returns the scalar log density.
    """
    s = extract_shape(x)
    try:
        n, = s
    except ValueError:
        # Not a rank-1 shape: only a single column is accepted.
        n, m = s
        assert(m==1)

    if L is None and Sigma is not None:
        L = tf.cholesky(Sigma)
    if L_prec is None and prec is not None:
        L_prec = tf.cholesky(prec)

    # -0.5 * log|Sigma|, read off the diagonal of whichever factor exists.
    if L is not None:
        neg_half_logdet = -tf.reduce_sum(tf.log(tf.diag_part(L)))
    else:
        assert(L_prec is not None)
        neg_half_logdet = tf.reduce_sum(tf.log(tf.diag_part(L_prec)))

    # Quadratic form (x - mu)^T Sigma^-1 (x - mu).
    d = tf.reshape(x - mu, (n,1))
    if L is not None:
        alpha = tf.matrix_triangular_solve(L, d, lower=True)
        exponential_part= tf.reduce_sum(tf.square(alpha))
    elif prec is not None:
        exponential_part = tf.reduce_sum(d * tf.matmul(prec, d))
    else:
        assert(L_prec is not None)
        d = tf.reshape(d, (1, n))
        alpha = tf.matmul(d, L_prec)
        exponential_part= tf.reduce_sum(tf.square(alpha))

    n_log2pi = n * 1.83787706641  # log(2*pi)
    logp =  -0.5 * n_log2pi
    logp += neg_half_logdet
    logp += -0.5 * exponential_part

    return logp
예제 #8
0
def multivariate_gaussian_entropy(Sigma=None, L=None, L_prec=None):
    """
    Entropy of a multivariate Gaussian.

    The distribution is specified by its covariance Sigma, the Cholesky
    factor L of the covariance, or the Cholesky factor L_prec of the
    precision matrix. L (or Sigma) takes priority over L_prec.

    Returns 0.5 * n * (1 + log(2*pi)) + 0.5 * log|Sigma|, where n is the
    first dimension reported by extract_shape for the factor in use.

    Raises ValueError if none of the three parameterisations is given.
    """
    if Sigma is None and L is None and L_prec is None:
        raise ValueError("one of Sigma, L or L_prec must be provided")

    if L is None and Sigma is not None:
        L = tf.cholesky(Sigma)

    if L is not None:
        half_logdet = tf.reduce_sum(tf.log(tf.diag_part(L)))
        n, _ = extract_shape(L)
    else:
        # log|Sigma| = -log|precision|, hence the sign flip.
        half_logdet = -tf.reduce_sum(tf.log(tf.diag_part(L_prec)))
        n, _ = extract_shape(L_prec)

    log_2pi = 1.83787706641
    entropy = .5*n*(1 + log_2pi) + half_logdet
    return entropy
예제 #9
0
 def diagPartOp(self, tensor, dtype, expected_ans, use_gpu=False):
   """Check tf.diag_part of `tensor` (cast to `dtype`) against expected_ans.

   Both the evaluated values and the static shape of the op are compared.
   """
   with self.test_session(use_gpu=use_gpu):
     diag_part_op = tf.diag_part(
         tf.convert_to_tensor(tensor.astype(dtype)))
     diag_part_value = diag_part_op.eval()
   self.assertAllClose(diag_part_value, expected_ans)
   self.assertShapeEqual(expected_ans, diag_part_op)
예제 #10
0
def gauss_kl_white(q_mu, q_sqrt, num_latent):
    """
    KL divergence KL[q || p] against a standard-normal ("whitened") prior:

          q(x) = N(q_mu, q_sqrt^2),    p(x) = N(0, I)

    There are num_latent independent distributions, indexed by the columns
    of q_mu and by the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor; q_sqrt[:, :, d] is a lower triangular square-root
        matrix of the covariance of the d-th distribution.

    num_latent is an integer: the number of independent distributions (equal
        to the columns of q_mu and the last dim of q_sqrt).

    Returns a scalar tensor: the KL summed over all distributions.
    """
    total = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    total += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    for latent in range(num_latent):
        # Keep only the lower triangle of this latent's square root.
        chol_q = tf.batch_matrix_band_part(q_sqrt[:, :, latent], -1, 0)
        log_diag_sq = tf.log(tf.square(tf.diag_part(chol_q)))
        total -= 0.5 * tf.reduce_sum(log_diag_sq)  # log-det of q covariance
        total += 0.5 * tf.reduce_sum(tf.square(chol_q))  # trace term
    return total
예제 #11
0
파일: vgp.py 프로젝트: ShuaiW/GPflow
    def build_likelihood(self):
        r"""
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood,
        which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha, [K^-1 + diag(square(lambda))]^-1) .

        Returns the summed variational expectations minus the KL term.
        """
        K = self.kern.K(self.X)
        f_mean = tf.matmul(K, self.q_alpha) + self.mean_function(self.X)
        # For each of the data dimensions (columns of Y), find the diagonal
        # of the variance, and also the relevant parts of the KL.
        f_var, A_logdet, trAi = [], tf.zeros((1,), tf.float64), tf.zeros((1,), tf.float64)
        for d in range(self.num_latent):
            b = self.q_lambda[:,d]
            B = tf.expand_dims(b, 1)
            # A = I + diag(b) K diag(b), written with broadcasting.
            A = eye(self.num_data) + K*B*tf.transpose(B)
            L = tf.cholesky(A)
            Li = tf.matrix_triangular_solve(L, eye(self.num_data), lower=True)
            LiBi = Li / b
            # The full covariance would be diag(b**-2) - LiBi^T LiBi; only
            # its diagonal is kept here.
            f_var.append(1./tf.square(b) - tf.reduce_sum(tf.square(LiBi),0))
            A_logdet += 2*tf.reduce_sum(tf.log(tf.diag_part(L)))
            trAi += tf.reduce_sum(tf.square(Li))

        # Stack the per-latent variances so each becomes a column.
        f_var = tf.transpose(tf.pack(f_var))

        KL = 0.5*(A_logdet + trAi - self.num_data*self.num_latent + tf.reduce_sum(f_mean*self.q_alpha))

        return tf.reduce_sum(self.likelihood.variational_expectations(f_mean, f_var, self.Y)) - KL
예제 #12
0
파일: laplace.py 프로젝트: wujsAct/edward
  def initialize(self, *args, **kwargs):
    """Set up MAP on point masses, then build the ops that fill in scales.

    The normal approximations are swapped for `PointMass` variables sharing
    their `loc` while the parent class initializes, so the parent optimizes
    the means. Afterwards one assign op per latent variable is stored in
    `self.finalize_ops`, setting its scale variable from the Hessian of
    `self.loss`, and the normal approximations are restored.
    """
    normal_approximations = self.latent_vars.copy()
    point_masses = {}
    for z, qz in six.iteritems(normal_approximations):
      point_masses[z] = PointMass(params=qz.loc)
    self.latent_vars = point_masses

    super(Laplace, self).initialize(*args, **kwargs)

    point_mass_vars = list(six.itervalues(self.latent_vars))
    hessians = tf.hessians(self.loss, point_mass_vars)

    self.finalize_ops = []
    for z, hessian in zip(six.iterkeys(self.latent_vars), hessians):
      qz = normal_approximations[z]
      is_diagonal = isinstance(qz, (MultivariateNormalDiag, Normal))
      if is_diagonal:
        target_var = get_variables(qz.variance())[0]
        new_scale = 1.0 / tf.diag_part(hessian)
      else:
        # Otherwise qz is taken to be a MultivariateNormalTriL.
        target_var = get_variables(qz.covariance())[0]
        new_scale = tf.matrix_inverse(tf.cholesky(hessian))
      self.finalize_ops.append(target_var.assign(new_scale))

    # Restore the normal approximations as the model's latent variables.
    self.latent_vars = normal_approximations.copy()
예제 #13
0
def gauss_kl(q_mu, q_sqrt, K):
    """
    KL divergence KL[q || p] between full-covariance Gaussians and a shared
    zero-mean prior, evaluated for all latents in one batched solve:

          q(x) = N(q_mu, q_sqrt^2),    p(x) = N(0, K)

    The independent distributions are indexed by the columns of q_mu and by
    the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular
        square-root matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    Returns a scalar tensor: the KL summed over all distributions.
    """
    chol_K = tf.cholesky(K)
    whitened_mu = tf.matrix_triangular_solve(chol_K, q_mu, lower=True)
    mahalanobis = 0.5 * tf.reduce_sum(tf.square(whitened_mu))

    num_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    prior_logdet = num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(chol_K))))

    constant = -0.5 * tf.cast(
        tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)

    # Move the latent axis to the front and force each slice to be lower
    # triangular.
    chol_q = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)
    q_logdet = -0.5*tf.reduce_sum(
        tf.log(tf.square(tf.matrix_diag_part(chol_q))))

    # Repeat the prior factor once per latent so the solve can be batched.
    tile_multiples = tf.pack([tf.shape(chol_q)[0], 1, 1])
    chol_K_tiled = tf.tile(tf.expand_dims(chol_K, 0), tile_multiples)
    whitened_q = tf.matrix_triangular_solve(chol_K_tiled, chol_q, lower=True)
    trace = 0.5 * tf.reduce_sum(tf.square(whitened_q))

    return mahalanobis + prior_logdet + constant + q_logdet + trace
예제 #14
0
    def logpdf(self, x, mean=None, cov=1):
        """Log of the probability density function.

        Parameters
        ----------
        x : tf.Tensor
            A 1-D or 2-D tensor.
        mean : tf.Tensor, optional
            A 1-D tensor. Defaults to zero mean.
        cov : tf.Tensor, optional
            A 1-D or 2-D tensor. Defaults to identity matrix. A 1-D value
            is treated as the diagonal of the covariance.

        Returns
        -------
        tf.Tensor
            A tensor of one dimension less than the input.
        """
        x = tf.cast(x, dtype=tf.float32)
        x_shape = get_dims(x)
        if len(x_shape) == 1:
            d = x_shape[0]
        else:
            d = x_shape[1]

        if mean is None:
            r = x
        else:
            mean = tf.cast(mean, dtype=tf.float32)
            r = x - mean

        # The integer 1 is the sentinel for "identity covariance". Compare
        # by type and value: `cov is 1` depends on interpreter int caching.
        if isinstance(cov, int) and cov == 1:
            L_inv = tf.diag(tf.ones([d]))
            det_cov = tf.constant(1.0)
        else:
            cov = tf.cast(cov, dtype=tf.float32)
            if len(cov.get_shape()) == 1: # vector
                L_inv = tf.diag(1.0 / tf.sqrt(cov))
                det_cov = tf.reduce_prod(cov)
            else: # matrix
                L = tf.cholesky(cov)
                L_inv = tf.matrix_inverse(L)
                # det(cov) is the squared product of the factor's diagonal.
                det_cov = tf.pow(tf.reduce_prod(tf.diag_part(L)), 2)

        # Normalising constant shared by every row of x.
        lps = -0.5*d*tf.log(2*np.pi) - 0.5*tf.log(det_cov)
        if len(x_shape) == 1: # vector
            r = tf.reshape(r, shape=(d, 1))
            inner = tf.matmul(L_inv, r)
            lps -= 0.5 * tf.matmul(inner, inner, transpose_a=True)
            return tf.squeeze(lps)
        else: # matrix
            # TODO vectorize further
            out = []
            for r_vec in tf.unpack(r):
                r_vec = tf.reshape(r_vec, shape=(d, 1))
                inner = tf.matmul(L_inv, r_vec)
                out += [tf.squeeze(lps -
                        0.5 * tf.matmul(inner, inner, transpose_a=True))]

            return tf.pack(out)
예제 #15
0
파일: test_kerns.py 프로젝트: blutooth/dgp
 def test(self):
     """Kdiag(X) must equal the diagonal of the full matrix K(X) for every kernel."""
     for k in self.kernels:
         with k.tf_mode():
             k1 = k.Kdiag(self.X)
             k2 = tf.diag_part(k.K(self.X))
             feed = {self.x_free: k.get_free_state(), self.X: self.X_data}
             # Context manager releases the session once the values are read.
             with tf.Session() as sess:
                 k1, k2 = sess.run([k1, k2], feed_dict=feed)
         self.assertTrue(np.allclose(k1, k2))
예제 #16
0
파일: dgp3.py 프로젝트: blutooth/gp
def pred(X,X_m_1,mu,len_sc_1,noise_1):
    """
    Predict at X from values mu located at the points X_m_1.

    All kernel matrices come from h.tf_SE_K with length scale len_sc_1 and
    noise noise_1. Returns a tuple of the posterior mean, the diagonal of
    the posterior covariance reshaped to [N, 1] (N is a module-level name),
    and the full posterior covariance.
    """
    K_mm = h.tf_SE_K(X_m_1, X_m_1, len_sc_1, noise_1)
    K_nm = h.tf_SE_K(X, X_m_1, len_sc_1, noise_1)
    K_nn = h.tf_SE_K(X, X, len_sc_1, noise_1)

    weights = tf.matrix_solve(K_mm, mu)
    posterior_mean = h.Mul(K_nm, weights)

    explained = h.Mul(K_nm, tf.matrix_solve(K_mm, tf.transpose(K_nm)))
    full_cov = K_nn - explained
    marginal_var = tf.reshape(tf.diag_part(full_cov), [N, 1])
    return posterior_mean, marginal_var, full_cov
예제 #17
0
파일: deepGP.py 프로젝트: blutooth/gp
def predict2():
    """
    Build the predictive mean and an error-bar term from module-level
    tensors (K_mm_2, K_mnnm_2, K_mn_2, K_nm_2, K_nn_2, K_mm_1, sigma_2, Ytr).

    Returns (mean, var_terms), where var_terms is twice the square root of
    the predictive variance diagonal plus sigma_2 squared, shaped [N, 1].
    """
    noise_var = tf.square(sigma_2)

    # Posterior over the inducing outputs.
    cov = h.Mul(K_mm_2,
                tf.matrix_inverse(K_mm_2 + K_mnnm_2 / noise_var),
                K_mm_2)
    cov_chol = tf.cholesky(cov)
    mu = h.Mul(K_mm_2, tf.cholesky_solve(cov_chol, K_mn_2), Ytr) / noise_var

    # NOTE(review): this solve uses K_mm_1 although every other quantity in
    # the function belongs to layer 2 -- confirm it is intentional.
    mean = h.Mul(K_nm_2, tf.matrix_solve(K_mm_1, mu))

    explained = h.Mul(K_nm_2, h.safe_chol(K_mm_2, tf.transpose(K_nm_2)))
    variance = K_nn_2 - explained
    noisy_var = tf.reshape(tf.diag_part(variance) + noise_var, [N, 1])
    var_terms = 2 * tf.sqrt(noisy_var)
    return mean, var_terms
예제 #18
0
파일: main.py 프로젝트: DSLituiev/fast5-rnn
    def _compute_predictions(self, init = None):
        """Build the state and prediction tensors of the model.

        Creates the transition matrix `self.W_trans` (a mix of the matrix
        from `get_transition_matrix()` and the identity, weighted by a
        "skip" variable), the emission weights `self.W_emit`, and the
        `self.emission_similarity` scalar. It then scans `self._rnn_step_fw`
        over `self.inputs` to obtain the states and multiplies them by a
        fixed identity matrix `W_pred` to obtain the predictions.

        `init` is the initial state for the scan; when None, a uniform
        vector of length `self.hidden_layer_size` summing to one is used.

        Returns the tuple (states, predictions).
        """

        with tf.variable_scope('states'):
            with tf.variable_scope("HMM"):
                with tf.variable_scope("transition"):
                    # Skip weight, initialised to 0.1.
                    skip_prob = tf.get_variable("skip", shape=[1], initializer=tf.constant_initializer(1e-1))
                    #skip_prob = tf.Variable( np.array(1e-1, dtype=np.float32), name="skip") # .astype(np.float32)
                    # Convex-style mix of the base transition matrix and the identity.
                    self.W_trans = (1-skip_prob) * get_transition_matrix().astype(np.float32)  + skip_prob* np.eye(self.hidden_layer_size).astype(np.float32)
                    #self.W_trans = tf.Variable( transition_with_skips,
                    #                       name='W_trans', trainable=True)
                    print("W_trans", self.W_trans.get_shape())

                with tf.variable_scope("emission"):
                    "W_emit: [self.input_size, self.hidden_layer_size]"
                    # NOTE(review): the string above gives the shape as
                    # [input_size, hidden_layer_size], but the variable below
                    # is created as [hidden_layer_size, input_size].
                    if self.emission_init is None:
                        # No initial emissions supplied: trainable, near-zero random start.
                        self.W_emit = tf.get_variable("W_emit", shape = [self.hidden_layer_size, self.input_size],
                                                  initializer = tf.random_normal_initializer(0.0, 1e-6))
                    else:
                        # Supplied emissions must match the expected shape; they are frozen.
                        if not (self.emission_init.shape == (self.hidden_layer_size, self.input_size)):
                            print("self.emission_init.shape", self.emission_init.shape)
                            print("(self.hidden_layer_size, self.input_size)", (self.hidden_layer_size, self.input_size))
                            raise ValueError("wrong dimensions of  `self.emission_init`")
                        self.W_emit = tf.Variable(self.emission_init.astype(np.float32), name = "W_emit", trainable = False)
                    self.W_emit_summary = tf.image_summary("W_emit", tf.reshape(self.W_emit, [1,self.hidden_layer_size, self.input_size,1]))
                    "idea: impose kernel similarity:  maximize(W K W)"
                    "[ self.hidden_layer_size, self.nt_in_pore ]"

                    # trace(E^T E) for E = map_hex_to_pore . W_emit, i.e. the
                    # sum of squared entries of the projected emissions.
                    emission_in_pore_space = tf.matmul( self.map_hex_to_pore, self.W_emit)
                    self.emission_similarity = tf.reduce_sum( tf.diag_part( tf.matmul( tf.transpose(emission_in_pore_space),(emission_in_pore_space)) ),
                            name="emission_w_similarity")
            if init is None:
                # Default initial state: uniform over the hidden states.
                initial_state = tf.ones([self.hidden_layer_size],
                                     name='initial_state')
                initial_state = initial_state/ self.hidden_layer_size
            else:
                initial_state = init
            #states = self._rnn_step_fw(initial_state[:,0], self.inputs[0,:])
            # Run the forward step over the inputs, carrying the state along.
            states = functional_ops.scan(self._rnn_step_fw, tf.identity(self.inputs),
                                         initializer=initial_state, name='states')

            # The summary op is created for its graph side effect; the local is unused.
            states_fw_summary = tf.histogram_summary("states_fw", states)
            #states = states_fw
            #print("states:", states.get_shape())

        with tf.variable_scope('predictions'):
            # set some explicit initializer, orthogonal inialization
            "for now, keep identity mapping from hidden states to labels"
            "assume probability interpretation of values: should sum to one"
            # Fixed (non-trainable) identity map from states to predictions.
            W_pred = tf.Variable(np.eye(self.target_size, dtype = np.float32), name="W_pred", trainable=False)
            predictions = tf.matmul(states, W_pred, name='predictions')
            #predictions = states
            predictions_summary = tf.histogram_summary("predictions", predictions)
            #predictions = tf.nn.softmax(tf.matmul(states, W_pred), name='predictions'))
            # do predictions sum to one?

        return states, predictions
예제 #19
0
 def diagOp(self, diag, dtype, expected_ans, use_gpu=False):
   """Check tf.diag on `diag` and tf.diag_part on `expected_ans` round-trip.

   tf.diag(diag cast to dtype) must equal expected_ans, and
   tf.diag_part(expected_ans) must equal diag, in values and static shape.
   """
   with self.test_session(use_gpu=use_gpu):
     forward_op = tf.diag(tf.convert_to_tensor(diag.astype(dtype)))
     forward_value = forward_op.eval()
     inverse_op = tf.diag_part(expected_ans)
     inverse_value = inverse_op.eval()
   self.assertAllClose(forward_value, expected_ans)
   self.assertAllClose(inverse_value, diag)
   self.assertShapeEqual(expected_ans, forward_op)
   self.assertShapeEqual(diag, inverse_op)
예제 #20
0
파일: dgp3.py 프로젝트: blutooth/gp
def predict(K_mn,sigma,K_mm,K_nn):
    """
    Build the predictive mean and an error-bar term.

    Besides its arguments the function reads the module-level names M,
    K_mnnm_2, Ytr and N. Returns (mean, var_terms), where var_terms is twice
    the square root of the predictive variance diagonal plus sigma squared,
    shaped [N, 1].
    """
    K_nm = tf.transpose(K_mn)
    noise_var = tf.square(sigma)

    # A fixed 0.1 is added on the diagonal before solving.
    Sig_Inv = 1e-1*np.eye(M) + K_mm + K_mnnm_2/noise_var
    solved_K_mn = tf.matrix_solve(Sig_Inv, K_mn)

    mu_post = h.Mul(solved_K_mn, Ytr)/noise_var
    mean = h.Mul(K_nm, mu_post)

    prior_reduction = h.Mul(K_nm, h.safe_chol(K_mm, K_mn))
    posterior_addition = h.Mul(K_nm, solved_K_mn)
    variance = K_nn - prior_reduction + posterior_addition

    noisy_var = tf.reshape(tf.diag_part(variance) + noise_var, [N, 1])
    var_terms = 2*tf.sqrt(noisy_var)

    return mean, var_terms
예제 #21
0
 def test(self):
     """Kdiag(X) must equal the diagonal of K(X) for every kernel."""
     with self.test_context() as session:
         # The seed is fixed, so every kernel is fed the same 30 points.
         X_data = np.random.RandomState(1).randn(30, self.dim)
         for kernel in self.kernels:
             kernel.initialize(session=session, force=True)
             X = tf.placeholder(tf.float64, [30, self.dim])
             diag_op = kernel.Kdiag(X)
             full_diag_op = tf.diag_part(kernel.K(X))
             diag_value, full_diag_value = session.run(
                 [diag_op, full_diag_op], feed_dict={X: X_data})
             self.assertTrue(np.allclose(diag_value, full_diag_value))
예제 #22
0
파일: gplvm.py 프로젝트: blutooth/dgp
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.

        The bound is assembled from the kernel psi-statistics of the latent
        inputs (self.X_mean, self.X_var) and reduced by the KL divergence
        between q(x) and the prior given by self.X_prior_mean and
        self.X_prior_var. Returns a scalar tensor.
        """
        num_inducing = tf.shape(self.Z)[0]
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        psi0, psi1, psi2 = ke.build_psi_stats(
            self.Z, self.kern, self.X_mean, self.X_var)
        # Small diagonal jitter keeps the Cholesky factorisation stable.
        Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
        chol_Kuu = tf.cholesky(Kuu)

        # Intermediate matrices.
        A = tf.matrix_triangular_solve(
            chol_Kuu, tf.transpose(psi1), lower=True) / sigma
        half_solved = tf.matrix_triangular_solve(chol_Kuu, psi2, lower=True)
        AAT = tf.matrix_triangular_solve(
            chol_Kuu, tf.transpose(half_solved), lower=True) / sigma2
        chol_B = tf.cholesky(AAT + eye(num_inducing))
        log_det_B = 2. * tf.reduce_sum(tf.log(tf.diag_part(chol_B)))
        c = tf.matrix_triangular_solve(
            chol_B, tf.matmul(A, self.Y), lower=True) / sigma

        # KL[q(x) || p(x)] between factorised Gaussians.
        NQ = tf.cast(tf.size(self.X_mean), tf.float64)
        D = tf.cast(tf.shape(self.Y)[1], tf.float64)
        kl_q_logvar = -0.5*tf.reduce_sum(tf.log(self.X_var))
        kl_prior_logvar = 0.5*tf.reduce_sum(tf.log(self.X_prior_var))
        kl_const = 0.5 * NQ
        kl_quadratic = 0.5 * tf.reduce_sum(
            (tf.square(self.X_mean - self.X_prior_mean) + self.X_var)
            / self.X_prior_var)
        KL = kl_q_logvar + kl_prior_logvar - kl_const + kl_quadratic

        # Terms of the log marginal bound.
        ND = tf.cast(tf.size(self.Y), tf.float64)
        gauss_const = -0.5 * ND * tf.log(2 * np.pi * sigma2)
        logdet_term = -0.5 * D * log_det_B
        data_term = -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
        projected_term = 0.5 * tf.reduce_sum(tf.square(c))
        trace_term = -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                                 tf.reduce_sum(tf.diag_part(AAT)))

        return (gauss_const + logdet_term + data_term + projected_term
                + trace_term - KL)
예제 #23
0
  def test_multivariate_normal_diag(self):
    """Run Laplace with MultivariateNormalDiag posteriors and check that the
    resulting covariances are diagonal."""
    def make_posterior(size):
      # Scales start at ones (identity); the means start at random values.
      return MultivariateNormalDiag(
          loc=tf.Variable(tf.random_normal([size])),
          scale_diag=tf.Variable(tf.ones(size)))

    with self.test_session() as sess:
      _, D, w_true, X_train, y_train, X, w, b, y = self._setup()

      qw = make_posterior(D)
      qb = make_posterior(1)

      inference = ed.Laplace({w: qw, b: qb}, data={X: X_train, y: y_train})
      inference.run(n_iter=100)

      self._test(sess, qw, qb, w_true)
      for posterior in (qw, qb):
        # A covariance equals its own diagonal only when it is diagonal.
        self.assertAllClose(
            posterior.covariance().eval(),
            tf.diag(tf.diag_part(posterior.covariance())).eval())
예제 #24
0
def decov_loss(xs):
    """DeCov loss from 'Reducing Overfitting In Deep Networks by
    Decorrelating Representation' (https://arxiv.org/pdf/1511.06068.pdf).

    xs is flattened to (batch, features) using its static batch size. The
    loss is half the squared Frobenius norm of the batch-averaged outer
    product of the centred features, with the diagonal contribution removed.
    """
    batch_size = int(xs.get_shape()[0])
    flat = tf.reshape(xs, [batch_size, -1])
    centred = flat - tf.reduce_mean(flat, 0, True)

    # Per-example outer products, averaged over the batch.
    column = tf.expand_dims(centred, 2)
    row = tf.transpose(column, perm=[0,2,1])
    covariance = tf.reduce_mean(tf.matmul(column, row), 0)

    frobenius_sq = tf.reduce_sum(tf.square(covariance))
    diagonal_sq = tf.reduce_sum(tf.square(tf.diag_part(covariance)))
    return 0.5*(frobenius_sq - diagonal_sq)
예제 #25
0
 def testRankFourFloatTensorUnknownShape(self):
   """tf.diag_part must work when the static shape is fully or partly unknown.

   Despite the name, the input used here is a 3 x 3 matrix.
   """
   matrix = np.random.rand(3, 3)
   idx = np.arange(3)
   expected_diag = matrix[idx, idx]
   static_shapes = (None, (None, 3), (3, None))
   for static_shape in static_shapes:
     with self.test_session(use_gpu=False):
       partial = tf.convert_to_tensor(matrix.astype(np.float32))
       partial.set_shape(static_shape)
       diag_op = tf.diag_part(partial)
       diag_value = diag_op.eval()
     self.assertAllClose(diag_value, expected_diag)
     self.assertShapeEqual(expected_diag, diag_op)
예제 #26
0
 def testDiagPartGrad(self):
   """Numerically check the gradient of tf.diag_part.

   Covers rank-2 and rank-4 inputs in float32 and float64; the gradient
   error reported by TensorFlow must stay below 1e-4 in every case.
   """
   np.random.seed(0)
   shapes = ((3,3), (3,3,3,3))
   dtypes = (tf.float32, tf.float64)
   with self.test_session(use_gpu=False):
     for shape in shapes:
       for dtype in dtypes:
         x1 = tf.constant(np.random.rand(*shape), dtype=dtype)
         y = tf.diag_part(x1)
         error = tf.test.compute_gradient_error(x1, x1.get_shape().as_list(),
                                                y, y.get_shape().as_list())
         tf.logging.info("error = %f", error)
         self.assertLess(error, 1e-4)
예제 #27
0
파일: sgpr.py 프로젝트: GPflow/GPflow
    def build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.

        Returns a scalar tensor (every term is a full reduction or a scalar).
        """
        float_type = settings.dtypes.float_type
        noise_var = self.likelihood.variance
        sigma = tf.sqrt(noise_var)

        num_inducing = tf.shape(self.Z)[0]
        y_shape = tf.shape(self.Y)
        num_data = tf.cast(y_shape[0], float_type)
        output_dim = tf.cast(y_shape[1], float_type)

        residual = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.kern.K(self.Z, self.X)
        # Diagonal jitter keeps the Cholesky factorisation stable.
        jitter = eye(num_inducing) * settings.numerics.jitter_level
        Kuu = self.kern.K(self.Z) + jitter
        chol_Kuu = tf.cholesky(Kuu)

        # Intermediate matrices, scaled by the noise standard deviation.
        A = tf.matrix_triangular_solve(chol_Kuu, Kuf, lower=True) / sigma
        AAT = tf.matmul(A, tf.transpose(A))
        chol_B = tf.cholesky(AAT + eye(num_inducing))
        c = tf.matrix_triangular_solve(
            chol_B, tf.matmul(A, residual), lower=True) / sigma

        # Terms of the log marginal bound.
        gauss_const = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        logdet_B = -output_dim * tf.reduce_sum(tf.log(tf.diag_part(chol_B)))
        logdet_noise = 0.5 * num_data * output_dim * tf.log(noise_var)
        data_fit = -0.5 * tf.reduce_sum(tf.square(residual)) / noise_var
        projected_fit = 0.5 * tf.reduce_sum(tf.square(c))
        trace_Kff = -0.5 * tf.reduce_sum(Kdiag) / noise_var
        trace_Qff = 0.5 * tf.reduce_sum(tf.diag_part(AAT))

        return (gauss_const + logdet_B - logdet_noise + data_fit
                + projected_fit + trace_Kff + trace_Qff)
예제 #28
0
파일: helper.py 프로젝트: blutooth/gp
def multivariate_normal(x, mu, L):
    """
    Log density of x under a Gaussian with mean mu and covariance L L^T.

    L is the Cholesky decomposition of the covariance.
    x and mu are either vectors (ndim=1) or matrices. In the matrix case, we
    assume independence over the *columns*: the number of rows must match the
    size of L.

    Returns a scalar tensor: the log density summed over all columns.
    """
    d = tf.convert_to_tensor(x - mu)
    # tf.rank() returns a Tensor, so comparing it with a Python int inside a
    # Python conditional does not pick the vector branch when the graph is
    # built. Use the static rank, which is a plain int (or None if unknown,
    # in which case the matrix path is taken as before).
    is_vector = d.get_shape().ndims == 1
    if is_vector:
        # The triangular solve needs a matrix right-hand side.
        d = tf.expand_dims(d, 1)
    alpha = tf.matrix_triangular_solve(L, d, lower=True)
    num_col = 1 if is_vector else tf.shape(x)[1]
    num_col = tf.cast(num_col, tf.float32)
    num_dims = tf.cast(tf.shape(x)[0], tf.float32)
    ret = - 0.5 * num_dims * num_col * np.log(2 * np.pi)
    ret += - num_col * tf.reduce_sum(tf.log(tf.diag_part(L)))
    ret += - 0.5 * tf.reduce_sum(tf.square(alpha))
    return tf.reduce_sum(ret)
예제 #29
0
파일: densities.py 프로젝트: ShuaiW/GPflow
def multivariate_normal(x, mu, L):
    """
    Log density of x under a Gaussian with mean mu and covariance L L^T.

    L is the Cholesky decomposition of the covariance.

    x and mu are either vectors (ndim=1) or matrices. In the matrix case, we
    assume independence over the *columns*: the number of rows must match the
    size of L.

    Returns a scalar tensor: the log density summed over all columns.
    """
    d = tf.convert_to_tensor(x - mu)
    # tf.rank() returns a Tensor, so comparing it with a Python int inside a
    # Python conditional does not pick the vector branch when the graph is
    # built. Use the static rank, which is a plain int (or None if unknown,
    # in which case the matrix path is taken as before).
    is_vector = d.get_shape().ndims == 1
    if is_vector:
        # The triangular solve needs a matrix right-hand side.
        d = tf.expand_dims(d, 1)
    alpha = tf.matrix_triangular_solve(L, d, lower=True)
    num_col = 1 if is_vector else tf.shape(x)[1]
    ret =  - 0.5 * tf.cast(tf.size(x), tf.float64) * np.log(2 * np.pi)
    ret += - tf.cast(num_col, tf.float64) * tf.reduce_sum(tf.log(tf.diag_part(L)))
    ret += - 0.5 * tf.reduce_sum(tf.square(alpha))
    return ret
예제 #30
0
 def call(self, inputs):
   """Return the Normal predictive distribution of the mean response.

   Without fitted coefficients (both `coeffs_mean` and
   `coeffs_precision_tril_op` are None) the prior predictive is returned:
   zero mean and variance xnew xnew^T. Otherwise the mean is
   inputs . coeffs_mean and the variance is the diagonal of
   xnew P^{-1} xnew^T, where P is the precision represented by
   `coeffs_precision_tril_op`.
   """
   no_posterior = (self.coeffs_mean is None and
                   self.coeffs_precision_tril_op is None)
   if no_posterior:
     # p(mean(ynew) | xnew) = Normal(ynew | mean = 0, variance = xnew xnew^T)
     prior_variance = tf.reduce_sum(tf.square(inputs), -1)
     return ed.Normal(loc=0., scale=tf.sqrt(prior_variance))

   # p(mean(ynew) | xnew, x, y) = Normal(ynew |
   #   mean = xnew (1/noise_variance) (1/noise_variance x^T x + I)^{-1}x^T y,
   #   variance = xnew (1/noise_variance x^T x + I)^{-1} xnew^T)
   tril_op = self.coeffs_precision_tril_op
   posterior_mean = tf.einsum('nm,m->n', inputs, self.coeffs_mean)
   # Two solves with the triangular factor (plain, then adjoint) apply the
   # inverse precision to xnew^T.
   half_solved = tril_op.solve(inputs, adjoint_arg=True)
   fully_solved = tril_op.solve(half_solved, adjoint=True)
   posterior_covariance = tf.matmul(inputs, fully_solved)
   posterior_variance = tf.diag_part(posterior_covariance)
   return ed.Normal(loc=posterior_mean, scale=tf.sqrt(posterior_variance))
예제 #31
0
def kl_term(m, S, K_zz, K_zz_inv, u_ovln, L, stabilizer_value):
    """Build the Gaussian KL term 0.5 * (first + second - third + fourth).

    The four pieces are
        first  = trace(K_zz_inv S)
        second = logdet(K_zz + eps I) - logdet(S + eps I)
        third  = number of entries of m
        fourth = (u_ovln * 1 - m)^T K_zz_inv (u_ovln * 1 - m)
    which matches the closed form of KL(N(m, S) || N(u_ovln * 1, K_zz)).

    Args:
        m: mean of the first Gaussian; used as a 1-D tensor (it is combined
            with a 1-D vector of ones before being turned into a column).
        S: covariance of the first Gaussian.
        K_zz: covariance of the second Gaussian.
        K_zz_inv: inverse of K_zz, supplied by the caller.
        u_ovln: scalar multiplying the all-ones vector that serves as the
            mean of the second Gaussian.
        L: matrix whose diagonal is returned for diagnostics (presumably a
            Cholesky factor of S -- confirm against the caller).
        stabilizer_value: eps added on the diagonal of K_zz and S before the
            log-determinants are taken.

    Returns:
        A tuple ``(kl, [S_logdet, alt_logdet_via_L])``.  Note that
        ``alt_logdet_via_L`` is only ``diag_part(L)``, not a log-determinant.

    Raises:
        ValueError: if the module-level DTYPE is neither tf.float32 nor
            tf.float64.
    """
    # Fail before any op is added to the graph.  Previously an unsupported
    # DTYPE only printed a message and the function then died with a
    # NameError on `third`.
    if DTYPE not in (tf.float32, tf.float64):
        raise ValueError(
            'ERROR: DTYPE must be set to either tf.float32 or tf.float64')

    mean_diff = tf.expand_dims(
        u_ovln * tf.ones([tf.shape(m)[0]], dtype=DTYPE) - m, 1)
    first = tf.trace(tf.matmul(K_zz_inv, S), name='kl_first')

    # Notes on computing the log-determinant ratio:
    #   1. log(det(K_zz) / det(S)) via tf.matrix_determinant gives NaN because
    #      the determinants get very large for big matrices.
    #   2. logdet (Cholesky based) is used below; the decomposition is not
    #      always possible, so a small multiple of the identity is added.
    #   3. tf.linalg.slogdet did not seem to have a gradient defined.
    #   An alternative for S would be 2 * sum(log(diag_part(L))).
    with tf.name_scope('log_of_determinant_ratio'):
        posdef_stabilizer = tf.eye(tf.shape(K_zz)[0],
                                   dtype=DTYPE) * stabilizer_value

        with tf.name_scope('K_zz_logdet'):
            K_zz_logdet = tf.linalg.logdet(K_zz + posdef_stabilizer)

        with tf.name_scope('S_logdet'):
            S_logdet = tf.linalg.logdet(S + posdef_stabilizer)
            # Returned for inspection only; it does not enter the KL value.
            alt_logdet_via_L = tf.diag_part(L)

        second = tf.subtract(K_zz_logdet, S_logdet, name='kl_second')

    # Dimensionality term, cast to the working precision.
    third = tf.cast(tf.shape(m)[0], DTYPE, name='kl_third')

    # Quadratic form in the mean difference; squeeze the 1x1 result.
    fourth = tf.squeeze(tf.matmul(tf.matmul(tf.transpose(mean_diff), K_zz_inv),
                                  mean_diff),
                        name='kl_fourth')

    return 0.5 * (first + second - third + fourth), [
        S_logdet, alt_logdet_via_L
    ]
예제 #32
0
    def _compute_prediction_and_loss(self, l, label_inputs, unit_idx):
        """Attach a 1x1-conv prediction head to `l` and build its losses.

        Besides the returned tensors, three scalar losses (squared error,
        weighted cross entropy, summed absolute difference) and the pixel
        accuracy are registered with `add_moving_summary`.

        Args:
            l: feature tensor the prediction head is applied to.
            label_inputs: triple `(l_label, l_eval_mask, l_dyn_hw)`; each is
                indexed by the label-image index chosen for this unit.
            unit_idx: index of the unit; used to look up the block it
                belongs to in `self.cumsum_blocks`.

        Returns:
            `(logits, cross_entropy)`: the flattened `[-1, num_classes]`
            logits and the masked, class-weighted cross-entropy scalar.
        """
        l_label, l_eval_mask, l_dyn_hw = label_inputs

        ## Ground truth
        # compute block idx
        layer_idx = unit_idx
        # first idx that is > layer_idx
        bi = bisect.bisect_right(self.cumsum_blocks, layer_idx)
        # When features are resized to the label, the first label image is
        # always used; otherwise pick the one matching this block.
        label_img_idx = self.bi_to_scale_idx(
            bi) if not self.do_scale_feat_to_label else 0
        label = l_label[
            label_img_idx]  # note: this is a probability distribution over labels
        eval_mask = l_eval_mask[label_img_idx]
        dyn_hw = l_dyn_hw[label_img_idx]
        n_non_void_samples = tf.reduce_sum(eval_mask)
        # Add 1 when the mask sums to (almost) zero so the divisions below
        # never divide by zero.
        n_non_void_samples += tf.cast(tf.less_equal(n_non_void_samples, 1e-12),
                                      tf.float32)

        ## Compute flattened logits
        # Assume all previous layers have gone through BNReLU, so conv directly
        # NOTE(review): ch_in is not referenced again in this method.
        ch_in = l.get_shape().as_list()[self.ch_dim]
        l = Conv2D('linear', l, self.num_classes, 1, use_bias=True)
        # Keep a handle on the conv variables; it is re-attached to `logits`
        # below because the reshape returns a new tensor object.
        logit_vars = l.variables
        if self.data_format == 'channels_first':
            l = tf.transpose(l, [0, 2, 3, 1])
        if self.do_scale_feat_to_label:
            # at this stage, the logits are already channels_last
            l = ResizeImages('resize_logits',
                             l,
                             dyn_hw,
                             data_format='channels_last')
        logits = tf.reshape(l, [-1, self.num_classes], name='logits')
        logits.variables = logit_vars

        ## Square error between distributions.
        # Implement our own here b/c class weighting.
        prob = tf.nn.softmax(logits, name='pred_prob')
        prob_img_shape = tf.stack([-1, dyn_hw[0], dyn_hw[1], self.num_classes])
        # NOTE(review): prob_img is not used below; it only adds the named op
        # 'pred_prob_img' to the graph.
        prob_img = tf.reshape(prob, prob_img_shape, name='pred_prob_img')
        sqr_err = tf.reduce_sum(\
            tf.multiply(tf.square(label - prob), self.class_weight), \
            axis=1, name='pixel_prob_square_err')
        # Average over the non-void samples only.
        sqr_err = tf.divide(tf.reduce_sum(sqr_err * eval_mask),
                            n_non_void_samples,
                            name='prob_sqr_err')
        add_moving_summary(sqr_err)

        ## Weighted cross entropy
        # Have to implement our own weighted softmax cross entropy
        # because TF doesn't provide one
        # Because logits and cost are returned at the end of this func,
        # we use _logits to represent the shifted logits.
        # Subtracting the row maximum before exp() keeps the log-softmax
        # numerically stable.
        max_logits = tf.reduce_max(logits, axis=1, keep_dims=True)
        _logits = logits - max_logits
        normalizers = tf.reduce_sum(tf.exp(_logits), axis=1, keep_dims=True)
        _logits = _logits - tf.log(normalizers)
        cross_entropy = -tf.reduce_sum(\
            tf.multiply(label * _logits, self.class_weight), axis=1)
        cross_entropy = cross_entropy * eval_mask
        cross_entropy = tf.divide(tf.reduce_sum(cross_entropy),
                                  n_non_void_samples,
                                  name='cross_entropy_loss')
        add_moving_summary(cross_entropy)

        ## Unweighted total abs diff
        sum_abs_diff = sum_absolute_difference(prob, label)
        sum_abs_diff *= eval_mask
        sum_abs_diff = tf.divide(tf.reduce_sum(sum_abs_diff),
                                 n_non_void_samples,
                                 name='sum_abs_diff')
        add_moving_summary(sum_abs_diff)

        ## confusion matrix for iou and pixel level accuracy
        int_pred = tf.argmax(logits, 1, name='int_pred')
        int_label = tf.argmax(label, 1, name='int_label')
        cm = tf.confusion_matrix(labels=int_label, predictions=int_pred,\
            num_classes=self.num_classes, name='confusion_matrix', weights=eval_mask)

        ## pixel level accuracy
        # The diagonal of the confusion matrix holds the (mask-weighted)
        # correctly classified samples.
        accu = tf.divide(tf.cast(tf.reduce_sum(tf.diag_part(cm)), dtype=tf.float32), \
                         n_non_void_samples, name='accuracy')
        add_moving_summary(accu)
        return logits, cross_entropy
예제 #33
0
    def train(self):
        """
        Build the current model, train it and dump the collected statistics.

        The encoder output ``z_mu`` is perturbed with unit Gaussian noise and
        decoded.  The loss is the mean of ``lambda_val`` times the
        reconstruction term plus a latent term computed from the diagonal of
        ``z_mu^T z_mu / batch_size``.

        Every 500 iterations the compression weight ``lambda_value`` is
        multiplied by 1.06.  While the latent loss is at most 0.1 the scaled
        reconstruction loss is collected to estimate the entropy of Y; once
        it exceeds 0.1 the information-plane statistics are recorded and
        re-binned.  After the last iteration everything is written to
        ``self.dump_path`` through ``IOTools.save_to_file``.
        """

        self.logger.info("train model")

        tf.reset_default_graph()

        # define placeholder
        x = tf.placeholder('float32', [None, 10])
        y = tf.placeholder('float32', [None, 10])
        lambda_val = tf.placeholder('float32', [1, 1])

        # build encoder
        z_mu = self.__build_encoder(x, self.hidden_dim)

        # parametrize sparsity layer
        ada = tf.matmul(tf.transpose(z_mu), z_mu) * (1.0 / self.batch_size)
        a_dp_a = tf.diag_part(ada)
        z_ls2 = tf.log(a_dp_a + 1)

        # calc z
        eps = tf.random_normal((self.batch_size, self.hidden_dim),
                               0,
                               1,
                               dtype=tf.float32)  # Adding a random number
        z = tf.add(z_mu, eps)

        # build decoder
        y_hat, y_ls2 = self.__build_decoder(z, 10)

        # define loss
        reconstr_loss = lambda_val * tf.reduce_sum(
            0.5 * y_ls2 + (tf.square(y - y_hat) / (2.0 * tf.exp(y_ls2))), 1)
        latent_loss = 0.5 * tf.reduce_sum(z_ls2)
        total_loss = tf.reduce_mean(reconstr_loss) + latent_loss

        # define optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                           epsilon=1e-8).minimize(total_loss)

        # run training
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            # init data iterator
            number_of_iterations = 70000

            itx = list()
            ity = list()
            h_y = list()
            nzn = list()
            lambda_list = list()
            latent_list = list()
            lambda_value = 0.4

            # Binned statistics.  They are rebuilt at every checkpoint whose
            # latent loss exceeds 0.1, but must exist even if that never
            # happens, otherwise the final save fails with a NameError.
            xl = list()
            yl = list()
            yl_means = list()
            nzn_list = list()

            # run training procedure
            for epoch in tqdm(range(number_of_iterations)):

                # sample new batch
                x_batch, y_batch, y_orig_batch = ArtificialDataIterator.next_batch(
                    self.batch_size, self.doTransform)

                # run training
                _, loss, ll, rl, sparse_matrix, y_mu, zmu = session.run(
                    (optimizer, total_loss, latent_loss, reconstr_loss, a_dp_a,
                     y_hat, z_mu),
                    feed_dict={
                        x: x_batch,
                        y: y_batch,
                        lambda_val: np.asarray([[lambda_value]])
                    })

                if (epoch % 500 == 0 and epoch > 0):

                    # if latent loss higher 0.1
                    if (np.mean(ll) > 1e-1):

                        # save MI(x,z)
                        itx.append(np.mean(ll))
                        lambda_list.append(lambda_value)
                        latent_list.append(zmu)

                        # calc empirical Y entropy
                        # NOTE(review): this is NaN when h_y is still empty,
                        # i.e. when no earlier checkpoint took the else
                        # branch below.
                        entropy = np.mean(np.absolute(np.asarray(h_y)))

                        print("Cost: %.2f, I(x,t): %.4f, I(t,y): %4f" %
                              (loss, np.mean(ll),
                               -(np.mean(rl) / lambda_value) - entropy))

                        # save MI (z,y)
                        ity.append(-(np.mean(rl) / lambda_value) - entropy)

                        # save size of used latent dimensions
                        num_latent_dim = len([
                            i for i, v in enumerate(sparse_matrix) if v > 0.25
                        ])
                        nzn.append(num_latent_dim)

                        mi_x_t = np.asarray(itx)
                        mi_t_y = np.asarray(ity)
                        nzn_array = np.asarray(nzn)

                        # At most 12 bins, roughly one per three recorded
                        # points, spanning the observed I(x,t) range.
                        nbins = int(min(12, max(1, np.floor(len(mi_x_t) / 3))))
                        breaks = np.linspace(0.99 * min(mi_x_t), max(mi_x_t),
                                             nbins + 1)

                        # Rebuild the binned statistics from scratch.
                        xl = list()
                        yl = list()
                        yl_means = list()

                        nzn_list = list()

                        for k in range(nbins):
                            matchings_indices = [
                                i for i, item in enumerate(mi_x_t)
                                if item > breaks[k] and item < breaks[k + 1]
                            ]
                            # if more than 3 MI -> create new bin
                            if len(matchings_indices) > 3:
                                xl.append(np.mean(mi_x_t[matchings_indices]))
                                yl.append(mi_t_y[matchings_indices])
                                yl_means.append(
                                    np.median(mi_t_y[matchings_indices]))

                                nzn_list.append(
                                    np.min(nzn_array[matchings_indices]))
                    else:
                        # collect mutual information in order to calculate the empirical entropy of Y
                        h_y.append(-(np.mean(rl) / lambda_value))

                    # increase compression parameter lambda
                    lambda_value = lambda_value * 1.06

            IOTools.save_to_file(
                (yl_means, yl, xl, sparse_matrix, nzn_list, nzn, ity, itx,
                 y_orig_batch, zmu,
                 Transformation.UniformToOrig(y_orig_batch,
                                              y_mu), lambda_list, latent_list),
                self.dump_path)
    def __init__(self, n_input, kernel_size, n_hidden, reg_constant1 = 1.0, re_constant2 = 1.0, batch_size = 100, reg = None, \
                denoise = False, model_path = None, restore_path = None, \
                logs_path = './logs', num_modalities=2):
        """Build the multi-modal self-expressive autoencoder graph and session.

        Every modality is encoded, its flattened latent code ``z`` is
        multiplied by a shared coefficient matrix ``Coef`` whose diagonal is
        zeroed, and the result is decoded again.  The loss is the weighted
        sum of the reconstruction error, ``reg_constant1`` times the squared
        norm of ``Coef`` and ``re_constant2`` times the self-expression error
        ``||z - Coef z||^2``.  Modality '0' is weighted more heavily than the
        others in both error terms.

        Args:
            n_input: input height and width; ``n_input[0]`` and ``n_input[1]``
                size the placeholders.
            kernel_size: stored on the instance.
            n_hidden: stored on the instance.
            reg_constant1: weight of the coefficient regulariser.
            re_constant2: weight of the self-expression loss.
            batch_size: number of samples per batch; also used to flatten the
                latent codes.
            reg: stored on the instance.
            denoise: must be falsy; a denoising path is not implemented here.
            model_path: stored on the instance.
            restore_path: stored on the instance.
            logs_path: directory for the TensorBoard summary writer.
            num_modalities: number of input modalities (placeholders are
                keyed '0', '1', ...).

        Raises:
            NotImplementedError: if ``denoise`` is truthy.
        """
        # Only the noise-free path is defined.  Previously denoise=True left
        # `latents` and `shape` unbound and construction failed later with a
        # NameError after part of the graph had already been built.
        if denoise:
            raise NotImplementedError(
                "denoise=True is not supported: no denoising encoder path is defined")

        self.n_input = n_input
        self.kernel_size = kernel_size
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.reg = reg
        self.model_path = model_path
        self.restore_path = restore_path
        self.iter = 0
        self.num_modalities = num_modalities
        weights = self._initialize_weights()
        self.x = {}

        # input required to be fed
        for i in range(0, self.num_modalities):
            modality = str(i)
            self.x[modality] = tf.placeholder(tf.float32, [None, self.n_input[0], self.n_input[1], 1])

        self.learning_rate = tf.placeholder(tf.float32, [],
                                        name='learningRate')

        x_input = self.x
        latents, shape = self.encoder(x_input, weights, self.num_modalities)

        # Zero the diagonal so that no sample is used to express itself.
        Coef = weights['Coef']
        Coef = Coef - tf.diag(tf.diag_part(Coef))
        self.Coef = Coef
        z = {}
        z_c = {}
        latent_c = {}
        for i in range(0, self.num_modalities):
            modality = str(i)
            # Flatten each latent map to one row per sample, apply the
            # self-expression matrix, then restore the original shape.
            z[modality] = tf.reshape(latents[modality], [batch_size, -1])
            z_c[modality] = tf.matmul(Coef, z[modality])
            latent_c[modality] = tf.reshape(z_c[modality], tf.shape(latents[modality]))

        self.z = z
        self.z_c = z_c

        self.x_r = self.decoder(latent_c, weights, self.num_modalities, shape)

        # l_2 reconstruction loss (modality '0' weighted 0.6, the rest 0.1)
        self.reconst_cost_x = 0.6*tf.reduce_sum(tf.pow(tf.subtract(self.x['0'], self.x_r['0']), 2.0))
        for i in range(1, self.num_modalities):
            modality = str(i)
            self.reconst_cost_x = self.reconst_cost_x + 0.1*tf.reduce_sum(tf.pow(tf.subtract(self.x[modality],
                                                                                               self.x_r[modality]), 2.0))

        tf.summary.scalar("recons_loss", self.reconst_cost_x)

        # squared Frobenius norm of the coefficient matrix
        self.reg_losses = tf.reduce_sum(tf.pow(self.Coef, 2.0))

        tf.summary.scalar("reg_loss", reg_constant1 * self.reg_losses)

        # self-expression loss (modality '0' weighted 0.3, the rest 0.05)
        self.selfexpress_losses = 0.3*tf.reduce_sum(tf.pow(tf.subtract(self.z['0'], self.z_c['0']), 2.0))
        for i in range(1, self.num_modalities):
            modality = str(i)
            self.selfexpress_losses = self.selfexpress_losses + 0.05*tf.reduce_sum(tf.pow(tf.subtract(self.z[modality],
                                                                                               self.z_c[modality]), 2.0))

        tf.summary.scalar("selfexpress_loss", re_constant2 * self.selfexpress_losses)

        self.loss = self.reconst_cost_x + reg_constant1 * self.reg_losses + re_constant2 * self.selfexpress_losses

        self.merged_summary_op = tf.summary.merge_all()
        self.optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate).minimize(self.loss) #GradientDescentOptimizer #AdamOptimizer

        self.init = tf.global_variables_initializer()
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        self.sess = tf.InteractiveSession(config=tfconfig)
        self.sess.run(self.init)
        # Variables whose name starts with "Coef" are left out of the saver.
        self.saver = tf.train.Saver([v for v in tf.trainable_variables() if not (v.name.startswith("Coef"))])
        self.summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
예제 #35
0
    def __init__(self, is_training, word_embeddings, simple_position=False):
        """Build the piecewise-CNN encoder, attention, loss and test graph.

        The sentence embedding is convolved with a width-3 kernel, max-pooled
        separately over the three segments selected by ``self.mask`` and
        passed through ReLU.  The pooled features go through the attention
        chosen by ``FLAGS.katt_flag`` and are scored against
        ``self.relation_matrix``.  When ``is_training`` is false an additional
        per-bag test output is built in ``self.test_output``.

        Args:
            is_training: whether to build the training-only or also the test
                part of the graph.
            word_embeddings: forwarded to ``NN.__init__``.
            simple_position: forwarded to ``NN.__init__``.
        """
        NN.__init__(self, is_training, word_embeddings, simple_position)
        with tf.name_scope("conv-maxpool"):
            # Row 0 is an all-zero row; rows 1-3 are one-hot selectors for the
            # three pooling segments.
            mask_embedding = tf.constant(
                [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            input_sentence = tf.expand_dims(self.input_embedding, axis=1)
            x = tf.layers.conv2d(inputs=input_sentence,
                                 filters=FLAGS.hidden_size,
                                 kernel_size=[1, 3],
                                 strides=[1, 1],
                                 padding='same',
                                 kernel_initializer=tf.contrib.layers.
                                 xavier_initializer_conv2d())
            x = tf.reshape(x, [-1, self.max_length, FLAGS.hidden_size, 1])
            # Broadcast the segment mask against the features and take the
            # maximum over the sequence axis, once per segment.
            x = tf.reduce_max(
                tf.reshape(pcnn_mask, [-1, 1, self.max_length, 3]) *
                tf.transpose(x, [0, 2, 1, 3]),
                axis=2)
            x = tf.nn.relu(tf.reshape(x, [-1, self.output_size]))

        if FLAGS.katt_flag != 0:
            stack_repre = self.katt(x, is_training)
        else:
            stack_repre = self.att(x, is_training)

        with tf.name_scope("loss"):
            logits = tf.matmul(stack_repre, tf.transpose(
                self.relation_matrix)) + self.bias
            # Only the weighted loss is kept; an unweighted cross entropy
            # used to be assigned here and was overwritten immediately.
            self.loss = tf.losses.softmax_cross_entropy(
                onehot_labels=self.label, logits=logits, weights=self.weights)
            self.output = tf.nn.softmax(logits)
            tf.summary.scalar('loss', self.loss)
            self.predictions = tf.argmax(logits, 1, name="predictions")
            self.correct_predictions = tf.equal(self.predictions,
                                                tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")

        if not is_training:
            with tf.name_scope("test"):
                if FLAGS.katt_flag != 0:
                    test_attention_logit = self.katt_test(x)
                else:
                    test_attention_logit = self.att_test(x)
                test_tower_output = []
                for i in range(FLAGS.test_batch_size):
                    # Rows scope[i]:scope[i + 1] belong to the i-th bag.
                    test_attention_score = tf.nn.softmax(
                        tf.transpose(test_attention_logit[
                            self.scope[i]:self.scope[i + 1], :]))
                    final_repre = tf.matmul(test_attention_score,
                                            x[self.scope[i]:self.scope[i + 1]])
                    # Use the instance's relation matrix and bias, as in the
                    # loss above; the bare names were undefined here.
                    logits = tf.matmul(
                        final_repre,
                        tf.transpose(self.relation_matrix)) + self.bias
                    # Keep, for each relation, the probability obtained with
                    # that relation's own attention weights.
                    output = tf.diag_part(tf.nn.softmax(logits))
                    test_tower_output.append(output)
                test_stack_output = tf.reshape(
                    tf.stack(test_tower_output),
                    [FLAGS.test_batch_size, self.num_classes])
                self.test_output = test_stack_output
def train_main(hparams):
    """
    Main training routine for the dot semantic network bot.

    Builds the embedding + dot-product scoring graph, trains it for
    ``hparams.nb_epochs`` epochs on the UDC dataset, logs metrics to the
    experiment, and saves a checkpoint after every epoch.  The session is
    closed before the default graph is reset.

    :param hparams: parsed hyper-parameters (experiment name, dataset paths,
        batch size, embedding size, number of epochs, ...)
    :return: None
    """

    # -----------------------
    # INIT EXPERIMENT
    # ----------------------
    exp = Experiment(name=hparams.exp_name,
                     debug=hparams.debug,
                     description=hparams.exp_desc,
                     autosave=False,
                     save_dir=hparams.test_tube_dir)

    exp.add_argparse_meta(hparams)
    exp.save()

    # -----------------------
    # LOAD DATASET
    # ----------------------
    udc_dataset = UDCDataset(vocab_path=hparams.vocab_path,
                             train_path=hparams.dataset_train_path,
                             test_path=hparams.dataset_test_path,
                             val_path=hparams.dataset_val_path,
                             max_seq_len=hparams.max_seq_len)

    # -----------------------
    # INIT TF VARS
    # ----------------------
    # input_x holds chat history
    # input_y holds our responses
    input_x = tf.placeholder(
        dtype=tf.int32, shape=[hparams.batch_size, None], name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[hparams.batch_size, None], name='input_y')

    # ----------------------
    # EMBEDDING LAYER
    # ----------------------
    # you can preload your own or learn in the network
    # in this case we'll just learn it in the network
    embedding = tf.get_variable('embedding',
        [udc_dataset.vocab_size, hparams.embedding_dim])

    # ----------------------
    # RESOLVE EMBEDDINGS
    # ----------------------
    # Lookup the embeddings.
    embedding_x = tf.nn.embedding_lookup(embedding, input_x)
    embedding_y = tf.nn.embedding_lookup(embedding, input_y)

    # Generates 1 vector per training example.
    x = tf.reduce_sum(embedding_x, axis=1)
    y = tf.reduce_sum(embedding_y, axis=1)

    # ----------------------
    # OPTIMIZATION PROBLEM
    # ----------------------
    # loss = -mean_i(S[i, i] - logsumexp_j S[i, j]): the diagonal entry of
    # each row is treated as the correct pairing and the rest of the row as
    # negatives (assumes S is a square batch-by-batch score matrix).
    S = dot_product_scoring(x, y, is_training=True)
    K = tf.reduce_logsumexp(S, axis=1)
    loss = -tf.reduce_mean(tf.diag_part(S) - K)

    # allow optimizer to be changed through hyper params
    optimizer = get_optimizer(hparams=hparams, minimize=loss)

    # ----------------------
    # TF ADMIN (VAR INIT, SESS)
    # ----------------------
    sess = tf.Session()
    try:
        init_vars = tf.global_variables_initializer()
        sess.run(init_vars)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

        # ----------------------
        # TRAINING ROUTINE
        # ----------------------
        # admin vars
        nb_batches_served = 0
        eval_every_n_batches = hparams.eval_every_n_batches

        # placeholders shown on the progress bar until the first evaluation
        train_err = 1000
        prec_at_1 = 0
        prec_at_2 = 0

        # iter for the needed epochs
        print('\n\n', '-'*100,'\n  {} TRAINING\n'.format(hparams.exp_name.upper()), '-'*100, '\n\n')
        for epoch in range(hparams.nb_epochs):
            print('training epoch:', epoch + 1)
            progbar = Progbar(target=udc_dataset.nb_tng, width=50)
            train_gen = udc_dataset.train_generator(batch_size=hparams.batch_size, max_epochs=1)

            # mini batches
            for batch_context, batch_utterance in train_gen:
                feed_dict = {
                    input_x: batch_context,
                    input_y: batch_utterance
                }

                # OPT: run one step of optimization
                optimizer.run(session=sess, feed_dict=feed_dict)
                # update loss metrics
                if nb_batches_served % eval_every_n_batches == 0:

                    # loss and precision on the batch that was just trained on
                    train_err = loss.eval(session=sess, feed_dict=feed_dict)
                    prec_at_1 = test_precision_at_k(S, feed_dict, k=1, sess=sess)
                    prec_at_2 = test_precision_at_k(S, feed_dict, k=2, sess=sess)

                    # log the metrics to the experiment
                    exp.add_metric_row({'tng loss': train_err, 'P@1': prec_at_1, 'P@2': prec_at_2})

                nb_batches_served += 1

                progbar.add(n=len(batch_context), values=[('train_err', train_err),
                                                          ('P@1', prec_at_1),
                                                          ('P@2', prec_at_2)])

            # ----------------------
            # END OF EPOCH PROCESSING
            # ----------------------
            # calculate the val loss
            print('\nepoch complete...\n')
            check_val_stats(loss, S, udc_dataset, hparams, input_x, input_y, exp, sess, epoch)

            # save model
            save_model(saver=saver, hparams=hparams, sess=sess, epoch=epoch)

            # save exp data
            exp.save()
    finally:
        # Release the session's resources even if training fails; resetting
        # the default graph while a session is still active is undefined
        # behaviour.
        sess.close()

    tf.reset_default_graph()
예제 #37
0
    def build_model(self):
        """Defines the GP model.

        The loss is computed for partial feedback settings (bandits), so only
        the observed outcome is backpropagated (see weighted loss).
        Selects the optimizer and, finally, it also initializes the graph.

        The method creates non-trainable variables that cache the training
        inputs, labels and weights, the GP hyperparameters, the Cholesky
        factor and inverse of the training covariance, the marginal-likelihood
        based loss, an Adam training op, the prediction ops and the
        TensorBoard summaries.  It relies on attributes set elsewhere on the
        instance (e.g. `self.x_in`, `self.y`, `self.weights`, `self.x`,
        `self.n`, `self.sess`).
        """

        logging.info("Initializing model %s.", self.name)
        self.global_step = tf.train.get_or_create_global_step()

        # Define state for the model (inputs, etc.)
        # validate_shape=False lets these variables later be assigned tensors
        # whose shape differs from the initial value.
        self.x_train = tf.get_variable(
            "training_data",
            initializer=tf.ones([self.hparams.batch_size, self.n_in],
                                dtype=tf.float64),
            validate_shape=False,
            trainable=False)
        self.y_train = tf.get_variable("training_labels",
                                       initializer=tf.zeros(
                                           [self.hparams.batch_size, 1],
                                           dtype=tf.float64),
                                       validate_shape=False,
                                       trainable=False)
        self.weights_train = tf.get_variable(
            "weights_train",
            initializer=tf.ones([self.hparams.batch_size, self.n_out],
                                dtype=tf.float64),
            validate_shape=False,
            trainable=False)
        # Ops that copy the current inputs / weights into the cached variables.
        self.input_op = tf.assign(self.x_train,
                                  self.x_in,
                                  validate_shape=False)
        self.input_w_op = tf.assign(self.weights_train,
                                    self.weights,
                                    validate_shape=False)

        # Per-output normalisation statistics.  The standard deviation is
        # set through var_op below; the mean is a trainable variable.
        self.input_std = tf.get_variable("data_standard_deviation",
                                         initializer=tf.ones([1, self.n_out],
                                                             dtype=tf.float64),
                                         dtype=tf.float64,
                                         trainable=False)
        self.input_mean = tf.get_variable("data_mean",
                                          initializer=tf.zeros(
                                              [1, self.n_out],
                                              dtype=tf.float64),
                                          dtype=tf.float64,
                                          trainable=True)

        # GP Hyperparameters
        self.noise = tf.get_variable("noise",
                                     initializer=tf.cast(0.0,
                                                         dtype=tf.float64))
        self.amplitude = tf.get_variable("amplitude",
                                         initializer=tf.cast(1.0,
                                                             dtype=tf.float64))
        self.amplitude_linear = tf.get_variable("linear_amplitude",
                                                initializer=tf.cast(
                                                    1.0, dtype=tf.float64))
        self.length_scales = tf.get_variable("length_scales",
                                             initializer=tf.zeros(
                                                 [1, self.n_in],
                                                 dtype=tf.float64))
        self.length_scales_lin = tf.get_variable("length_scales_linear",
                                                 initializer=tf.zeros(
                                                     [1, self.n_in],
                                                     dtype=tf.float64))

        # Latent embeddings of the different outputs for task covariance
        self.task_vectors = tf.get_variable(
            "latent_task_vectors",
            initializer=tf.random_normal([self.n_out, self.task_latent_dim],
                                         dtype=tf.float64))

        # Normalize outputs across each dimension
        # Since we have different numbers of observations across each task, we
        # normalize by their respective counts.
        index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0),
                                       self.n_out)
        # Replace zero counts by one to avoid dividing by zero.
        index_counts = tf.where(
            index_counts > 0, index_counts,
            tf.ones(tf.shape(index_counts), dtype=tf.float64))
        # Count-normalised standard deviation of y, with 1e-4 added under the
        # square root.
        self.var_op = tf.assign(
            self.input_std,
            tf.sqrt(1e-4 + tf.reduce_sum(tf.square(
                self.y - tf.reduce_sum(self.y, axis=0) / index_counts),
                                         axis=0) / index_counts))

        # Make sure input_std has been updated before it is used to normalise.
        with tf.control_dependencies([self.var_op]):
            y_normed = self.atleast_2d(
                (self.y - self.input_mean) / self.input_std, self.n_out)
            # Keep only the entries with positive weight, as a single column.
            y_normed = self.atleast_2d(
                tf.boolean_mask(y_normed, self.weights > 0), 1)
        self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

        # Observation noise
        self.alpha = tf.nn.softplus(self.noise) + 1e-6

        # Covariance
        # Input covariance times task covariance, plus alpha on the diagonal.
        # The cached training variables are refreshed first.
        with tf.control_dependencies(
            [self.input_op, self.input_w_op, self.out_op]):
            self.self_cov = (
                self.cov(self.x_in, self.x_in) *
                self.task_cov(self.weights, self.weights) +
                tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * self.alpha)

        # Cholesky factor and explicit inverse of the training covariance.
        self.chol = tf.cholesky(self.self_cov)
        self.kinv = tf.cholesky_solve(
            self.chol, tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64))

        # Cached copy of the inverse covariance.
        self.input_inv = tf.Variable(tf.eye(self.hparams.batch_size,
                                            dtype=tf.float64),
                                     validate_shape=False,
                                     trainable=False)
        self.input_cov_op = tf.assign(self.input_inv,
                                      self.kinv,
                                      validate_shape=False)

        # Log determinant of the covariance from its Cholesky factor:
        # log|K| = 2 * sum(log(diag(chol))); 1e-16 guards against log(0).
        with tf.control_dependencies([self.input_cov_op]):
            logdet = 2.0 * tf.reduce_sum(
                tf.log(tf.diag_part(self.chol) + 1e-16))

        # Negative log marginal likelihood (note the leading minus sign).
        self.marginal_ll = -tf.reduce_sum(
            -0.5 *
            tf.matmul(tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) -
            0.5 * logdet - 0.5 * self.n * np.log(2 * np.pi))

        zero = tf.cast(0., dtype=tf.float64)
        one = tf.cast(1., dtype=tf.float64)
        standard_normal = tfd.Normal(loc=zero, scale=one)

        # Loss is the negative marginal likelihood minus the log-priors:
        # standard normal on the amplitudes and on exp(noise), and a normal
        # with scale 10 on the task vectors.
        self.loss = tf.reduce_sum(self.marginal_ll - (
            standard_normal.log_prob(self.amplitude) +
            standard_normal.log_prob(tf.exp(self.noise)) +
            standard_normal.log_prob(self.amplitude_linear) +
            tfd.Normal(loc=zero, scale=one * 10.).log_prob(self.task_vectors)))

        # Optimizer for hyperparameters
        optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
        vars_to_optimize = [
            self.amplitude, self.length_scales, self.length_scales_lin,
            self.amplitude_linear, self.noise, self.input_mean
        ]

        # The task embeddings are only trained when requested.
        if self.learn_embeddings:
            vars_to_optimize.append(self.task_vectors)
        grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
        self.train_op = optimizer.apply_gradients(grads,
                                                  global_step=self.global_step)

        # Predictions for test data
        self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

        # create tensorboard metrics
        self.create_summaries()
        self.summary_writer = tf.summary.FileWriter(
            "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)
        # Adds NaN/Inf checks for the floating-point tensors in the graph.
        self.check = tf.add_check_numerics_ops()
예제 #38
0
    def DoOneRun(self,
                 run_id,
                 rf_number,
                 nn_replication,
                 prefix='',
                 seed=0,
                 batch_count=1):
        """Compare two approximations of the kernel gram matrix on input batches.

        For each of ``batch_count`` batches the exact gram matrix is evaluated
        together with a neural-net based approximation (normalised so that its
        diagonal is 1) and a random-features approximation. Each pair
        (exact, approximation) is passed to a ``Stat`` accumulator.

        Args:
            run_id: Identifier of this run; used in the random-features file
                name and in the seed passed to ``rf.GenerateOrLoadRF``.
            rf_number: Number of random features; stored on ``self.config``.
            nn_replication: Either a list/tuple handed directly to
                ``self.skeleton.SetReplication``, or a scalar multiplier
                applied to every entry of ``self.original_replication``.
            prefix: String inserted into the random-features file name.
            seed: Extra offset added to the random-features seed.
            batch_count: Number of batches to evaluate.

        Returns:
            Tuple ``(NN_K_stat, RF_K_stat)`` of the two ``Stat`` accumulators.
        """
        batch_size = self.config.batch_size

        self.config.rf_number = rf_number
        self.config.rf_file_name = ('features_' + prefix + '_' +
                                    str(rf_number) + '_' + str(run_id) +
                                    '.pkl')
        srf = rf.GenerateOrLoadRF(self.config, seed=run_id + 2718281828 + seed)

        if isinstance(nn_replication, (list, tuple)):
            self.skeleton.SetReplication(nn_replication)
        else:
            self.skeleton.SetReplication(
                [int(x * nn_replication) for x in self.original_replication])
        with tf.Graph().as_default(), tf.Session('') as sess:
            examples = self.get_inputs(batch_size)

            # Calculate the exact gram matrix for the batch
            gram = tf.reshape(kf.Kernel(self.skeleton, examples, examples),
                              [batch_size, batch_size])

            # Calculate the approximate gram matrix using a neural net
            rep, _ = NN.NeuralNet(self.skeleton, self.config, examples)
            srep = tf.squeeze(rep)
            approx_gram = tf.matmul(srep, tf.transpose(srep))

            # Normalize the approximate gram matrix so that every diagonal
            # entry (the squared norm of each representation) becomes 1.
            norms = tf.reshape(tf.sqrt(tf.diag_part(approx_gram)), [-1, 1])
            nn_gram = tf.div(approx_gram, tf.matmul(norms,
                                                    tf.transpose(norms)))

            # Compute the approximate gram matrix using random features
            parameters = tf.constant(
                np.zeros((rf_number,
                          self.config.number_of_classes)).astype(np.float32))
            rand_features = tf.SparseTensor(srf.features[0], srf.features[1],
                                            srf.features[2])
            _, rf_vectors = rf.RandomFeaturesGraph(
                self.skeleton, self.config.number_of_classes, examples,
                rf_number, rand_features, parameters, srf.weights)
            rf_gram = tf.matmul(rf_vectors, rf_vectors, transpose_b=True)
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            RF_K_stat = Stat()
            NN_K_stat = Stat()
            try:
                # `range` works on both Python 2 and 3; the index is unused.
                for _ in range(batch_count):
                    # Only the three matrices that feed the statistics are
                    # fetched; the un-normalised gram matrix is not needed.
                    gram_np, nn_gram_np, rf_gram_np = sess.run(
                        [gram, nn_gram, rf_gram])
                    RF_K_stat.AddToStat(gram_np, rf_gram_np)
                    NN_K_stat.AddToStat(gram_np, nn_gram_np)
            finally:
                # Always stop and join the queue-runner threads, even when a
                # session run fails, so they do not outlive the session.
                coord.request_stop()
                coord.join(threads)
            return NN_K_stat, RF_K_stat
예제 #39
0
    def init_issue(self, Xtrain, Ytrain, Xtest=None, Ytest=None):
        """Build the TensorFlow graph (inputs, metric, predictions, loss, train op).

        All created tensors and ops are stored as attributes of ``self.ph_tf``;
        plain-Python bookkeeping goes to ``self.ph``.

        Parameters
        ----------
        Xtrain : 2-D array; its ``shape`` is unpacked as (number of rows, dim).
        Ytrain : training targets, reshaped to a single row further down.
        Xtest, Ytest : optional held-out data. The test branch is used only
            when BOTH are given; otherwise predictions are made for the
            training points themselves.
        """
        size_temp, dim = Xtrain.shape
        flag_test_exists = False
        size_test = None
        # real_size is the training size by default and is overwritten with
        # the test size below when a test set is supplied.
        self.ph.real_size = size_temp
        if (Xtest is not None) and (Ytest is not None):
            flag_test_exists = True
            size_test = Xtest.shape[0]
            self.ph.use_test = True
            self.ph.X_test_comp = Xtest
            self.ph.Y_test_comp = Ytest
            self.ph.real_size = size_test
            self.ph_tf.SubXTest = tf.constant(Xtest, dtype=tf.float64)
            self.ph_tf.SubYTest = tf.constant(Ytest, dtype=tf.float64)

        # Mini-batch mode feeds training data through placeholders; otherwise
        # the whole training set is baked into the graph as constants and
        # batch_size is reset to the training-set size.
        if self.batch_size < size_temp:
            self.ph_tf.SubXTrain = tf.placeholder(dtype=tf.float64,
                                                  shape=(self.batch_size, dim))
            self.ph_tf.SubYTrain = tf.placeholder(dtype=tf.float64,
                                                  shape=(self.batch_size, ))
        else:
            self.ph.full_batch = True
            self.batch_size = size_temp
            self.ph_tf.SubXTrain = tf.constant(Xtrain, dtype=tf.float64)
            self.ph_tf.SubYTrain = tf.constant(Ytrain, dtype=tf.float64)
        # Raw per-feature weights: zeros unless a stored value (r_ww) exists.
        if self.ph.r_ww is None:
            ww_init = np.zeros(dim)
        else:
            ww_init = self.ph.r_ww
        self.ph_tf.ww_ = tf.Variable(ww_init, dtype=tf.float64)
        # w_ : softmax of the raw weights (positive, sums to 1).
        self.ph_tf.w_ = tf.nn.softmax(self.ph_tf.ww_)
        # ws_ : column vector sqrt(w_); WS_ : its outer product, (dim, dim).
        self.ph_tf.ws_ = tf.reshape(tf.sqrt(self.ph_tf.w_), (-1, 1))
        self.ph_tf.WS_ = tf.matmul(self.ph_tf.ws_,
                                   self.ph_tf.ws_,
                                   transpose_b=True)
        # Raw (dim, dim) matrix: zeros unless a stored value (r_WMMMM) exists.
        if self.ph.r_WMMMM is None:
            WMMMM_init = np.zeros([dim, dim])
        else:
            WMMMM_init = self.ph.r_WMMMM
        self.ph_tf.WMMMM_ = tf.Variable(WMMMM_init, dtype=tf.float64)
        # Squash to the open interval (-1, 1).
        self.ph_tf.WMMM_ = (tf.sigmoid(self.ph_tf.WMMMM_) - 0.5) * 2
        # Symmetrise.
        self.ph_tf.WMM_ = (self.ph_tf.WMMM_ +
                           tf.transpose(self.ph_tf.WMMM_)) / 2
        # Replace the diagonal by ones.
        self.ph_tf.WM_ = self.ph_tf.WMM_ - tf.diag(
            tf.diag_part(self.ph_tf.WMM_)) + tf.diag(np.ones(dim))

        # self.pp_ = tf.Variable(1, dtype=tf.float64)
        # self.p_ = tf.sigmoid(self.pp_) + 1
        # self.p_ = tf.pow(self.pp_, 2) + 0.01

        # m_ : element-wise product of the weight outer product and the
        # unit-diagonal symmetric matrix; used as the quadratic form below.
        self.ph_tf.m_ = self.ph_tf.WS_ * self.ph_tf.WM_
        # self.m_ = tf.diag(tf.nn.softmax(tf.diag_part((self.mm_ + tf.transpose(self.mm_)) / 2)))

        # DistP[i, j] = a_i^T m a_i + b_j^T m b_j - 2 a_i^T m b_j, where a_i
        # are training rows and b_j are test rows (or training rows again when
        # no test set is given).
        if flag_test_exists:
            # Ad / Bd : per-row quadratic forms x^T m x for train / test rows.
            self.ph_tf.Ad = tf.reduce_sum(
                tf.matmul(self.ph_tf.SubXTrain, self.ph_tf.m_) *
                self.ph_tf.SubXTrain,
                axis=1)
            self.ph_tf.Bd = tf.reduce_sum(
                tf.matmul(self.ph_tf.SubXTest, self.ph_tf.m_) *
                self.ph_tf.SubXTest,
                axis=1)
            # Tile both to shape (batch_size, size_test).
            self.ph_tf.AD = tf.tile(tf.reshape(self.ph_tf.Ad, (-1, 1)),
                                    [1, size_test])
            self.ph_tf.BD = tf.tile(tf.reshape(self.ph_tf.Bd, (1, -1)),
                                    [self.batch_size, 1])
            # Cross term train x test.
            self.ph_tf.AM = tf.matmul(tf.matmul(self.ph_tf.SubXTrain,
                                                self.ph_tf.m_),
                                      self.ph_tf.SubXTest,
                                      transpose_b=True)
            self.ph_tf.DistP = self.ph_tf.AD + self.ph_tf.BD - 2 * self.ph_tf.AM
        else:
            # Train x train: the per-row quadratic forms are the diagonal of
            # the cross-term matrix.
            self.ph_tf.AM = tf.matmul(tf.matmul(self.ph_tf.SubXTrain,
                                                self.ph_tf.m_),
                                      self.ph_tf.SubXTrain,
                                      transpose_b=True)
            self.ph_tf.Ad = tf.diag_part(self.ph_tf.AM)
            self.ph_tf.AD = tf.tile(tf.reshape(self.ph_tf.Ad, (1, -1)),
                                    [self.batch_size, 1])
            self.ph_tf.DistP = self.ph_tf.AD + tf.transpose(
                self.ph_tf.AD) - 2 * self.ph_tf.AM

        # DistP is built from float64 tensors above, so this cast should not
        # change the dtype.
        self.ph_tf.Dist = tf.cast(self.ph_tf.DistP, tf.float64)
        # self.Dist = tf.pow(self.DistP, self.p_)
        # self.Dist = self.AD + tf.transpose(self.AD) - 2 * self.AM
        # Trainable scale applied to the distances: 1 unless a stored value
        # (r_KN) exists.
        if self.ph.r_KN is None:
            init_kn = 1
        else:
            init_kn = self.ph.r_KN
        # self.KN_base = tf.Variable(initial_value=np.log(init_kn), dtype=tf.float64)
        # self.KN = tf.exp(self.KN_base)
        self.ph_tf.KN = tf.Variable(initial_value=init_kn, dtype=tf.float64)
        self.ph_tf.DistR = self.ph_tf.Dist * self.ph_tf.KN * self.ph.KN0
        if not flag_test_exists:
            # Without a test set each point is compared with itself; adding
            # +inf on the diagonal makes the self-distance infinite.
            self.ph_tf.DistR += tf.cast(tf.diag([np.inf] * self.batch_size),
                                        dtype=tf.float64)

        # Turn distances into matching scores; cal_dist_mode selects the map.
        if self.cal_dist_mode == 0:
            self.ph_tf.DistRR = -self.ph_tf.DistR
        elif self.cal_dist_mode == 1:
            self.ph_tf.DistRR = tf.reciprocal(self.ph_tf.DistR)
            if not flag_test_exists:
                # NOTE(review): this modifies DistR AFTER DistRR was computed
                # from it, so DistRR is unaffected, and inf - inf on the
                # diagonal yields NaN in DistR. Possibly DistRR was meant
                # here — confirm against the author's intent.
                self.ph_tf.DistR -= tf.cast(tf.diag([np.inf] *
                                                    self.batch_size),
                                            dtype=tf.float64)
        elif self.cal_dist_mode == 2:
            self.ph_tf.DistRR = tf.sigmoid(-self.ph_tf.DistR)
        else:
            self.ph_tf.DistRR = tf.reciprocal(
                self.ph_tf.DistR) - self.ph_tf.DistR

        # Softmax over axis 0 (the training-point axis): every column of
        # IMatch holds weights over training points that sum to 1.
        self.ph_tf.IMatch = tf.nn.softmax(self.ph_tf.DistRR, axis=0)
        # self.ph_tf.IMatch = tf.nn.softmax(self.ph_tf.DistRR)
        # Prediction = weighted average of training targets: (1, n) x (n, k).
        SubYTrain_vec = tf.reshape(self.ph_tf.SubYTrain, (1, -1))
        self.ph_tf.Y_predict = tf.matmul(SubYTrain_vec, self.ph_tf.IMatch)
        if flag_test_exists:
            self.ph_tf.Y_compare = tf.reshape(self.ph_tf.SubYTest, (1, -1))
        else:
            self.ph_tf.Y_compare = SubYTrain_vec
        # loss : half the summed squared error (tf.nn.l2_loss); my_loss : RMSE.
        self.ph_tf.loss = tf.nn.l2_loss(
            tf.subtract(self.ph_tf.Y_compare, self.ph_tf.Y_predict))
        self.ph_tf.my_loss = tf.sqrt(
            tf.reduce_mean(
                tf.square(self.ph_tf.Y_compare - self.ph_tf.Y_predict)))
        # Regulariser: sum of all entries of m_, scaled by reg_alpha.
        self.ph_tf.reg_term = tf.reduce_sum(self.ph_tf.m_) * self.ph.reg_alpha
        self.ph_tf.op_tar = self.ph_tf.loss + self.ph_tf.reg_term
        # Optimizer keyword arguments come from self.ph.dict_para_optimizer.
        self.ph_tf.optimizer = tf.train.AdamOptimizer(
            **self.ph.dict_para_optimizer)
        # self.optimizer = tf.train.AdamOptimizer(learning_rate=10, beta1=0.5, beta2=0.8, epsilon=1e-8)
        # self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=1,)
        self.ph_tf.train = self.ph_tf.optimizer.minimize(self.ph_tf.op_tar)
        self.ph_tf.init_op = tf.global_variables_initializer()
예제 #40
0
 def test_trace_KiX_against_solve(self):
     """trace_KiX(B) must match the trace of solve(B) for a random square B."""
     dense = np.random.randn(self.N, self.N)

     # Value from the dedicated trace routine.
     via_trace = self.session.run(self.mat.trace_KiX(dense), self.feed)

     # Reference value: solve first, then sum the diagonal.
     solved_diag = tf.diag_part(self.mat.solve(dense))
     via_solve = self.session.run(tf.reduce_sum(solved_diag), self.feed)

     self.assertTrue(np.allclose(via_trace, via_solve))
예제 #41
0
    def logpdf(self, x, mean=None, cov=1):
        """Log of the probability density function.

        Parameters
        ----------
        x : tf.Tensor
            A 1-D or 2-D tensor. For a 2-D tensor every row is one point.
        mean : tf.Tensor, optional
            A 1-D tensor. Defaults to zero mean.
        cov : tf.Tensor, optional
            A 1-D tensor (diagonal of the covariance) or a 2-D tensor (full
            covariance). The plain number ``1`` (the default) selects the
            identity matrix.

        Returns
        -------
        tf.Tensor
            A tensor of one dimension less than the input.
        """
        x = tf.cast(x, dtype=tf.float32)
        x_shape = get_dims(x)
        is_vector = len(x_shape) == 1
        d = x_shape[0] if is_vector else x_shape[1]

        if mean is None:
            r = x
        else:
            mean = tf.cast(mean, dtype=tf.float32)
            r = x - mean

        # Compare by value, not identity: `cov is 1` relies on CPython's
        # small-int caching and raises a SyntaxWarning on modern Python.
        # The isinstance guard keeps tensors/arrays out of the `==` test.
        if isinstance(cov, (int, float)) and cov == 1:
            L_inv = tf.diag(tf.ones([d]))
            det_cov = tf.constant(1.0)
        else:
            cov = tf.cast(cov, dtype=tf.float32)
            if len(cov.get_shape()) == 1:  # vector
                L_inv = tf.diag(1.0 / tf.sqrt(cov))
                det_cov = tf.reduce_prod(cov)
            else:  # matrix
                L = tf.cholesky(cov)
                L_inv = tf.matrix_inverse(L)
                # det(cov) = det(L)^2 = (product of diag(L))^2
                det_cov = tf.pow(tf.reduce_prod(tf.diag_part(L)), 2)

        # Normalisation constant shared by all points.
        lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov)
        if is_vector:
            r = tf.reshape(r, shape=(d, 1))
            inner = tf.matmul(L_inv, r)
            lps -= 0.5 * tf.matmul(inner, inner, transpose_a=True)
            return tf.squeeze(lps)

        # 2-D input: whiten all rows at once. Row i of `inner` equals
        # L_inv . r_i, so its squared norm is the Mahalanobis term of point i.
        # This replaces the per-row unpack/pack loop and therefore also works
        # when the number of rows is not statically known.
        inner = tf.matmul(r, L_inv, transpose_b=True)
        return lps - 0.5 * tf.reduce_sum(tf.square(inner), 1)
예제 #42
0
 def bpr(self, yhat):
     """Return mean(-log(sigmoid(diag(yhat) - yhat^T))) over all entries.

     presumably yhat is a square score matrix whose diagonal holds the
     scores of the positive items — confirm against the caller.
     """
     score_gap = tf.diag_part(yhat) - tf.transpose(yhat)
     pairwise_loss = -tf.log(tf.nn.sigmoid(score_gap))
     return tf.reduce_mean(pairwise_loss)
예제 #43
0
 def cross_entropy(self, yhat):
     """Return the mean negative log of the diagonal entries of yhat."""
     # tf.diag_part extracts the values on the main diagonal.
     diag_values = tf.diag_part(yhat)
     # The small constant keeps log() finite when an entry is exactly zero.
     neg_log = -tf.log(diag_values + 1e-24)
     return tf.reduce_mean(neg_log)
예제 #44
0
    def __init__(self,
                 is_training,
                 word_embeddings,
                 cell_name,
                 simple_position=False):
        """Build the bidirectional-RNN encoder, attention, loss and test outputs.

        Args:
            is_training: Forwarded to ``NN.__init__``, to dropout and to the
                attention helpers; when false, the per-bag test outputs are
                built as well.
            word_embeddings: Forwarded to ``NN.__init__``.
            cell_name: Passed to ``self.get_rnn_cell`` to pick the RNN cell.
            simple_position: Forwarded to ``NN.__init__``.
        """
        NN.__init__(self, is_training, word_embeddings, simple_position)
        # NOTE(review): `rate` of tf.layers.dropout is the fraction of units
        # to DROP, yet the attribute passed is called keep_prob — confirm the
        # intended semantics.
        input_sentence = tf.layers.dropout(self.input_embedding,
                                           rate=self.keep_prob,
                                           training=is_training)
        with tf.name_scope('bi-rnn'):
            fw_cell = self.get_rnn_cell(FLAGS.hidden_size, cell_name)
            bw_cell = self.get_rnn_cell(FLAGS.hidden_size, cell_name)
            _, states = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                input_sentence,
                sequence_length=self.len,
                dtype=tf.float32,
                scope='bi-dynamic-rnn')
            fw_states, bw_states = states
            # Cells with tuple states (e.g. LSTM) expose several tensors per
            # direction; keep only the first one.
            if isinstance(fw_states, tuple):
                fw_states = fw_states[0]
                bw_states = bw_states[0]
            # Concatenate the unwrapped per-direction states. Using the raw
            # `states` here would ignore the unwrapping above and yield a
            # wrongly shaped tensor for tuple-state cells.
            x = tf.concat([fw_states, bw_states], axis=1)

        if FLAGS.katt_flag != 0:
            stack_repre = self.katt(x, is_training, False)
        else:
            stack_repre = self.att(x, is_training, False)

        with tf.name_scope("loss"):
            logits = tf.matmul(stack_repre, tf.transpose(
                self.relation_matrix)) + self.bias
            # Weighted softmax cross-entropy. (A preceding unweighted loss
            # was assigned to self.loss and immediately overwritten; that
            # dead computation has been dropped.)
            self.loss = tf.losses.softmax_cross_entropy(
                onehot_labels=self.label, logits=logits, weights=self.weights)
            self.output = tf.nn.softmax(logits)
            tf.summary.scalar('loss', self.loss)
            self.predictions = tf.argmax(logits, 1, name="predictions")
            self.correct_predictions = tf.equal(self.predictions,
                                                tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")

        if not is_training:
            with tf.name_scope("test"):
                if FLAGS.katt_flag != 0:
                    test_attention_logit = self.katt_test(x)
                else:
                    test_attention_logit = self.att_test(x)
                test_tower_output = []
                for i in range(FLAGS.test_batch_size):
                    # Rows self.scope[i]:self.scope[i + 1] of x form one
                    # group; attention is normalised within that group.
                    test_attention_score = tf.nn.softmax(
                        tf.transpose(test_attention_logit[
                            self.scope[i]:self.scope[i + 1], :]))
                    final_repre = tf.matmul(test_attention_score,
                                            x[self.scope[i]:self.scope[i + 1]])
                    # Use the instance's relation matrix and bias, as in the
                    # loss scope above; the bare names were undefined here.
                    logits = tf.matmul(
                        final_repre,
                        tf.transpose(self.relation_matrix)) + self.bias
                    # Entry k of the diagonal is the probability of class k
                    # under the representation attended for class k.
                    output = tf.diag_part(tf.nn.softmax(logits))
                    test_tower_output.append(output)
                test_stack_output = tf.reshape(
                    tf.stack(test_tower_output),
                    [FLAGS.test_batch_size, self.num_classes])
                self.test_output = test_stack_output
예제 #45
0
def mu_tilde_square(X_data, Z, S, m, Kzz_inv, a, g):
    '''
    Compute the mean and variance at the data points from inducing points.

    N : num datapoints
    D : datapoint dimensionality
    M : number inducing points

    IN:
    ---
    X_data   : (N, D)
    Z        : (M, D)
    S        : (M, M)
    m        : (M)
    K_zz_inv : (M, M)
    a        : (D)
    g        : ()

    OUT:
    ----
    mu      : (N)
    sig_sqr : (N)
    parts   : list [XX_cov, middle, right, k_zx] with the three variance
              terms, each (N), and the cross kernel k_zx (M, N)
    '''
    # DEBUG:
    # Kzz_inv = tf.eye(tf.shape(Z)[0])

    with tf.name_scope('K_ZX'):
        # k_zx : (M, N)
        k_zx = ard_kernel(Z, X_data, gamma=g, alphas=a)
    with tf.name_scope('K_XZ'):
        # k_xz : (N, M)
        k_xz = tf.transpose(k_zx, name='K_XZ')
    with tf.name_scope('K_XX'):
        # k_xx : (N, N)
        K_xx = ard_kernel(X_data, X_data, gamma=g, alphas=a)

    with tf.name_scope('kernel_matrices_summaries'):
        tf.summary.histogram('KZZ_inv', Kzz_inv)
        tf.summary.histogram('KZX', k_zx)
        tf.summary.histogram('KXX', K_xx)

    # Shared left factor k_xz . Kzz_inv : (N, M); computed once and reused.
    proj = tf.matmul(k_xz, Kzz_inv)

    # mu : (N, M)dot(M, 1) = (N, 1) -> (N). Only axis 1 is squeezed so that a
    # single data point (N == 1) still yields a vector, not a scalar.
    mu = tf.squeeze(tf.matmul(proj, tf.expand_dims(m, 1), name='mu'), [1])

    # sig_sqr : diag(K_xx) - diag(k_xz Kzz_inv k_zx)
    #           + diag(k_xz Kzz_inv S Kzz_inv k_zx)
    # For A (N, M) and B (M, N): diag(A B)[i] = sum_j A[i, j] * B[j, i], so
    # each diagonal is a row-wise sum of an element-wise product and no
    # (N, N) intermediate has to be formed.
    with tf.name_scope('XX_variance'):
        middle = tf.reduce_sum(proj * k_xz, 1)

        right = tf.reduce_sum(
            tf.matmul(proj, S) * tf.transpose(tf.matmul(Kzz_inv, k_zx)), 1)

        XX_cov = tf.diag_part(K_xx)
        sig_sqr = XX_cov - middle + right

    tf.summary.histogram('mean_at_datapoints', mu)
    tf.summary.histogram('variance_at_datapoints', sig_sqr)

    return mu, sig_sqr, [XX_cov, middle, right, k_zx]
예제 #46
0
 def trace_KiX(self, X):
     """
     Return tr(K^{-1} X), where K is this matrix and X is a square matrix
     of the same size.

     Only the diagonal of X is used: it is divided element-wise by self.d
     and the result is summed.
     """
     scaled_diag = tf.diag_part(X) / self.d
     return tf.reduce_sum(scaled_diag)
예제 #47
0
tf.reset_default_graph()

# Inputs: one placeholder per view, with the feature width taken from the
# corresponding data array and a free batch dimension.
x_place = tf.placeholder(
    tf.float32, shape=(None, x_array.shape[-1]), name='x_place')
y_place = tf.placeholder(
    tf.float32, shape=(None, y_array.shape[-1]), name='y_place')

# Project both views to NVECS components.
x_proj = dense_layer(x_place, units=NVECS, smoothness=X_SMOOTH, name='x_proj')
y_proj = dense_layer(y_place, units=NVECS, smoothness=Y_SMOOTH, name='y_proj')

# Cross-product matrix between the two projections: x_proj^T . y_proj.
covar_mat = tf.matmul(x_proj, y_proj, transpose_a=True)
abs_covar = tf.abs(covar_mat)

# Both band parts include the main diagonal, so the diagonal enters the total
# with weight -3 + 1 + 1 = -1 while every off-diagonal entry has weight +1.
cca_loss = tf.reduce_sum(tf.diag_part(abs_covar))
upper_loss = tf.reduce_sum(tf.matrix_band_part(abs_covar, 0, -1))
lower_loss = tf.reduce_sum(tf.matrix_band_part(abs_covar, -1, 0))
total_loss = -3. * cca_loss + upper_loss + lower_loss

## create optimizer and train op
optimizer = tf.train.AdamOptimizer(learning_rate=LEARN_RATE)
train_op = optimizer.minimize(total_loss)

## create eval op
eval_op = tf_pearson_correlation(x_proj, y_proj)

## get the ops registered in the 'ortho' collection
ortho_ops = tf.get_collection('ortho')
예제 #48
0
 def logdet(self):
     """
     Return sum(log(d)) + log det(I + W^T diag(1/d) W).

     By the matrix determinant lemma this equals log det(diag(d) + W W^T);
     presumably that is the matrix this object represents — confirm against
     the class definition.
     """
     part1 = tf.reduce_sum(tf.log(self.d))
     # The dtype must be passed by keyword: the second positional parameter
     # of tf.eye is `num_columns`, not `dtype`.
     I = tf.eye(tf.shape(self.W)[1], dtype=float_type)
     # W^T / d scales column i of W^T by 1 / d[i], i.e. W^T diag(1/d).
     M = I + tf.matmul(tf.transpose(self.W) / self.d, self.W)
     # log det(M) = 2 * sum(log(diag(chol(M))))
     part2 = 2 * tf.reduce_sum(tf.log(tf.diag_part(tf.cholesky(M))))
     return part1 + part2
예제 #49
0
파일: util.py 프로젝트: rikahoa/AutoGP
def log_cholesky_det(chol):
    """Return twice the sum of the logs of the diagonal of ``chol``.

    When ``chol`` is the Cholesky factor L of A = L L^T, this is log det(A).
    """
    diag_logs = tf.log(tf.diag_part(chol))
    return 2 * tf.reduce_sum(diag_logs)
  def energy(x):
    """Unnormalized minus log density of 2d strongly correlated Gaussian.

    Returns, for every row x_i of ``x``, 0.5 * (x_i - mu) sigma_inv
    (x_i - mu)^T, taken from the diagonal of the full pairwise product.
    """
    centered = x - mu
    weighted = tf.matmul(centered, sigma_inv)
    pairwise = tf.matmul(weighted, tf.transpose(centered))
    return .5 * tf.diag_part(pairwise)
예제 #51
0
def triangular_inv(L):
    """Invert the triangular matrix ``L`` by solving L X = I.

    The identity is built with the same size and dtype as the diagonal of
    ``L``; the solver is called with its default settings.
    """
    identity = tf.diag(tf.ones_like(tf.diag_part(L)))
    return tf.matrix_triangular_solve(L, identity)
예제 #52
0
    def build_variance_standard(self):
        """Build the predictive-variance tensor at ``self.x_test``.

        For every mixture component k the variance slice returned by
        ``self.sparsity._build_intermediate_conditionals`` (after
        ``self.get_expected_values``) is shifted by the observation noise and
        accumulated with weight ``self.q_weights[k]``. Several ``tf.Print``
        debugging ops are attached to the result.

        Returns the stacked ``total_sum`` tensor. The ``full_var_flag``
        branches are never taken because the flag is hard-coded to False.
        """
        print('build variance')
        # NOTE(review): num_test and r are assigned but not used below.
        num_test = self.x_test.get_shape().as_list()[0]
        # One accumulator per output; only index 0 is updated below (i = 0).
        # NOTE(review): with num_outputs > 1 the remaining entries stay the
        # Python float 0.0 when stacked at the end — confirm this is intended.
        total_sum = [0.0 for y in range(self.num_outputs)]
        r = self.r

        full_var_flag = False

        # NOTE(review): precomp_intermediate is filled here but never read
        # later in this function; the same quantities are recomputed in the
        # second loop.
        precomp_intermediate = [[] for x in range(self.num_components)]
        for l in range(self.num_components):
            x_test = tf.expand_dims(self.x_test, 1)  #N* x 1 x D
            mu_f, sigma_f, _, _ = self.sparsity._build_intermediate_conditionals(
                l, self.a, x_test, predict=not full_var_flag)
            mu_f, sigma_f = self.get_expected_values(mu_f, sigma_f)
            pi_l = self.q_weights[l]
            precomp_intermediate[l].append(pi_l)
            precomp_intermediate[l].append(mu_f)
            precomp_intermediate[l].append(sigma_f)

        # No observation noise is added when plotting the posterior.
        if self.context.plot_posterior:
            noise_sigma = 0.0
        else:
            noise_sigma = tf.square(util.var_postive(self.sigma_y[0]))

        # Debug output: the value is passed through tf.Print twice.
        noise_sigma = tf.Print(
            noise_sigma,
            [noise_sigma,
             tf.square(util.var_postive(self.sigma_y[0]))], 'noise_sigma: ')
        noise_sigma = tf.Print(noise_sigma, [noise_sigma], 'noise_sigma: ')

        for k in range(self.num_components):
            #mu_f = [Q, N, 1]
            #mu_w = [Q, P, N, 1]

            x_test = tf.expand_dims(self.x_test, 1)  #N* x 1 x D
            #mu_f = #Q * N* x 1
            #sigma_f = #Q * N* x 1 x 1
            mu_f, sigma_f, _, _ = self.sparsity._build_intermediate_conditionals(
                k, self.a, x_test, predict=not full_var_flag)
            mu_f, sigma_f = self.get_expected_values(mu_f, sigma_f)

            pi_k = self.q_weights[k]
            # Only the first output (i) and the first leading slice (j) are
            # used.
            i = 0
            j = 0
            mu_f = mu_f[j, :, 0]  # N x 1
            #sigma_f = tf.matrix_diag_part(sigma_f[j, :])
            sigma_f = sigma_f[j, :, :, 0]  # N x 1

            s = sigma_f[:, 0]
            # Debug output attached to the variance tensor.
            s = tf.Print(s, [noise_sigma], 'noise_sigma: ')
            s = tf.Print(s, [tf.shape(sigma_f)], 'tf.shape(sigma_f): ')
            s += noise_sigma

            # Weighted sum over mixture components.
            total_sum[i] += pi_k * s

        if full_var_flag:
            total_sum = total_sum[0]
        else:
            # Stack the per-output accumulators along axis 1.
            total_sum = tf.stack(total_sum, axis=1)

        total_sum = tf.Print(total_sum, [self.likelihood_weights[self.r]],
                             'self.likelihood_weights[self.r]: ')
        if full_var_flag:
            total_sum = tf.Print(total_sum, [tf.shape(total_sum)],
                                 'total_sum: ')
            return tf.expand_dims(tf.diag_part(total_sum), -1)

        return total_sum
예제 #53
0
def invert(settings,
           epoch,
           samples,
           g_tolerance=None,
           e_tolerance=0.1,
           n_iter=None,
           max_iter=10000,
           heuristic_sigma=None,
           C_samples=None):
    """
    Return the latent space points corresponding to a set of a samples
    ( from gradient descent )

    Args:
        settings: Either a settings dict, or the name of a settings file
            that is loaded from './experiments/settings/<name>.txt' as JSON.
        epoch: Epoch whose generator parameters are loaded.
        samples: 3-D array of samples to invert; cast to float32. Its last
            axis must equal settings['num_generated_features'].
        g_tolerance: If given (and n_iter is None), iterate until the mean
            gradient norm is no longer above this value.
        e_tolerance: Otherwise iterate until the absolute reconstruction
            error is no longer above this value.
        n_iter: If given, run exactly this many optimisation steps.
        max_iter: Upper bound on steps for the tolerance-based loops.
        heuristic_sigma: Kernel bandwidth; computed with
            mmd.median_pairwise_distance(samples) when None.
        C_samples: Optional conditioning data; selects the conditional
            generator and is fed through a placeholder.

    Returns:
        Tuple (Zs, error_per_sample, heuristic_sigma): the optimised latent
        points, the per-sample reconstruction error, and the bandwidth used.
    """
    # cast samples to float32
    samples = np.float32(samples[:, :, :])
    # get the model
    if isinstance(settings, str):
        # Use a context manager so the settings file is always closed.
        with open('./experiments/settings/' + settings + '.txt',
                  'r') as settings_file:
            settings = json.load(settings_file)
    num_samples = samples.shape[0]
    print(
        'Inverting',
        num_samples,
        'samples using model',
        settings['identifier'],
        'at epoch',
        epoch,
    )
    if g_tolerance is not None:
        print('until gradient norm is below', g_tolerance)
    else:
        print('until error is below', e_tolerance)
    # get parameters
    parameters = load_parameters(settings['identifier'] + '_' + str(epoch))
    # assertions
    assert samples.shape[2] == settings['num_generated_features']
    # create VARIABLE Z
    Z = tf.get_variable(
        name='Z',
        shape=[num_samples, settings['seq_length'], settings['latent_dim']],
        initializer=tf.random_normal_initializer())
    if C_samples is None:
        # create outputs
        G_samples = generator(Z,
                              settings['hidden_units_g'],
                              settings['seq_length'],
                              num_samples,
                              settings['num_generated_features'],
                              reuse=False,
                              parameters=parameters)
        fd = None
    else:
        CG = tf.placeholder(tf.float32, [num_samples, settings['cond_dim']])
        assert C_samples.shape[0] == samples.shape[0]
        # CGAN
        G_samples = generator(Z,
                              settings['hidden_units_g'],
                              settings['seq_length'],
                              num_samples,
                              settings['num_generated_features'],
                              reuse=False,
                              parameters=parameters,
                              cond_dim=settings['cond_dim'],
                              c=CG)
        fd = {CG: C_samples}

    # define loss
    if heuristic_sigma is None:
        heuristic_sigma = mmd.median_pairwise_distance(
            samples)  # this is noisy
        print('heuristic_sigma:', heuristic_sigma)
    # Only the cross kernel between generated and target samples is needed.
    _, Kxy, _, _ = mmd._mix_rbf_kernel(G_samples,
                                       samples,
                                       sigmas=tf.constant(
                                           value=heuristic_sigma,
                                           shape=(1, 1)))
    # Diagonal entry i compares generated sample i with target sample i.
    similarity_per_sample = tf.diag_part(Kxy)
    reconstruction_error_per_sample = 1 - similarity_per_sample
    similarity = tf.reduce_mean(similarity_per_sample)
    reconstruction_error = 1 - similarity
    # updater: only the latent variable Z is optimised
    solver = tf.train.RMSPropOptimizer(learning_rate=0.1).minimize(
        reconstruction_error_per_sample, var_list=[Z])

    grad_Z = tf.gradients(reconstruction_error_per_sample, Z)[0]
    grad_per_Z = tf.norm(grad_Z, axis=(1, 2))
    grad_norm = tf.reduce_mean(grad_per_Z)
    print('Finding latent state corresponding to samples...')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        error = sess.run(reconstruction_error, feed_dict=fd)
        g_n = sess.run(grad_norm, feed_dict=fd)
        print(g_n)
        i = 0
        if n_iter is not None:
            # Fixed number of steps.
            while i < n_iter:
                sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
        elif g_tolerance is not None:
            # Stop on small gradient norm (or after max_iter steps).
            while g_n > g_tolerance:
                sess.run(solver, feed_dict=fd)
                error, g_n = sess.run([reconstruction_error, grad_norm],
                                      feed_dict=fd)
                i += 1
                print(error, g_n)
                if i > max_iter:
                    break
        else:
            # Stop on small reconstruction error (or after max_iter steps).
            while np.abs(error) > e_tolerance:
                sess.run(solver, feed_dict=fd)
                error = sess.run(reconstruction_error, feed_dict=fd)
                i += 1
                print(error)
                if i > max_iter:
                    break
        Zs = sess.run(Z, feed_dict=fd)
        error_per_sample = sess.run(reconstruction_error_per_sample,
                                    feed_dict=fd)
        print('Z found in', i, 'iterations with final reconstruction error of',
              error)
    # Clear the graph so a later call can create the variable 'Z' again.
    tf.reset_default_graph()
    return Zs, error_per_sample, heuristic_sigma
예제 #54
0
    def define(self):
        """Build the model graph: inputs, encoders, similarity scores, three
        classification heads, the combined loss, the optimizer and the saver.

        Everything is stored on ``self``; nothing is returned.
        """
        # Sequence inputs: one for the abstract and an (x1, x2) pair for each
        # of label, definition and unit. All share the shape
        # [batch, n_steps, n_input].
        self.abstract = tf.placeholder("float",
                                       [None, self.n_steps, self.n_input])
        self.x1_label = tf.placeholder("float",
                                       [None, self.n_steps, self.n_input])
        self.x2_label = tf.placeholder("float",
                                       [None, self.n_steps, self.n_input])
        self.x1_defn = tf.placeholder("float",
                                      [None, self.n_steps, self.n_input])
        self.x2_defn = tf.placeholder("float",
                                      [None, self.n_steps, self.n_input])
        self.x1_unit = tf.placeholder("float",
                                      [None, self.n_steps, self.n_input])
        self.x2_unit = tf.placeholder("float",
                                      [None, self.n_steps, self.n_input])
        # Targets for the three heads, each [batch, n_class].
        self.y_mt = tf.placeholder("float", [None, self.n_class])
        self.y_ent = tf.placeholder("float", [None, self.n_class])
        self.y_char = tf.placeholder("float", [None, self.n_class])
        # Passed as the second argument of tf.nn.dropout below.
        self.keep_prob = tf.placeholder(tf.float32)

        with tf.variable_scope("BiLSTM_Abstract"):
            self.rep_abstract, self.seq_len_abstract = self.bidirectional_rnn(
                self.abstract)

        # For each pair, the x1 and x2 encoders share weights: the scope is
        # switched to reuse mode before the second call.
        with tf.variable_scope("BiLSTM_Label") as scope:
            self.rep_x1_label, self.seq_len_x1_label = self.bidirectional_rnn(
                self.x1_label)
            scope.reuse_variables()
            self.rep_x2_label, self.seq_len_x2_label = self.bidirectional_rnn(
                self.x2_label)

        with tf.variable_scope("BiLSTM_Defn") as scope:
            self.rep_x1_defn, self.seq_len_x1_defn = self.bidirectional_rnn(
                self.x1_defn)
            scope.reuse_variables()
            self.rep_x2_defn, self.seq_len_x2_defn = self.bidirectional_rnn(
                self.x2_defn)

        with tf.variable_scope("BiLSTM_Unit") as scope:
            self.rep_x1_unit, self.seq_len_x1_unit = self.bidirectional_rnn(
                self.x1_unit)
            scope.reuse_variables()
            self.rep_x2_unit, self.seq_len_x2_unit = self.bidirectional_rnn(
                self.x2_unit)

        # Bilinear similarity matrix shared by all three pairs.
        self.w_sim = tf.get_variable(
            "w_sim",
            shape=[self.n_hidden, self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())

        # Similarity of row i of x1 with row i of x2: the diagonal of
        # x1 . w_sim . x2^T, expanded to a column.
        self.sim_score_label = tf.diag_part(
            tf.matmul(tf.matmul(self.rep_x1_label, self.w_sim),
                      tf.transpose(self.rep_x2_label)))
        self.sim_score_label = tf.expand_dims(self.sim_score_label, 1)

        self.sim_score_defn = tf.diag_part(
            tf.matmul(tf.matmul(self.rep_x1_defn, self.w_sim),
                      tf.transpose(self.rep_x2_defn)))
        self.sim_score_defn = tf.expand_dims(self.sim_score_defn, 1)

        self.sim_score_unit = tf.diag_part(
            tf.matmul(tf.matmul(self.rep_x1_unit, self.w_sim),
                      tf.transpose(self.rep_x2_unit)))
        self.sim_score_unit = tf.expand_dims(self.sim_score_unit, 1)

        # Feature vector: seven representations plus three similarity
        # columns. assumes each rep_* is [batch, n_hidden], which matches the
        # 7 * n_hidden + 3 input width of the output layers — TODO confirm.
        self.joined_vec = tf.concat([
            self.rep_abstract, self.rep_x1_label, self.rep_x1_defn,
            self.rep_x1_unit, self.sim_score_label, self.sim_score_defn,
            self.sim_score_unit, self.rep_x2_label, self.rep_x2_defn,
            self.rep_x2_unit
        ], 1)

        # Three independent linear output heads (mt, ent, char).
        self.w_out_mt = tf.get_variable(
            "w_out_mt",
            shape=[7 * self.n_hidden + 3, self.n_class],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b_out_mt = tf.get_variable(
            "b_out_mt", [self.n_class],
            initializer=tf.constant_initializer(0.0))

        self.w_out_ent = tf.get_variable(
            "w_out_ent",
            shape=[7 * self.n_hidden + 3, self.n_class],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b_out_ent = tf.get_variable(
            "b_out_ent", [self.n_class],
            initializer=tf.constant_initializer(0.0))

        self.w_out_char = tf.get_variable(
            "w_out_char",
            shape=[7 * self.n_hidden + 3, self.n_class],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b_out_char = tf.get_variable(
            "b_out_char", [self.n_class],
            initializer=tf.constant_initializer(0.0))

        # Each head applies its own dropout op to the joined vector.
        self.pred_mt = tf.matmul(
            tf.nn.dropout(self.joined_vec, self.keep_prob),
            self.w_out_mt) + self.b_out_mt
        self.pred_softmax_mt = tf.nn.softmax(self.pred_mt)

        self.pred_ent = tf.matmul(
            tf.nn.dropout(self.joined_vec, self.keep_prob),
            self.w_out_ent) + self.b_out_ent
        self.pred_softmax_ent = tf.nn.softmax(self.pred_ent)

        self.pred_char = tf.matmul(
            tf.nn.dropout(self.joined_vec, self.keep_prob),
            self.w_out_char) + self.b_out_char
        self.pred_softmax_char = tf.nn.softmax(self.pred_char)

        # Unregularised loss: sum of the three mean cross-entropies.
        self.loss_orig = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred_mt, labels=self.y_mt)) \
                         + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred_ent, labels=self.y_ent)) \
                         + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.pred_char, labels=self.y_char))

        # L2 penalty on the output-layer weights and biases only.
        l2_loss = tf.nn.l2_loss(self.w_out_mt) + tf.nn.l2_loss(self.b_out_mt) \
                  + tf.nn.l2_loss(self.w_out_ent) + tf.nn.l2_loss(self.b_out_ent) \
                  + tf.nn.l2_loss(self.w_out_char) + tf.nn.l2_loss(self.b_out_char)
        l2_reg_lambda = 0.5
        self.loss = self.loss_orig + l2_reg_lambda * l2_loss
        # NOTE: self.optimizer holds the training op returned by minimize(),
        # not the optimizer object itself.
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)

        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
예제 #55
0
def getLogDet(M):
    """Return 2 * sum(log(diag(cholesky(M)))), reduced over axis 0.

    For a symmetric positive-definite matrix this quantity is log|M|, since
    the determinant is the squared product of the Cholesky factor's diagonal.
    """
    chol_factor = tf.cholesky(M)
    log_diagonal = tf.log(tf.diag_part(chol_factor))
    return 2.0 * tf.reduce_sum(log_diagonal, 0)
예제 #56
0
def mmd(data, gen, sigma=1., is_tf=False, weights=None):
    """Compute the MMD between two sample sets using a Gaussian (RBF) kernel.

    The returned value is

        mean(K_xx) + mean(K_yy) - 2 * mean(K_xy)

    with K(a, b) = exp(-||a - b||^2 / (2 * sigma^2)). The means run over all
    entries of each block, diagonal (self-similarity) entries included.
    The smaller the value, the closer the sets.

    Args:
      data: (M, D) data points. With is_tf=False any array-like is accepted
        and 1-D input is treated as (M, 1). With is_tf=True, a 2-D tensor.
      gen: (N, D) generated points, same conventions as `data`.
      sigma: Float, kernel bandwidth.
      is_tf: Boolean. If True the result is built from TensorFlow ops,
        otherwise it is computed eagerly with NumPy.
      weights: Optional (M, 1) array with one weight per data point. Row i of
        the cross-kernel block K_xy is multiplied by weights[i]; K_xx and
        K_yy are not weighted. On the TensorFlow path the value is passed to
        tf.constant, so it must be a constant (e.g. a NumPy array).

    Returns:
      mmd: Scalar, the MMD between the sets (a tensor when is_tf=True).
      gradients_mmd: Always None; MMD gradients are not implemented.
    """
    x = data
    y = gen
    gamma = 1.0 / (2.0 * sigma**2)

    # ------------- TensorFlow VERSION -------------

    if is_tf:
        data_num = tf.shape(x)[0]
        gen_num = tf.shape(y)[0]

        v = tf.concat([x, y], 0)
        VVT = tf.matmul(v, tf.transpose(v))
        v_sq = tf.reshape(tf.diag_part(VVT), [-1, 1])

        v_sq_tiled = tf.tile(v_sq, [1, data_num + gen_num])
        v_sq_tiled_T = tf.transpose(v_sq_tiled)

        # Pairwise squared distances: |a|^2 - 2 a.b + |b|^2.
        exp_object = v_sq_tiled - 2 * VVT + v_sq_tiled_T
        K = tf.exp(-gamma * exp_object)

        K_xx = K[:data_num, :data_num]
        K_yy = K[data_num:, data_num:]
        K_xy = K[:data_num, data_num:]
        if weights is not None:
            # Cast to the kernel dtype: NumPy weights default to float64 and
            # would otherwise fail to multiply with a float32 kernel.
            weights = tf.cast(tf.constant(weights), K.dtype)
            K_xy = K_xy * tf.tile(weights, (1, gen_num))

        # Use the kernel dtype rather than a hard-coded float32 so that
        # float64 inputs do not trigger a dtype mismatch below.
        m = tf.cast(data_num, K.dtype)
        n = tf.cast(gen_num, K.dtype)

        mmd = (1. / m / m * tf.reduce_sum(K_xx) +
               1. / n / n * tf.reduce_sum(K_yy) -
               2. / m / n * tf.reduce_sum(K_xy))

        # TODO: MMD gradients.
        gradients_mmd = None

        return mmd, gradients_mmd

    # ------------- NumPy VERSION -------------

    x = np.asarray(x)
    y = np.asarray(y)
    data_num = len(x)
    gen_num = len(y)

    # Treat 1-D inputs as column vectors of scalar samples.
    if x.ndim == 1:
        x = np.reshape(x, [-1, 1])
    if y.ndim == 1:
        y = np.reshape(y, [-1, 1])

    v = np.concatenate((x, y), 0)
    VVT = np.matmul(v, np.transpose(v))
    sqs = np.reshape(np.diag(VVT), [-1, 1])
    sqs_tiled_horiz = np.tile(sqs, np.transpose(sqs).shape)

    # Pairwise squared distances: |a|^2 - 2 a.b + |b|^2.
    exp_object = sqs_tiled_horiz - 2 * VVT + np.transpose(sqs_tiled_horiz)
    K = np.exp(-gamma * exp_object)

    K_xx = K[:data_num, :data_num]
    K_yy = K[data_num:, data_num:]
    K_xy = K[:data_num, data_num:]
    if weights is not None:
        K_xy = K_xy * np.tile(weights, (1, gen_num))

    mmd = (1. / data_num / data_num * np.sum(K_xx) +
           1. / gen_num / gen_num * np.sum(K_yy) -
           2. / data_num / gen_num * np.sum(K_xy))

    # TODO: MMD gradients.
    gradients_mmd = None

    return mmd, gradients_mmd
예제 #57
0
파일: loss_func.py 프로젝트: akshaym96/NLP
def nce_loss(inputs, weights, biases, labels, sample, unigram_prob):
    """Noise-contrastive estimation (NCE) loss, one value per batch example.

    With s(w, c) = u_c . u_w + b_w, k = len(sample) and P the unigram
    probability, the value computed for each context c with target o is

        -( log(sigmoid(s(o, c) - log(k * P(o))))
           + sum_x log(1 - sigmoid(s(x, c) - log(k * P(x)))) )

    where x ranges over the negative samples. A small constant
    delta = exp(-10) is added inside the logarithms (except log(k * P(o)))
    to keep them finite.

    Args:
      inputs: Embeddings for context words, [batch_size, embedding_size].
      weights: Weights for the NCE loss, [Vocabulary, embedding_size].
      biases: Biases for the NCE loss, [Vocabulary, 1].
      labels: Word ids of the words to predict, [batch_size, 1].
      sample: Word ids of the negative samples, [num_sampled]. Must support
        len().
      unigram_prob: Unigram probabilities, [Vocabulary].

    Returns:
      Tensor of shape [batch_size, 1] holding the loss of each example.
      (The sample term is reshaped to a column before it is added to the
      label term; adding the [batch_size] vector directly would broadcast
      the result to a [batch_size, batch_size] matrix.)
    """
    num_sampled = len(sample)
    batch_size = inputs.get_shape().as_list()[0]
    embedding_size = inputs.get_shape().as_list()[1]
    delta = tf.exp(-10.0)

    # Embeddings of the target (label) words.
    label_embedding = tf.reshape(
        tf.nn.embedding_lookup(weights, labels, name="labels_embedding"),
        [batch_size, embedding_size])

    # ---------------- Negative-sample term ----------------
    sample_embedding = tf.reshape(
        tf.nn.embedding_lookup(weights, sample, name="sample_embedding"),
        [num_sampled, embedding_size])

    sample_bias = tf.reshape(
        tf.nn.embedding_lookup(biases, sample, name="sample_bias"),
        [num_sampled, 1])

    unigram_prob = tf.reshape(unigram_prob,
                              [weights.get_shape().as_list()[0], 1])

    sample_prob = tf.reshape(
        tf.nn.embedding_lookup(unigram_prob, sample, name="unigram_sample"),
        [num_sampled, 1])

    # Scores s(x, c) for every (sample, context) pair: [num_sampled, batch].
    sample_matmul = tf.matmul(sample_embedding, inputs, transpose_b=True)
    sample_bias_multiple = tf.tile(sample_bias, [1, batch_size])
    s_wxwc = tf.add(sample_matmul, sample_bias_multiple)

    # log(k * P(x)), replicated across the batch.
    sample_prob_multiple = tf.tile(sample_prob, [1, batch_size])
    k_sample_prob_multiple = tf.scalar_mul(num_sampled, sample_prob_multiple)
    log_k_sample = tf.log(k_sample_prob_multiple + delta)

    sub_swxwc_logk_sample = tf.subtract(s_wxwc,
                                        log_k_sample,
                                        name="Inner-sigmoid-B")
    sigmoid_wxwc = tf.sigmoid(sub_swxwc_logk_sample, name="sigmoid-B")
    log_red_sum_sample = tf.log(1 - sigmoid_wxwc + delta)

    # Sum over the samples, then make it a column so it lines up
    # element-wise with the [batch_size, 1] label term below.
    red_sum_sample = tf.reshape(
        tf.reduce_sum(log_red_sum_sample, [0]),
        [batch_size, 1])

    # ---------------- Target-word term ----------------
    label_bias = tf.reshape(
        tf.nn.embedding_lookup(biases, labels, name="label_bias"),
        [batch_size, 1])

    label_prob = tf.reshape(
        tf.nn.embedding_lookup(unigram_prob, labels, name="unigram_sample"),
        [batch_size, 1])

    # The diagonal of (label_embedding x inputs^T) pairs each context with
    # its own target word.
    label_matmul = tf.reshape(
        tf.diag_part(tf.matmul(label_embedding, inputs, transpose_b=True)),
        [batch_size, 1])

    s_wowc = tf.add(label_matmul, label_bias)

    k_label_prob_multiple = tf.scalar_mul(num_sampled, label_prob)
    # NOTE(review): unlike the sample branch, no delta is added here, so a
    # zero unigram probability for a label yields log(0) -- confirm intended.
    log_k_label = tf.log(k_label_prob_multiple)

    sub_swowc_logk_label = tf.subtract(s_wowc,
                                       log_k_label,
                                       name="Inner-sigmoid-B")
    sigmoid_wowc = tf.sigmoid(sub_swowc_logk_label, name="sigmoid-B")
    log_red_sum_label = tf.log(sigmoid_wowc + delta)

    final_sum = tf.add(red_sum_sample, log_red_sum_label)

    return tf.negative(final_sum)
예제 #58
0
# Discriminator loss: mean softmax cross-entropy of D_logits against the
# labels y padded with one extra all-zero column.
with tf.variable_scope('D_loss'):
    # Append a zero column so the label width is one more than y's.
    # NOTE(review): presumably the extra column is the "generated/fake" class,
    # which is never the target here -- confirm against the discriminator.
    label = tf.concat([y,tf.zeros([batch_size,1])],axis=1)
    d_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
        logits=D_logits,labels=label))

# Accuracy: argmax over all but the last column of D compared with the argmax
# of y, averaged over the batch.
with tf.variable_scope('accuracy'):
    correct_prediction = tf.equal(tf.argmax(D[:,:-1],1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

# Gradients of several scalars with respect to the input X.
with tf.name_scope('gradients'):
    grad_loss_over_X = tf.gradients(d_loss, X)[0]

    # Mean of the diagonal of the top-left 64x64 slice of flat_features,
    # i.e. entry (i, i) for i < 64, differentiated w.r.t. X.
    grad_features_over_X = tf.gradients(
        tf.reduce_mean(tf.diag_part(flat_features[0:64,0:64])),X)[0]
    # Same construction on the top-left 10x10 slice of D_logits.
    grad_logit_over_X = tf.gradients(
        tf.reduce_mean(tf.diag_part(D_logits[0:10,0:10])),X)[0]

# Saver covering every global variable currently in the graph.
dvar = tf.global_variables()
saver = tf.train.Saver(dvar)

sess = tf.InteractiveSession()

# Initialise all variables first; the restore below then overwrites them with
# the checkpointed values.
init = tf.global_variables_initializer()
sess.run(init)

# Restore from the most recent checkpoint in 'discriminator_no_GAN/'; the
# commented-out line is the alternative checkpoint directory.
#saver.restore(sess,tf.train.latest_checkpoint('GAN/discriminator/'))
saver.restore(sess,tf.train.latest_checkpoint('discriminator_no_GAN/'))

coord = tf.train.Coordinator()
예제 #59
0
    def build_model(self,
                    video,
                    video_mask,
                    caption,
                    caption_mask,
                    train_flag,
                    reuse_variable=False):
        """Build the video/caption matching graph and its margin loss.

        Every video in the batch is fused with every caption in the batch and
        scored by a small fully connected stack. The scores are stacked into
        a matrix whose diagonal is used as the reference score in a hinge
        loss with margin 10.

        Args:
            video: Video features; reshaped below to
                [batch_size, video_steps, channel_size].
            video_mask: Per-timestep video mask, [batch_size, length].
            caption: Caption input, [batch_size, length].
            caption_mask: Per-token caption mask, [batch_size, length].
            train_flag: Passed to batch norm as `is_training`.
            reuse_variable: Forwarded to the video and caption encoders.

        Sets (among others) self.logit, self.scores, self.mean_loss,
        self.concept_loss, self.video_mask_list and self.caption_mask_list.
        """

        self.video = video  # [batch_size, length, kernel, kernel, channel]
        self.video_mask = video_mask  # [batch_size, length]
        # Sequence length per example = sum of the mask along the time axis.
        video_mask_leng = tf.cast(tf.reduce_sum(self.video_mask,1),tf.int32)
        self.caption = caption  # [batch_size, length]
        self.caption_mask = caption_mask  # [batch_size, length]
        caption_mask_leng = tf.cast(tf.reduce_sum(self.caption_mask,1),tf.int32)

        # Build a list of masks for progressively shorter sequences.
        # NOTE(review): the "-2" steps and the final "(len-1)/2" presumably
        # mirror how layers in the encoders shrink the time axis -- confirm
        # against build_video_embedding / build_caption_encoder.
        self.video_mask_list = []
        self.caption_mask_list = []
        max_len = self.config.caption_length
        for mi in range(2):
            # Shorten every length by 2 (never below 1) and the padded
            # length by 2.
            video_mask_leng = tf.maximum(1, video_mask_leng-2)
            caption_mask_leng = tf.maximum(1, caption_mask_leng-2)
            max_len -= 2
            # Video masks are reversed along the last axis; caption masks
            # are not.
            self.video_mask_list.append(tf.reverse(tf.sequence_mask(video_mask_leng,max_len,tf.float32),[-1]))
            self.caption_mask_list.append(tf.sequence_mask(caption_mask_leng,max_len,tf.float32))
        # Final entry: lengths mapped through (len - 1) / 2, floored at 1.
        max_len = int((max_len-1)/2)
        video_mask_leng = tf.cast((video_mask_leng-1)/2,tf.int32)
        video_mask_leng = tf.maximum(1, video_mask_leng)
        caption_mask_leng = tf.cast((caption_mask_leng-1)/2,tf.int32)
        caption_mask_leng = tf.maximum(1, caption_mask_leng)
        self.video_mask_list.append(tf.reverse(tf.sequence_mask(video_mask_leng,max_len,tf.float32),[-1]))
        self.caption_mask_list.append(tf.sequence_mask(caption_mask_leng,max_len,tf.float32))

        self.train_flag = train_flag

        #Batch normalization
        self.bn_fn = slim.batch_norm
        self.bn_params = {'is_training':self.train_flag}
        

        # Trainable word-embedding table initialised from self.word_embed.
        self.word_embed_t = tf.Variable(self.word_embed, dtype=tf.float32, name="word_embed", trainable=True)
        # Drop all size-1 dimensions of the video tensor, then force the
        # expected 3-D layout.
        self.squeezed_feat = tf.squeeze(self.video)
        self.embedded_feat = tf.reshape(self.squeezed_feat, [self.batch_size,
                                                             self.video_steps,
                                                             self.channel_size])

        #  [batch_size, length, channel_size]
        # Multiply each timestep's features by its mask value.
        self.embedded_feat = self.embedded_feat * tf.expand_dims(video_mask, 2)

        # Factories producing a fresh dropout-wrapped cell on every call; the
        # same keep probability is used for inputs and outputs.
        self.video_cell_d = lambda: rnn_cell.DropoutWrapper(
                self.video_cell(),
                input_keep_prob = self.dropout_keep_prob,
                output_keep_prob = self.dropout_keep_prob)
        self.caption_cell_d = lambda: rnn_cell.DropoutWrapper(
                self.caption_cell(),
                input_keep_prob = self.dropout_keep_prob,
                output_keep_prob = self.dropout_keep_prob)

        # Two independent multi-layer cells per modality.
        # NOTE(review): presumably one per direction of a bidirectional
        # encoder -- confirm in the encoder builders.
        video_cell1 = rnn_cell.MultiRNNCell([self.video_cell_d() for _ in range(self.config.num_layers)],
                                                state_is_tuple=True)
        video_cell2 = rnn_cell.MultiRNNCell([self.video_cell_d() for _ in range(self.config.num_layers)],
                                                state_is_tuple=True)
        video_cell = [video_cell1, video_cell2]

        caption_cell1 = rnn_cell.MultiRNNCell([self.caption_cell_d() for _ in range(self.config.num_layers)],
                                                state_is_tuple=True)
        caption_cell2 = rnn_cell.MultiRNNCell([self.caption_cell_d() for _ in range(self.config.num_layers)],
                                                state_is_tuple=True)
        caption_cell = [caption_cell1, caption_cell2]

        video_emb_state = self.build_video_embedding(video_cell,
                                                     self.embedded_feat, self.video_mask, reuse_variable)
        rnn_emb_state = self.build_caption_encoder(caption_cell, reuse_variable)

        with tf.variable_scope("multimodal", initializer=self.initializer) as scope:
            margin_list = []
            logit_list = []
            # One pass per video: score video i against every caption in the
            # batch. Variables are created on the first pass and reused
            # afterwards.
            for i in range(self.batch_size):
                if i > 0:
                    scope.reuse_variables()
                # Replicate video i's embedding batch_size times so it can be
                # fused with all caption embeddings at once.
                fuse = self.fusion(tf.tile(tf.expand_dims(video_emb_state[i,:,:],0),[self.batch_size,1,1]) , rnn_emb_state, i, reuse=(i>0))
                # Scoring head: 256 -> 256 -> 128 -> 1, with batch norm and
                # L2 weight regularisation on every layer.
                with slim.arg_scope([slim.fully_connected],
                                    weights_regularizer=slim.l2_regularizer(0.0005),
                                    normalizer_fn=self.bn_fn,
                                    normalizer_params=self.bn_params):
                    logit = slim.fully_connected(fuse, 256, activation_fn=tf.nn.leaky_relu, scope='fc1',reuse=(i>0))
                    logit = slim.fully_connected(logit, 256, activation_fn=tf.nn.leaky_relu, scope='fc2',reuse=(i>0))
                    logit = slim.fully_connected(logit, 128, activation_fn=tf.nn.leaky_relu, scope='fc3',reuse=(i>0))
                    logit = slim.fully_connected(logit, 1, activation_fn=None, scope='scorefn', reuse=(i>0))
                score = logit

                # Both lists receive the same tensor.
                logit_list.append(score)
                margin_list.append(score)

        # Stack the per-video scores and drop size-1 dimensions.
        # NOTE(review): presumably [batch_size, batch_size] with row i holding
        # video i vs. every caption -- confirm the output shape of fusion().
        margin_mat = tf.squeeze(tf.stack(margin_list))
        logit_mat = tf.squeeze(tf.stack(logit_list))
        self.logit = logit_mat
        # Diagonal entries: score of pair (i, i).
        diag_elem = tf.diag_part(margin_mat)
        # Hinge with margin 10: max(0, 10 + s[i, j] - s[i, i]). Entries with
        # j == i are not excluded and each evaluate to the constant 10.
        loss_mat = tf.maximum(0.0, 10. + margin_mat - tf.reshape(diag_elem, [-1,1]))
        margin_loss = tf.reduce_sum(loss_mat) / (self.batch_size*self.batch_size)
        self.scores = margin_mat
        self.mean_loss = margin_loss
        # Constant zero; no concept loss is computed in this method.
        self.concept_loss = tf.constant(0)
예제 #60
0
    def __init__(self,
                 input_means,
                 input_vars,
                 n_points,
                 n_inducing_points,
                 set_for_training,
                 initial = None):
        """Create the node's variables and build its output moments and energy.

        Args:
            input_means: Means of the node's inputs; its second static
                dimension is taken as the input dimensionality.
            input_vars: Variances of the node's inputs.
            n_points: Total number of data points (used to scale the energy).
            n_inducing_points: Number of inducing points.
            set_for_training: Used in the cavity scaling factor
                (n_points - set_for_training) / n_points.
            initial: Optional initial value for the inducing inputs `z`;
                when None they are drawn uniformly from [-1, 1].

        Sets self.output_means, self.output_vars and self.energy, plus the
        diagnostics self.v1, self.v2 and self.vout.
        """
        BaseNode.__init__(self, input_means, input_vars)
        self.input_means = input_means
        self.input_vars = input_vars
        self.n_inducing_points = n_inducing_points
        # Static input dimensionality; dynamic batch size.
        self.input_d = input_means.get_shape().as_list()[1]
        self.batch_size = tf.shape(input_means)[0]
        self.n_points = n_points
        self.set_for_training = set_for_training

        # Covariance parameters of the cavities 
        self.LParamPost = tf.Variable(
          tf.random_normal(((self.n_inducing_points, self.n_inducing_points))))
        # Mean parameters of the cavities
        self.mParamPost = tf.Variable(
            tf.random_normal((self.n_inducing_points, 1)))
        # Kernel hyper-parameters passed to the SE helpers below; lsf enters
        # as exp(lsf).
        # NOTE(review): names suggest log length-scales and log signal
        # variance -- confirm in SE.
        self.lls = tf.Variable(tf.zeros([1, self.input_d], dtype=tf.float32))
        self.lsf = tf.Variable(0.0, dtype=tf.float32)
        # Inducing inputs: random in [-1, 1] unless an initial value is given.
        if (initial is None):
            self.z = tf.Variable(
                tf.random_uniform([self.n_inducing_points, self.input_d], -1, 1))
        else:
            self.z = tf.Variable(initial, dtype=tf.float32)
        jitter = tf.cast(1e-3, tf.float32)

        # Below is based on the equations from page 8
        # Expectation of Kxz w.r.t the input
        EKxz = SE.get_psi1(self.lls,    
                           self.lsf,
                           self.input_means,
                           self.input_vars,
                           self.z)
        Kzz = SE.get_kernel(self.lls, self.lsf, self.z, self.z)
        # Add jitter, scaled by exp(lsf), to the diagonal before inverting.
        Kzz += tf.eye(self.n_inducing_points) * jitter * tf.exp(self.lsf)
        KzzInv = getInversePSD(Kzz)
        # Upper-triangular part of LParamPost (diagonal included) ...
        Lu = tf.matrix_band_part(self.LParamPost, 0, -1)
        # ... with its diagonal replaced by exp(diagonal), which keeps the
        # diagonal strictly positive.
        LParamPost_tri = Lu + tf.diag(tf.exp(tf.diag_part(self.LParamPost)) \
                                      - tf.diag_part(self.LParamPost))
        LtL = tf.matmul(tf.transpose(LParamPost_tri), LParamPost_tri)
        # NOTE(review): if n_points and set_for_training are Python ints and
        # this runs under Python 2 without true division, this truncates --
        # confirm.
        scalar = (self.n_points - self.set_for_training) / self.n_points
        covCavityInv = KzzInv + LtL * scalar

        covCavity = getInversePSD(covCavityInv)
        meanCavity = tf.matmul(covCavity, scalar * self.mParamPost)
        KzzInvcovCavity = tf.matmul(KzzInv, covCavity)
        KzzInvmeanCavity = tf.matmul(KzzInv, meanCavity)
        # Output mean: EKxz * Kzz^-1 * meanCavity.
        self.output_means = tf.matmul(EKxz, KzzInvmeanCavity)

        # Kernel between the input means and the inducing inputs.
        Kxz = SE.get_kernel(self.lls, self.lsf, self.input_means, self.z)
        B1 = tf.matmul(KzzInvcovCavity, KzzInv) - KzzInv 
        # exp(lsf) + the per-row quadratic form k_x^T B1 k_x.
        v_out = tf.exp(self.lsf) + tf.reduce_sum(Kxz * tf.matmul(Kxz, B1),
                                                 1,
                                                 keep_dims = True)
        B2 = tf.matmul(KzzInvmeanCavity, tf.transpose(KzzInvmeanCavity))

        # Below is based on the equation (35)
        # L is the expectation of Kzz
        # B1 is Kinv
        # B2 is betabetaT
        L = SE.get_L(self.lls,    
                     self.lsf,
                     self.z,
                     self.input_means,
                     self.input_vars)
        # Batched outer products k k^T and l l^T, one matrix per input row.
        k = tf.expand_dims(Kxz, 2)
        kT = tf.expand_dims(Kxz, 1)
        kkT = tf.matmul(k, kT)
        l = tf.expand_dims(EKxz, 2)
        lT = tf.expand_dims(EKxz, 1)
        llT = tf.matmul(l, lT)
        L_kk = L - kkT 
        L_ll = L - llT
        # Calculating the traces for the two terms
        # sum(B * A^T) over the last two axes equals trace(B A) per batch item.
        v1 = tf.reduce_sum(tf.expand_dims(B2, 0) \
                           * tf.transpose(L_ll, [0, 2, 1]), [1, 2])
        v2 = tf.reduce_sum(tf.expand_dims(B1, 0) \
                           * tf.transpose(L_kk, [0, 2, 1]), [1, 2])
        # Absolute values make both correction terms non-negative.
        v1 = tf.abs(tf.expand_dims(v1, 1))
        v2 = tf.abs(tf.expand_dims(v2, 1))
        
        self.output_vars = v_out + v2 + v1

        # Finally calculate the energy (page 9)
        logZpost = self.getLogNormalizerPosterior(KzzInv, LtL)
        logZprior = self.getLogNormalizerPrior(KzzInv)
        logZcav = self.getLogNormalizerCavity(meanCavity,
                                              covCavity,
                                              covCavityInv)

        # We multiply by the minibatch size and normalize terms
        # according to the total number of points (n_points)
        # Keep the individual variance terms accessible on the instance.
        self.v1 = v1
        self.v2 = v2
        self.vout = v_out
        self.energy = (logZcav - logZpost) * self.n_points + logZpost \
            - logZprior