Example #1
    def predict_u(self,X_star):
        X_u = self.X_u
        y_u = self.y_u
        
        X_f = self.X_f
        y_f = self.y_f
        
        y = np.vstack((y_u, y_f))

        L = self.L
                
        theta = self.hyp[:-1]
        
        K_uu = self.k_uu(X_star, X_u[0:1,:], theta)
        K_uu1 = self.k_uu1(X_star, X_u[1:2,:], theta)
        K_uu2 = self.k_uu2(X_star, X_u[2:3,:], theta)
        K_uu3 = self.k_uu3(X_star, X_u[3:4,:], theta)
        K_uf = self.k_uf(X_star, X_f, theta)
        psi = np.hstack((K_uu, K_uu1, K_uu2, K_uu3, K_uf))
        
        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))
        pred_u_star = np.matmul(psi,alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L,psi.T))
        var_u_star = self.k_uu(X_star, X_star, theta) - np.matmul(psi,beta)
        
        return pred_u_star, var_u_star
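
The prediction above is the standard GP posterior computed through the stored Cholesky factor: alpha = K^{-1} y via two triangular solves, mean = psi @ alpha, and covariance = k(X*, X*) - psi K^{-1} psi^T. A minimal self-contained sketch of the same pattern on a plain RBF kernel (the kernel, data, and jitter below are illustrative, not taken from the class above):

import numpy as np

def rbf(A, B, lengthscale=0.2):
    # squared-exponential kernel between the rows of A and B
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / lengthscale ** 2)

X = np.linspace(0, 1, 5)[:, None]            # toy training inputs
y = np.sin(2 * np.pi * X)                    # toy training targets
X_star = np.linspace(0, 1, 50)[:, None]      # test inputs

K = rbf(X, X) + 1e-8 * np.eye(len(X))        # jittered training covariance
L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))    # K^{-1} y
psi = rbf(X_star, X)                                   # cross-covariance
mean = psi @ alpha                                     # posterior mean
beta = np.linalg.solve(L.T, np.linalg.solve(L, psi.T))
cov = rbf(X_star, X_star) - psi @ beta                 # posterior covariance
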
    def calculate_Fi_ci_si(self):
        ''' Simple calculation of Fi, ci, si: does not include any CPV effects, no time dependence '''
        bin_num = self.binning.get_number_of_bins()
        Fi = np.array([])
        ci = np.array([])
        si = np.array([])

        A_mag = abs(self.amplitude.get_A(
            0))  # Just make simple calculation in this class
        A_ph = np.angle(self.amplitude.get_A(0))
        A_mag_inv = np.transpose(A_mag)
        A_ph_inv = np.transpose(A_ph)

        avg_eff_over_phsp = self.efficiency.get_time_averaged_eff()
        for i in range(-bin_num, bin_num + 1):
            if i == 0: continue
            bin_idx = self.binning.get_bin_indices(i)
            inv_bin_idx = self.binning.get_bin_indices(-i)
            avg_eff = avg_eff_over_phsp[bin_idx]
            Fi = np.append(Fi, np.sum(avg_eff * A_mag[bin_idx]**2))
            ci = np.append(
                ci,
                np.sum(avg_eff * A_mag[bin_idx] * A_mag_inv[bin_idx] *
                       np.cos(A_ph[bin_idx] - A_ph_inv[bin_idx])))
            si = np.append(
                si,
                np.sum(avg_eff * A_mag[bin_idx] * A_mag_inv[bin_idx] *
                       np.sin(A_ph[bin_idx] - A_ph_inv[bin_idx])))

        Fi_inv = np.flip(Fi, 0)
        ci = ci / np.sqrt(Fi * Fi_inv)
        si = si / np.sqrt(Fi * Fi_inv)
        Fi = Fi / sum(Fi)

        return Fi, ci, si
Example #3
    def predict_f(self,X_star):
        X_u = self.X_u
        y_u = self.y_u
        
        X_f = self.X_f
        y_f = self.y_f
        
        y = np.vstack((y_u, y_f))

        L = self.L
                
        theta = self.hyp[:-1]
        
        K_uf = self.k_uf(X_u[0:1,:], X_star, theta)
        K_u1f = self.k_u1f(X_u[1:2,:], X_star, theta)
        K_u2f = self.k_u2f(X_u[2:3,:], X_star, theta)
        K_u3f = self.k_u3f(X_u[3:4,:], X_star, theta)
        K_ff = self.k_ff(X_star, X_f, theta)
        psi = np.hstack((K_uf.T, K_u1f.T, K_u2f.T, K_u3f.T, K_ff))
        
        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))
        pred_u_star = np.matmul(psi,alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L,psi.T))
        var_u_star = self.k_ff(X_star, X_star, theta) - np.matmul(psi,beta)
        
        return pred_u_star, var_u_star
    def likelihood(self, hyp):
        X_L = self.X_L
        y_L = self.y_L
        X_H = self.X_H
        y_H = self.y_H

        y = np.vstack((y_L,y_H))
        
        NL = y_L.shape[0]
        NH = y_H.shape[0]
        N = y.shape[0]
        
        rho = hyp[-3]
        logsigma_n_L = hyp[-2]
        logsigma_n_H = hyp[-1]        
        sigma_n_L = np.exp(logsigma_n_L)
        sigma_n_H = np.exp(logsigma_n_H)
        
        theta_L = hyp[self.idx_theta_L]
        theta_H = hyp[self.idx_theta_H]
        
        K_LL = self.kernel(X_L, X_L, theta_L) + np.eye(NL)*sigma_n_L
        K_LH = rho*self.kernel(X_L, X_H, theta_L)
        K_HH = rho**2 * self.kernel(X_H, X_H, theta_L) + \
                        self.kernel(X_H, X_H, theta_H) + np.eye(NH)*sigma_n_H
        K = np.vstack((np.hstack((K_LL,K_LH)),
                       np.hstack((K_LH.T,K_HH))))
        L = np.linalg.cholesky(K + np.eye(N)*self.jitter) 
        self.L = L
        
        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))    
        NLML = 0.5*np.matmul(np.transpose(y),alpha) + \
               np.sum(np.log(np.diag(L))) + 0.5*np.log(2.*np.pi)*N  
        return NLML[0,0]
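
The value assembled here is the usual Gaussian negative log marginal likelihood, 0.5*y^T K^{-1} y + sum(log diag(L)) + 0.5*N*log(2*pi), where sum(log diag(L)) equals half the log-determinant of K. A small check of that identity on a toy SPD matrix (shapes and values are illustrative):

import numpy as np

N = 4
A = np.random.randn(N, N)
K = A @ A.T + N * np.eye(N)                  # random SPD covariance
y = np.random.randn(N, 1)

L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
nlml_chol = 0.5 * (y.T @ alpha) + np.sum(np.log(np.diag(L))) \
            + 0.5 * N * np.log(2.0 * np.pi)

nlml_direct = 0.5 * (y.T @ np.linalg.solve(K, y)) \
              + 0.5 * np.linalg.slogdet(K)[1] + 0.5 * N * np.log(2.0 * np.pi)
assert np.allclose(nlml_chol, nlml_direct)
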
Example #5
def partial_derivatives(x, y, W, V, b, c):
	# Filling in some dummy values
	# THIS IS WHERE YOU WILL WRITE YOUR PARTIAL DERIVATIVES
	s = b + W @ x
	h = np.tanh(s)
	f = c + V @ h
	eHat = np.zeros(c.shape)
	eHat[y] = 1

	# dLdf: -e + g(f(x))
	dLdf = -eHat + np.exp(f) / np.sum(np.exp(f))

	# dLdc = dL/df * df/dc
	dLdc = -eHat + (np.exp(f) / np.sum(np.exp(f)))

	# dLdV = dL/df * htranspose
	h_transpose = np.transpose(h)
	dLdV = dLdf * h_transpose

	# dLdb = sig'(b + Wx) elementwise mult (Vtranspose * dLdf)
	sigp = lambda x : 1 - np.tanh(x)**2
	V_transpose = np.transpose(V)
	dLh = V_transpose @ dLdf
	dLdb = sigp(s) * dLh

	# dLdW = dL/df * df/dW
	x_transpose = np.transpose(x)
	dLdW = dLdb * x_transpose

	return dLdW, dLdV, dLdb, dLdc
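
These partials correspond to a one-hidden-layer network with a tanh hidden layer and softmax cross-entropy loss, L = -log softmax(c + V tanh(b + W x))[y]. One way to sanity-check them is a finite-difference comparison; the sketch below assumes the partial_derivatives function above is in scope and that x, b, c follow the column-vector convention it uses:

import numpy as np

def loss(x, y, W, V, b, c):
    f = c + V @ np.tanh(b + W @ x)
    return -f[y, 0] + np.log(np.sum(np.exp(f)))     # -log softmax(f)[y]

rng = np.random.default_rng(0)
D, H, K = 3, 4, 5                                    # toy sizes
x = rng.normal(size=(D, 1)); y = 2
W, V = rng.normal(size=(H, D)), rng.normal(size=(K, H))
b, c = rng.normal(size=(H, 1)), rng.normal(size=(K, 1))

dLdW, dLdV, dLdb, dLdc = partial_derivatives(x, y, W, V, b, c)

eps = 1e-6
Wp = W.copy(); Wp[0, 0] += eps
fd = (loss(x, y, Wp, V, b, c) - loss(x, y, W, V, b, c)) / eps
print(abs(fd - dLdW[0, 0]))                          # should be small (finite-difference error)
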
Example #6
def log_py_zM_bin_j(lambda_bin_j, y_bin_j, zM, k, nj_bin_j): 
    ''' Compute log p(y_j | zM, s1 = k1) of the jth binary/count variable
    
    lambda_bin_j ( (r + 1) 1darray): Coefficients of the binomial distributions in the GLLVM layer
    y_bin_j (numobs 1darray): The subset containing only the binary/count variables in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1 of the mixture
    k (int): The number of components of the mixture
    nj_bin_j (int): The number of possible values/maximum values of the jth binary/count variable
    --------------------------------------------------------------
    returns (ndarray): p(y_j | zM, s1 = k1)
    '''
    M = zM.shape[0]
    r = zM.shape[1]
    numobs = len(y_bin_j)
    
    yg = np.repeat(y_bin_j[np.newaxis], axis = 0, repeats = M)
    yg = yg.astype(float)

    nj_bin_j = float(nj_bin_j)

    coeff_binom = binom(nj_bin_j, yg).reshape(M, 1, numobs)
    
    eta = np.transpose(zM, (0, 2, 1)) @ lambda_bin_j[1:].reshape(1, r, 1)
    eta = eta + lambda_bin_j[0].reshape(1, 1, 1) # Add the constant
    
    den = nj_bin_j * log_1plusexp(eta)
    num = eta @ y_bin_j[np.newaxis, np.newaxis]  
    log_p_y_z = num - den + np.log(coeff_binom)
    
    return np.transpose(log_p_y_z, (0, 2, 1)).astype(float)
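
A hedged usage sketch with dummy shapes, assuming the function above and this helper code live in the same module; `binom` and `log_1plusexp` are not defined in the snippet, so the sketch assumes scipy.special.binom and a log(1+exp) helper with the obvious meaning:

import numpy as np
from scipy.special import binom          # assumed source of `binom`

def log_1plusexp(x):
    # assumed helper: elementwise log(1 + exp(x))
    return np.log1p(np.exp(x))

M, r, k, numobs = 10, 2, 3, 6
zM = np.random.randn(M, r, k)                       # Monte Carlo copies of z
lambda_bin_j = np.random.randn(r + 1)               # intercept + r slopes
y_bin_j = np.random.randint(0, 2, size=numobs)      # a binary variable (nj = 1)

out = log_py_zM_bin_j(lambda_bin_j, y_bin_j, zM, k, nj_bin_j=1)
print(out.shape)                                    # (M, numobs, k)
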
Example #7
 def vjp(g):
     vjps = []
     q_vjp = solve_sylvester(anp.transpose(a), anp.transpose(b), g)
     if 0 in argnums: vjps.append(-anp.dot(q_vjp, anp.transpose(ans)))
     if 1 in argnums: vjps.append(-anp.dot(anp.transpose(ans), q_vjp))
     if 2 in argnums: vjps.append(q_vjp)
     return tuple(vjps)
    def predict(self,X_star):
        X_L = self.X_L
        y_L = self.y_L
        X_H = self.X_H
        y_H = self.y_H
        L = self.L
        
        y = np.vstack((y_L,y_H))
        
        rho = self.hyp[-3]
        theta_L = self.hyp[self.idx_theta_L]
        theta_H = self.hyp[self.idx_theta_H]
                               
        psi1 = rho*self.kernel(X_star, X_L, theta_L)
        psi2 = rho**2 * self.kernel(X_star, X_H, theta_L) + \
                        self.kernel(X_star, X_H, theta_H)
        psi = np.hstack((psi1,psi2))

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))
        pred_u_star = np.matmul(psi,alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L,psi.T))
        var_u_star = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                     self.kernel(X_star, X_star, theta_H) - np.matmul(psi,beta)
        
        return pred_u_star, var_u_star
Example #9
    def predict_u(self,X_star):
        X_u = self.X_u
        y_u = self.y_u
        
        X_f = self.X_f
        y_f = self.y_f
        
        y = np.vstack((y_u, y_f))

        L = self.L
                
        theta = self.hyp[:-1]
        
        K_uu = self.k_uu(X_star, X_u, theta)
        K_uf = self.k_uf(X_star, X_f, theta)
        psi = np.hstack((K_uu, K_uf))
        
        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))
        pred_u_star = np.matmul(psi,alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L,psi.T))
        var_u_star = self.k_uu(X_star, X_star, theta) - np.matmul(psi,beta)
        
        if not isinstance(pred_u_star, np.ndarray):
            pred_u_star = pred_u_star._value
        if not isinstance(var_u_star, np.ndarray):
            var_u_star = var_u_star._value
        
        return pred_u_star, var_u_star
Example #10
    def likelihood(self, hyp):
        X_u = self.X_u
        y_u = self.y_u
        
        X_f = self.X_f
        y_f = self.y_f
        
        y = np.vstack((y_u, y_f))

        N = y.shape[0]
        N_f = y_f.shape[0]
        
        theta = hyp[:-1]
        sigma_n = np.exp(hyp[-1])
               
        K_uu = self.k_uu(X_u, X_u, theta)
        K_uf = self.k_uf(X_u, X_f, theta)
    
        K_ff = self.k_ff(X_f, X_f, theta) + np.eye(N_f)*sigma_n
        
        K = np.vstack((np.hstack((K_uu, K_uf)),
                       np.hstack((K_uf.T, K_ff))))
        
        L = np.linalg.cholesky(K + np.eye(N)*self.jitter) 
        self.L = L
        
        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L,y))    
        NLML = 0.5*np.matmul(np.transpose(y),alpha) + \
               np.sum(np.log(np.diag(L))) + 0.5*np.log(2.*np.pi)*N  
        return NLML[0,0]
def partial_derivatives(x, y, W, V, b, c):
    # Filling in some dummy values
    # THIS IS WHERE YOU WILL WRITE YOUR PARTIAL DERIVATIVES
    s = b + W @ x
    h = np.tanh(s)
    f = c + np.matmul(V, h)

    # dLdf: -e + g(f(x))
    dLdf = -f * y + np.exp(f) / np.sum(np.exp(f))

    # dLdc = dL/df * df/dc
    dLdc = dLdf * np.exp(f) / np.sum(np.exp(c))

    # dLdV = dL/df * htranspose
    h_transpose = np.transpose(h)
    dLdV = dLdf * h_transpose

    # dLdb = sig'(b + Wx) @ (Vtranspose * dLdf)
    sigp = lambda x: 1 - np.tanh(x)**2
    V_transpose = np.transpose(V)
    dLh = np.matmul(V_transpose, dLdf)
    dLdb = sigp(s) * dLh

    # dLdW = dL/df * df/dW
    x_transpose = np.transpose(x)
    sigp_of_s = sigp(s)
    dLdW = sigp_of_s * np.multiply(dLh, x_transpose)

    return dLdW, dLdV, dLdb, dLdc
Example #12
    def predict(self, X_star):
        # Normalize data
        X_star = (X_star - self.Xmean) / self.Xstd

        X = self.X
        y = self.y

        L = self.L

        theta = self.hyp[:-1]

        psi = self.kernel(X_star, X, theta)

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)

        # De-normalize
        pred_u_star = pred_u_star * self.Ystd + self.Ymean
        var_u_star = var_u_star * self.Ystd**2

        if not isinstance(pred_u_star, np.ndarray):
            pred_u_star = pred_u_star._value
        if not isinstance(var_u_star, np.ndarray):
            var_u_star = var_u_star._value

        return pred_u_star, var_u_star
Example #13
def Local2Global_Coord(rot_mat, trans_vector, points_in_local):
    '''
    function Local2Global_Coord(rot_mat, trans_vector, points_in_local)

    - Takes "rotation matrix", whereby the columns form an orthonormal basis. The "rotation matrix" should describe the axes of the new coordinate system in terms of the global coordinate system. The matrix should be 3x3 and be invertible.
    [ e_1  e_2  e_3 ]

    - Takes translation vector of size 3, which describes translation from global origin to the new local origin (global origin ----> local origin).

    - Takes points defined in the local coordinate frame.

    - Returns positions (which were originally defined in the local coordinate frame) in the global coordinate frame.
    '''
    if rot_mat.shape[0] != rot_mat.shape[1]:
        raise ValueError('Rotation Matrix should be square')
    elif trans_vector.shape != (3, ) and trans_vector.shape != (1, 3):
        raise ValueError(
            'Translation Matrix should be an array of size 3 or 1x3 matrix')

    rotated_points = np.transpose(
        np.matmul(rot_mat, np.transpose(points_in_local)))

    points_in_global = rotated_points + trans_vector

    return points_in_global
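
A short usage example (the rotation is 90 degrees about z, so the local x-axis points along the global y-axis; values are illustrative and assume the function above is in scope):

import numpy as np

rot = np.array([[0., -1., 0.],       # columns = local axes expressed in the global frame
                [1.,  0., 0.],
                [0.,  0., 1.]])
trans = np.array([10., 0., 0.])      # global position of the local origin
pts_local = np.array([[1., 0., 0.],
                      [0., 2., 0.]])

print(Local2Global_Coord(rot, trans, pts_local))
# [[10.  1.  0.]
#  [ 8.  0.  0.]]
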
Example #14
def debug_scp_iteration_plot( tx_next, u_next, xbar, ubar, x0, T, i_iter):

	unl = u_next
	x_curr = x0
	
	Xnl = []
	Vnl_nlx = []
	Vnl_lx = []
	tV_nlx = []
	tV_lx = []

	for k,t in enumerate(T):
		x_next = x_curr + dynamics.get_dxdt( x_curr, unl[:,k], t) * param.get('dt')
		R_k, w_k = dynamics.get_linear_lyapunov( xbar[:,k], ubar[:,k], t)
		Vnl_nlx.append( dynamics.get_V( x_curr, t))
		Vnl_lx.append( dynamics.get_V( tx_next[:,k],t))
		tV_nlx.append( np.matmul( R_k, x_curr) + w_k )
		tV_lx.append( np.matmul( R_k, tx_next[:,k]) + w_k)		
		Xnl.append( x_curr)
		x_curr = x_next

	Xnl = np.asarray(Xnl)
	Vnl_nlx = np.asarray(Vnl_nlx)
	Vnl_lx = np.asarray(Vnl_lx)
	tV_nlx = np.asarray(tV_nlx)
	tV_lx = np.asarray(tV_lx)

	plot_scp_iteration_state( Xnl, np.transpose(tx_next,(1,0,2)), \
		np.transpose(xbar,(1,0,2)), T, title = str(param.get('controller')) + ' State' + \
		'\nIteration: ' + str(i_iter) + '\nTime: ' + str(T[0]))

	plot_scp_iteration_lyapunov( np.squeeze(Vnl_nlx), np.squeeze(Vnl_lx), np.squeeze( tV_nlx), \
		np.squeeze( tV_lx), T, title = str(param.get('controller')) + ' Lyapunov' + \
		'\nIteration: ' + str(i_iter) + '\nTime: ' + str(T[0]))
Example #15
def log_py_zM_categ_j(lambda_categ_j, y_categ_j, zM, k, nj_categ_j):
    ''' Compute log p(y_j | zM, s1 = k1) of each categorical variable 
    
    lambda_categ_j (nj_categ x (r + 1) ndarray): Coefficients of the categorical distributions in the GLLVM layer
    y_categ_j (numobs 1darray): The jth categorical variable in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1 of the mixture
    k (int): The number of components of the mixture
    nj_categ_j (int): The number of possible values of the jth categorical variable
    --------------------------------------------------------------
    returns (ndarray): The p(y_j | zM, s1 = k1) for the jth categorical variable
    '''  
    epsilon = 1E-10

    r = zM.shape[1]
    nj = y_categ_j.shape[1]
        
    zM_broad = np.expand_dims(np.expand_dims(np.transpose(zM, (0, 2, 1)), 2), 3)
    lambda_categ_j_ = lambda_categ_j.reshape(nj, r + 1, order = 'C')

    eta = zM_broad @ lambda_categ_j_[:, 1:][n_axis, n_axis, ..., n_axis] # Check that we use r and not k?
    eta = eta + lambda_categ_j_[:,0].reshape(1, 1, nj_categ_j, 1, 1) # Add the constant
    
    pi = softmax_(eta.astype(float), axis = 2)
    # Numeric stability
    pi = np.where(pi <= 0, epsilon, pi)
    pi = np.where(pi >= 1, 1 - epsilon, pi)

    yg = np.expand_dims(np.expand_dims(y_categ_j, 1), 1)[..., np.newaxis, np.newaxis] 
    log_p_y_z = yg * np.log(pi[n_axis]) 
    
    # Reshaping output
    log_p_y_z = log_p_y_z.sum((3)) # Summing over the modalities nj
    log_p_y_z = log_p_y_z[:,:,:,0,0] # Deleting useless axes
        
    return np.transpose(log_p_y_z,(1,0, 2))
Example #16
def batched_dot(a, b):
    if len(a.shape) != 3 or len(b.shape) != 3 or a.shape[0] != b.shape[0]:
        raise ValueError(
            "a,b must be 3-dimensional arrays, with a.shape[0]==b.shape[0] and a.shape[2]==b.shape[1]"
        )
    elif a.shape[0] == 1:
        ## use numpy.dot for blas
        a = np.reshape(a, a.shape[1:])
        b = np.reshape(b, b.shape[1:])
        c = np.dot(a, b)
        return np.reshape(c, [1] + list(c.shape))
    elif a.shape[2] == 1:
        ## the main cost is simply allocating space for the array,
        ## so we are better off doing things in serial
        a = np.reshape(a, a.shape[:-1])
        b = np.reshape(b, (b.shape[0], b.shape[2]))
        if a.shape[-1] > 1 and b.shape[-1] > 1:
            ## batch outer product
            return np.einsum("ij,ik->ijk", a, b)
        else:
            ## broadcasted element-wise multiplication
            outshape = (a.shape[0], a.shape[1], b.shape[1])
            a = np.transpose(a)
            b = np.transpose(b)
            if a.shape[0] == 1:
                a = np.reshape(a, [-1])
            if b.shape[0] == 1:
                b = np.reshape(b, [-1])
            return np.transpose(np.reshape(a * b, outshape[::-1]))
    else:
        ## parallel batched matrix multiply
        return _par_matmul(a, b)
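
batched_dot is meant to behave like a batched matrix multiply, result[i] = a[i] @ b[i]. The general branch dispatches to _par_matmul, which lives elsewhere in the snippet's module, but the two special-cased branches can be checked standalone against np.matmul (assuming the function above is in scope):

import numpy as np

a = np.random.randn(1, 4, 3)
b = np.random.randn(1, 3, 5)
assert np.allclose(batched_dot(a, b), np.matmul(a, b))   # single-batch branch

a = np.random.randn(6, 4, 1)
b = np.random.randn(6, 1, 5)
assert np.allclose(batched_dot(a, b), np.matmul(a, b))   # batched outer-product branch
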
Example #17
 def _grad_L2loss(self, beta, reg_lambda, X, y):
     n_samples = float(X.shape[0])
     z = np.dot(X, beta)
     #grad_beta = 1. / n_samples * np.transpose(np.dot(np.transpose(z - y), X))
     grad_beta = np.transpose(np.dot(np.transpose(z - y), X))
     print('grad_beta 0,1', grad_beta[0:2])
     return grad_beta
Example #18
    def draw_posterior_samples(self, X_star, N_samples=1):
        # Normalize data
        X_star = (X_star - self.Xmean) / self.Xstd

        X = self.X
        y = self.y

        L = self.L

        theta = self.hyp[:-1]

        psi = self.kernel(X_star, X, theta)

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)

        samples = np.random.multivariate_normal(pred_u_star.flatten(),
                                                var_u_star, N_samples).T

        # De-normalize
        samples = samples * self.Ystd + self.Ymean

        return samples
Example #19
    def ExpectedImprovement(self, X_star):
        # Normalize data
        X_star = (X_star - self.Xmean) / self.Xstd

        X = self.X
        y = self.y

        L = self.L

        theta = self.hyp[:-1]

        psi = self.kernel(X_star, X, theta)

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
        var_u_star = np.abs(np.diag(var_u_star))[:, None]

        # Expected Improvement
        best = np.min(y)
        Z = (best - pred_u_star) / var_u_star
        EI_acq = (best - pred_u_star) * norm.cdf(Z) + var_u_star * norm.pdf(Z)

        return EI_acq
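
For reference, the textbook form of Expected Improvement (for minimization) standardizes by the posterior standard deviation, EI(x) = (f_best - mu) * Phi(Z) + sigma * phi(Z) with Z = (f_best - mu) / sigma. A minimal sketch of that form, written against a plain mean vector and diagonal variance rather than the class above:

import numpy as np
from scipy.stats import norm

def expected_improvement(mu, var, best):
    # mu, var: posterior mean and (diagonal) variance at the candidate points
    sigma = np.sqrt(np.maximum(var, 1e-12))
    Z = (best - mu) / sigma
    return (best - mu) * norm.cdf(Z) + sigma * norm.pdf(Z)
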
Example #20
    def predict_H(self, X_star):
        # Normalize data
        X_star = (X_star - self.Xmean) / self.Xstd

        X_L = self.X_L
        y_L = self.y_L
        X_H = self.X_H
        y_H = self.y_H

        L = self.L

        y = np.vstack((y_L, y_H))

        rho = self.hyp[-3]
        theta_L = self.hyp[self.idx_theta_L]
        theta_H = self.hyp[self.idx_theta_H]

        psi1 = rho * self.kernel(X_star, X_L, theta_L)
        psi2 = rho**2 * self.kernel(X_star, X_H, theta_L) + \
                        self.kernel(X_star, X_H, theta_H)
        psi = np.hstack((psi1, psi2))

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                     self.kernel(X_star, X_star, theta_H) - np.matmul(psi,beta)

        # De-normalize
        pred_u_star = pred_u_star * self.Ystd + self.Ymean
        var_u_star = var_u_star * self.Ystd**2

        return pred_u_star, var_u_star
Example #21
def nn_predict_GCN(params, x):

    # x: NSAMPLES x NFEATURES
    U = hyper['U']
    xf = np.matmul(x, U)
    xf = np.expand_dims(xf, 1)  # NSAMPLES x 1 x NFEATURES
    xf = np.transpose(xf)  # NFEATURES x 1 x NSAMPLES

    # Filter
    yf = np.matmul(params['W1'], xf)  # for each feature
    yf = np.transpose(yf)  # NSAMPLES x NFILTERS x NFEATURES
    yf = np.reshape(yf, [-1, hyper['NFEATURES']])

    # Transform back to graph domain
    Ut = np.transpose(U)
    y = np.matmul(yf, Ut)
    y = np.reshape(y, [-1, hyper['F'], hyper['NFEATURES']])
    y += params['b1']  # NSAMPLES x NFILTERS x NFEATURES

    # nonlinear layer
    y = ReLU(y)
    # y = np.tanh(y)

    # dense layer
    y = np.reshape(y, [-1, hyper['F']*hyper['NFEATURES']])
    y = np.matmul(y, params['W2']) + params['b2']


    outputs = y

    return outputs - logsumexp(outputs, axis=1, keepdims=True)
Example #22
 def vjp(g):
     vjps = []
     q_vjp = solve_sylvester(anp.transpose(a), anp.transpose(b), g)
     if 0 in argnums: vjps.append(-anp.dot(q_vjp, anp.transpose(ans)))
     if 1 in argnums: vjps.append(-anp.dot(anp.transpose(ans), q_vjp))
     if 2 in argnums: vjps.append(q_vjp)
     return tuple(vjps)
    def predict(self, X_star):
        hyp = self.hyp
        theta_L = hyp[self.idx_theta_L]
        theta_H = hyp[self.idx_theta_H]
        rho = np.exp(hyp[-3])
        mean_L = theta_L[0]
        mean_H = rho * mean_L + theta_H[0]



        X_L = self.X_L
        y_L = self.y_L - mean_L
        X_H = self.X_H
        y_H = self.y_H - mean_H
        L = self.L

        y = np.vstack((y_L, y_H))

        psi1 = rho * self.kernel(X_star, X_L, theta_L)
        psi2 = rho ** 2 * self.kernel(X_star, X_H, theta_L) + \
               self.kernel(X_star, X_H, theta_H)
        psi = np.hstack((psi1, psi2))

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = mean_H + np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = rho ** 2 * self.kernel(X_star, X_star, theta_L) + \
                     self.kernel(X_star, X_star, theta_H) - np.matmul(psi, beta)

        return pred_u_star, var_u_star
    def forward(self, X1, X2):
        """
        Actual computation of the matrix of squared distances (see details above)

        :param X1: input data of size (n1,d)
        :param X2: input data of size (n2,d)
        :param inverse_bandwidths_internal: self.inverse_bandwidths_internal
        """
        # In case inverse_bandwidths is of size (1, dimension), dimension>1,
        # ARD is handled by broadcasting
        inverse_bandwidths = anp.reshape(self._inverse_bandwidths(), (1, -1))

        if X2 is X1:
            X1_scaled = anp.multiply(X1, inverse_bandwidths)
            D = -2.0 * anp.dot(X1_scaled, anp.transpose(X1_scaled))
            X1_squared_norm = anp.sum(anp.square(X1_scaled), axis=1)
            D = D + anp.reshape(X1_squared_norm, (1, -1))
            D = D + anp.reshape(X1_squared_norm, (-1, 1))
        else:
            X1_scaled = anp.multiply(X1, inverse_bandwidths)
            X2_scaled = anp.multiply(X2, inverse_bandwidths)
            X1_squared_norm = anp.sum(anp.square(X1_scaled), axis=1)
            X2_squared_norm = anp.sum(anp.square(X2_scaled), axis=1)
            D = -2.0 * anp.matmul(X1_scaled, anp.transpose(X2_scaled))
            D = D + anp.reshape(X1_squared_norm, (-1, 1))
            D = D + anp.reshape(X2_squared_norm, (1, -1))

        return anp.abs(D)
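
The computation above is the usual expansion ||u - v||^2 = ||u||^2 + ||v||^2 - 2 u.v applied to bandwidth-scaled inputs. A quick numpy check of that identity with illustrative bandwidths:

import numpy as np

X1 = np.random.randn(4, 3)
X2 = np.random.randn(5, 3)
inv_bw = np.array([[1.0, 0.5, 2.0]])     # one inverse bandwidth per dimension

A = X1 * inv_bw
B = X2 * inv_bw
D = -2.0 * A @ B.T + (A ** 2).sum(1)[:, None] + (B ** 2).sum(1)[None, :]

D_direct = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
assert np.allclose(np.abs(D), D_direct)
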
Example #25
    def ExpectedImprovement(self, X_star):
        # Normalize data
        X_star = (X_star - self.Xmean) / self.Xstd

        X = self.X
        y = self.y

        L = self.L

        theta = self.hyp[:-1]

        psi = self.kernel(X_star, X, theta)

        alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
        pred_u_star = np.matmul(psi, alpha)

        beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
        var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
        var_u_star = np.abs(np.diag(var_u_star))[:, None]

        # Expected Improvement
        # from https://people.orie.cornell.edu/pfrazier/Presentations/2011.11.INFORMS.Tutorial.pdf
        best = np.min(y)
        delta = -(pred_u_star - best)
        deltap = -(pred_u_star - best)
        deltap[delta < 0] = 0
        Z = delta / np.sqrt(var_u_star)

        EI_acq = deltap - np.abs(deltap) * norm.cdf(-Z) + np.sqrt(
            var_u_star) * norm.pdf(Z)

        if not isinstance(EI_acq, np.ndarray):
            EI_acq = EI_acq._value

        return EI_acq
Example #26
def estimateC_weighted(W, R, B, D, lam):
    '''
    :param W : the heatmap
    :param R : the rotation matrix
    :param B : the base matrix
    :param D : the weight
    :param lam : lam value used to simplify some results
    :return : C0
    '''
    p = len(W[0])
    k = int(B.shape[0] / 3)
    d = np.diag(D)
    D = np.zeros((2 * p, 2 * p))
    eps = sys.float_info.epsilon

    for i in range(p):
        D[2 * i, 2 * i] = d[i]
        D[2 * i + 1, 2 * i + 1] = d[i]

    # next we work on the linear system y = X*C
    y = W.flatten()  # vectorized W
    X = np.zeros((2 * p, k))  # each column is a rotated Bk

    for i in range(k):
        RBi = np.dot(R, B[3 * i:3 * (i + 1), :])
        X[:, i] = RBi.flatten()

    # we want to calculate C = pinv(X'*D*X+lam*eye(size(X,2)))*X'*D*y and then C = C'

    A = np.dot(np.dot(np.transpose(X), D), X) + lam * np.eye(X.shape[1])
    tol = max(A.shape) * np.linalg.norm(A, np.inf) * eps
    C = np.dot(np.dot(np.linalg.pinv(A), np.dot(np.transpose(X), D)), y)

    return np.transpose(C)
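
As the comment says, C solves a weighted ridge regression, C = (X^T D X + lam*I)^{-1} X^T D y, i.e. the minimizer of ||D^{1/2}(y - X C)||^2 + lam*||C||^2. A small stationarity check of that closed form (toy shapes, illustrative values):

import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(8, 3))
D = np.diag(rng.uniform(0.5, 2.0, size=8))
y = rng.normal(size=8)
lam = 0.1

C = np.linalg.solve(X.T @ D @ X + lam * np.eye(3), X.T @ D @ y)

# gradient of the penalized weighted least-squares objective vanishes at C
grad = -2.0 * X.T @ D @ (y - X @ C) + 2.0 * lam * C
assert np.allclose(grad, 0.0)
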
def avg_pred_log(w,images):
    log_pc_x = 0
    for i in range(0,images.shape[0]):
        current_log_pc_x = np.dot(np.transpose(w),images[i,:]) - logsumexp(np.dot(np.transpose(w),images[i,:]))
        log_pc_x = log_pc_x + current_log_pc_x
        
    return np.sum(log_pc_x)/float(images.shape[0])
    def grad_power_noise(x):
        """
        Compute the gradient of the power criterion with respect to the width of Gaussian
        RBF kernel and the noise vector.

        Args:
            x: 1 + 2J*d_n vector
        Returns:
            the gradient of the power criterion with respect to kernel width/latent vector
        """

        with util.ContextTimer() as t:
            width, z = unflatten(x)
            zp = z[:J]
            zq = z[J:]

            # Compute the Jacobian of the generators with respect to noise vector
            torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1),
                                         requires_grad=True)
            torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1),
                                         requires_grad=True)
            gp_grad = compute_jacobian(torch_zp, gen_p(torch_zp).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            gq_grad = compute_jacobian(torch_zq, gen_q(torch_zq).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            v_grad_z = np.vstack([gp_grad, gq_grad])
            v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise
            
            # Compute the Jacobian of the feature extractor with respect to noise vector
            vp_flatten = to_torch_variable(
                gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True
            )
            vq_flatten = to_torch_variable(
                gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True
            )
            size = (model_input_size, model_input_size)
            upsample = nn.Upsample(size=size, mode='bilinear')
            fp = model(upsample(vp_flatten))
            fq = model(upsample(vq_flatten))
            fp_grad = compute_jacobian(vp_flatten, fp.view(J, -1))  # J x d_nn x C x H x W
            fq_grad = compute_jacobian(vq_flatten, fq.view(J, -1))  # J x d_nn x C x H x W
            f_grad_v = np.vstack([fp_grad, fq_grad])
            f_grad_v = f_grad_v.reshape((2*J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

            # Compute the gradient of the objective function with respect to
            # the gaussian width and test locations
            F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
            F = np.reshape(F, (2*J, -1))
            grad_obj = autograd.elementwise_grad(flat_obj_feat)  # 1+(2J)*d_nn input
            obj_grad_f = grad_obj(flatten(width, F))
            obj_grad_width = obj_grad_f[0]
            obj_grad_f = np.reshape(obj_grad_f[1:], [(2*J), -1])  # 2J x d_nn array

            obj_grad_v = inner1d(obj_grad_f, np.transpose(f_grad_v, (2, 0, 1)))  # 2J x d_pix
            obj_grad_z = inner1d(obj_grad_v.T, np.transpose(v_grad_z, (2, 0, 1))).flatten()

        return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z]) 
 def Predict(self, X):
     kstar = self.covariance(X, self.X, self.scales)
     predictive_mean = np.matmul(np.transpose(kstar), self.alpha)
     v = solve_triangular(self.cholesky, kstar, lower=True)
     predictive_variance = self.covariance(X, X, self.scales) - np.matmul(
         np.transpose(v), v)
     return predictive_mean.reshape(
         -1, 1), np.diag(predictive_variance).reshape(-1, 1, 1)
Example #30
    def get_direction(self, x):
        self.jacob_func = jacobian(self.evaluate)

        self.jacobian   = self.jacob_func(x)
        self.fx         = self.evaluate(x)
        self.gradient   = 2*np.transpose(self.jacobian) @ self.fx
        self.hessian    = np.transpose(self.jacobian) @ self.jacobian

        return np.zeros(4)
    def get_S(self):
        arg1 = (self.deltaK @ np.transpose(self.deltaK)) / (
            np.transpose(self.deltaK) @ self.gamma)
        arg2 = (self.S[self.k] @ self.gamma) @ (
            self.gamma.T @ self.S[self.k]) / (
                self.gamma.T @ self.S[self.k] @ self.gamma)

        self.S[self.k + 1] = self.S[self.k] + arg1 - arg2
        return self.S[self.k + 1]
Example #32
def compute_stats(Ex, ExxT, ExnxT, inhomog):
    T = Ex.shape[-1]
    E_init_stats = ExxT[:,:,0], Ex[:,0], 1., 1.
    E_pair_stats = np.transpose(ExxT, (2, 0, 1))[:-1], \
        ExnxT.T, np.transpose(ExxT, (2, 0, 1))[1:], np.ones(T-1)
    E_node_stats = np.diagonal(ExxT.T, axis1=-1, axis2=-2), Ex.T, np.ones(T)

    if not inhomog:
        E_pair_stats = tuple(np.sum(x, axis=0) for x in E_pair_stats)

    return E_init_stats, E_pair_stats, E_node_stats
Example #33
 def vjp(g):
     result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                       axes     = [axes['out']['conv'],   axes[_Y_]['conv']],
                       dot_axes = [axes['out'][ignore_Y], axes[_Y_]['ignore']],
                       mode     = new_mode)
     new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
     return np.transpose(result, new_order)
Example #34
	def get_marginal(self, u, V, R, x_test):
		'''
		Current metric to test convergence: log-space
		predictive marginal likelihood
		'''
		I = self.sigx*np.identity(self.dimx)
		mu = np.zeros(self.dimx,)
		n_samples = 200
		ll = 0
		test_size = x_test.shape[0]
		
		for i in range(test_size):
			x = x_test[i]
			
			mc = 0
			for j in range(n_samples):
				w = self.sample_w(u, V)
				var = np.dot(w, np.transpose(w))
				var = np.add(var, I)
				px = gaussian.Gaussian_full(mu, var)
				px = px.eval(x)#eval_log_properly(x)
				mc = mc + px
				
			mc = mc/float(n_samples)
			mc = np.log(mc)
			ll += mc
			
		
		return (ll/float(test_size))
Example #35
def grad_convolve(argnum, g, ans, vs, gvs, A, B, axes=None, dot_axes=[(),()], mode='full'):
    assert mode in ['valid', 'full'], "Grad for mode {0} not yet implemented".format(mode)
    axes, shapes = parse_axes(A.shape, B.shape, axes, dot_axes, mode)
    if argnum == 0:
        X, Y = A, B
        _X_, _Y_ = 'A', 'B'
        ignore_Y = 'ignore_B'
    elif argnum == 1:
        X, Y = B, A
        _X_, _Y_ = 'B', 'A'
        ignore_Y = 'ignore_A'
    else:
        raise NotImplementedError("Can't take grad of convolve w.r.t. arg {0}".format(argnum))

    if mode == 'full':
        new_mode = 'valid'
    else:
        if any([x_size > y_size for x_size, y_size in zip(shapes[_X_]['conv'], shapes[_Y_]['conv'])]):
            new_mode = 'full'
        else:
            new_mode = 'valid'

    result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                      axes     = [axes['out']['conv'],   axes[_Y_]['conv']],
                      dot_axes = [axes['out'][ignore_Y], axes[_Y_]['ignore']],
                      mode     = new_mode)
    new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
    return np.transpose(result, new_order)
Example #36
	def eval_log_properly(self, x):
		det = np.linalg.det(self.Sigma)
		const = (self.size/2.0)*np.log(2*np.pi)
		const = -0.5*np.log(det) - const
		prec = np.linalg.inv(self.Sigma)
		t = np.subtract(x, self.Mu)
		v = np.dot(np.transpose(t), prec)
		v = -0.5*np.dot(v, t)
		return const + v
Example #37
def generate_data(beta,tau,n,num_times):
    num_features = len(beta)-1
    X = np.random.uniform(-2,2,(n,num_times,num_features))
    alpha = np.random.normal(0,tau,n)
    alpha = np.reshape(np.tile(alpha,num_times),(num_times,n))
    alpha = np.transpose(alpha)
    P = logistic(beta[0]+np.dot(X,beta[1:]))#+alpha)
    y = np.random.binomial(1,P)
    return X,y
Example #38
	def eval(self,x):
		#x and mu must have same dimensions
		det = np.linalg.det(self.Sigma)**(-0.5)
		const = (2*np.pi)**(-self.size/2.0)
		const = det*const
		prec = np.linalg.inv(self.Sigma)
		t = np.subtract(x, self.Mu)
		v = np.dot(np.transpose(t), prec)
		v = np.exp(-0.5*np.dot(v, t))
		return const*v
Example #39
def KL_two_gaussians(params):
    d = np.shape(params)[0]-1
    mu = params[0:d,0]
    toSigma = params[0:d,1:d+1]
    intSigma = toSigma-np.diag(np.diag(toSigma))+np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma-np.tril(intSigma)+np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    #print Sigma
    #print np.linalg.det(Sigma)
    return 1/2*(np.log(np.linalg.det(Sigma)/np.linalg.det(sigmaPrior))-d+np.trace(np.dot(np.linalg.inv(Sigma),sigmaPrior))+np.dot(np.transpose(mu-muPrior),np.dot(np.linalg.inv(Sigma),mu-muPrior)))
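
This is the closed-form KL divergence between two multivariate Gaussians, KL(N(m0, S0) || N(m1, S1)) = 1/2 * (log(|S1|/|S0|) - d + tr(S1^{-1} S0) + (m1 - m0)^T S1^{-1} (m1 - m0)), evaluated with the standard-normal prior in one slot and the Gaussian reconstructed from params in the other. A quick sanity check, assuming the function above is in scope: all-zero packed parameters reconstruct N(0, I), so the divergence is zero.

import numpy as np

# params = 0 gives mu = 0 and Sigma = I, hence KL = 0
print(KL_two_gaussians(np.zeros((4, 4))))   # ~0.0
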
Example #40
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components-1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[r * (self.n_chain) - 1
                          for r in range(1, self.n_unique+1)
                          for c in range(self.n_unique-1)],
                         [c * (self.n_chain)
                          for r in range(self.n_unique)
                          for c in range(self.n_unique) if c != r]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias = 1))
                else:
                    precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
    def loss(weights):
        mu1 = parser.get(weights, 'mu1')
        mu2 = parser.get(weights, 'mu2')
        sig1 = parser.get(weights, 'sig1')*np.eye(mu1.size)
        sig2 = parser.get(weights, 'sig2')*np.eye(mu1.size)

        
        return 0.5*( \
            np.log(np.linalg.det(sig2) / np.linalg.det(sig1)) \
            - mu1.size \
            + np.trace(np.dot(np.linalg.inv(sig2),sig1)) \
            #+ np.dot(np.dot(np.transpose(mu2 - mu1), np.linalg.inv(sig2)), mu2 - mu1 )
            + np.dot(np.dot(mu2 - mu1, np.linalg.inv(sig2)), np.transpose(mu2 - mu1 ))
            )
Example #42
def pair_mean_to_natural(A, sigma):
    assert 2 <= A.ndim == sigma.ndim <= 3
    ndim = A.ndim

    einstring = 'tji,tjk->tik' if ndim == 3 else 'ji,jk->ik'
    trans = (0, 2, 1) if ndim == 3 else (1, 0)
    temp = np.linalg.solve(sigma, A)

    Jxx = -1./2 * np.einsum(einstring, A, temp)
    Jxy = np.transpose(temp, trans)
    Jyy = -1./2 * np.linalg.inv(sigma)
    logZ = -1./2 * np.linalg.slogdet(sigma)[1]

    return Jxx, Jxy, Jyy, logZ
Example #43
    def G(self):
        full_W = np.array([node.w for node in self.nodes])
        WB = full_W[:,1:].reshape((self.K,self.K, self.B))

        # Weight matrix is summed over impulse response functions
        WT = WB.sum(axis=2)

        # Impulse response weights are normalized weights
        GT = WB / WT[:,:,None]

        # Then we transpose so that the impulse matrix is (outgoing x incoming x basis)
        G = np.transpose(GT, [1,0,2])

        # TODO: Decide if this is still necessary
        for k1 in range(self.K):
            for k2 in range(self.K):
                if G[k1,k2,:].sum() < 1e-2:
                    G[k1,k2,:] = 1.0/self.B
        return G
def pylds_E_step_inhomog(lds, data):
    T = data.shape[0]
    mu_init, sigma_init, A, sigma_states, C, sigma_obs = lds
    normalizer, smoothed_mus, smoothed_sigmas, E_xtp1_xtT = \
        _E_step(mu_init, sigma_init, A, sigma_states, C, sigma_obs, data)

    EyyT = np.einsum('ti,tj->tij', data, data)
    EyxT = np.einsum('ti,tj->tij', data, smoothed_mus)
    ExxT = smoothed_sigmas + np.einsum('ti,tj->tij', smoothed_mus, smoothed_mus)

    E_xt_xtT = ExxT[:-1]
    E_xtp1_xtp1T = ExxT[1:]
    E_xtp1_xtT = E_xtp1_xtT

    E_x1_x1T = smoothed_sigmas[0] + np.outer(smoothed_mus[0], smoothed_mus[0])
    E_x1 = smoothed_mus[0]

    E_init_stats = E_x1_x1T, E_x1, 1.
    E_pairwise_stats = E_xt_xtT.sum(0), E_xtp1_xtT.sum(0).T, E_xtp1_xtp1T.sum(0), T-1
    E_node_stats = ExxT, np.transpose(EyxT, (0, 2, 1)), EyyT, np.ones(T)

    return E_init_stats, E_pairwise_stats, E_node_stats
Example #45
def expectation(params,y,X,eps,N,u):
    #for each sample of theta, calculate likelihood
    #likelihood has participants
    #for each participant, we have N particles
    #with L samples, n participants, N particles per participant and sample, we have
    #L*n*N particles
    #get the first column to be mu
    d = np.shape(X)[-1]+1
    mu = params[0:d,0]
    toSigma = params[0:d,1:d+1]
    intSigma = toSigma-np.diag(np.diag(toSigma))+np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma-np.tril(intSigma)+np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    #iterate over number of samples of theta
    for j in range(np.shape(eps)[0]):
        beta = mu+np.dot(Sigma,eps[j,:])
        #this log likelihood will iterate over both the participants and the particles
        E+=log_likelihood(beta,y,X,u[j*(n*N):(j+1)*(n*N)])
    return E/len(beta)
def trance_quad(W, A): 
	return np.trace(np.dot(np.dot(np.transpose(W),A), W))
Example #47
def eval_log_prec(Mu, prec, x):
	t = np.subtract(x, Mu)
	v = np.dot(np.transpose(t), prec)
	v = -0.5*np.dot(v, t)
	return v
Example #48
def normal_pdf(theta,mu,Sigma):
    d = len(mu)
    #return np.exp(-(theta-mu)**2/(2*sigma**2))/np.sqrt(2*sigma**2*np.pi)
    return (2*np.pi)**(-d/2)*np.linalg.det(Sigma)**(-1/2)*np.exp(-np.dot(np.transpose(theta-mu),np.dot(np.linalg.inv(Sigma), theta-mu))/2)
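
A quick comparison against scipy's multivariate normal density (assuming the function above is in scope; values are illustrative):

import numpy as np
from scipy.stats import multivariate_normal

mu = np.array([0.5, -1.0])
Sigma = np.array([[1.0, 0.3],
                  [0.3, 2.0]])
theta = np.array([0.2, 0.1])

print(normal_pdf(theta, mu, Sigma))
print(multivariate_normal.pdf(theta, mean=mu, cov=Sigma))   # should agree
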
Example #49
def _vjp_sqrtm(ans, A, disp=True, blocksize=64):
    assert disp, "sqrtm vjp not implemented for disp=False"
    ans_transp = anp.transpose(ans)
    def vjp(g):
        return anp.real(solve_sylvester(ans_transp, ans_transp, g))
    return vjp
Example #50
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    count = 0
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                            u,f]).fit(self.n_lags))
                            ar_alpha.append(ar_mod[count].params[1:])
                            ar_resid.append(ar_mod[count].resid)
                            count += 1
                else:
                    # run one AR model on the part of the time series
                    # that has the most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                    mf,f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[f].params[1:])
                        ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u,f] = kmeans[u, f] - np.dot(
                        np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:

                precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0/(np.var(X[kmmod.labels_ == u]))

                    else:
                        precision_init[u] = np.linalg.inv\
                        (np.cov(np.transpose(X[kmmod.labels_ == u])))

                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)
def minConf_PQN(funObj, x, funProj, options=None):
    """
    The problems are of the form
                min funObj(x) s.t. x in C
    The projected quasi-Newton sub-problems are solved using the spectral
    projected gradient algorithm

    Parameters
    ----------
    funObj: function to minimize, return objective value as the first argument
            and gradient as the second argument
    funProj: function that returns projection of x onto C
    options:
        1) verbose: level of verbosity (0: no output, 1: final, 2: iter
        (default), 3: debug)
        2) optTol: tolerance used to check for optimality (default: 1e-5)
        3) progTol: tolerance used to check for progress (default: 1e-9)
        4) maxIter: maximum number of calls to funObj (default: 500)
        5) maxProject: maximum number of calls to funProj (default: 100000)
        6) numDiff: compute derivatives numerically (0: use user-supplied
            derivatives (default), 1: use finite differences, 2: use complex
            differentials)
        7) suffDec: sufficient decrease parameter in Armijo condition (default:
            1e-4)
        8) corrections: number of lbfgs corrections to store (default: 10)
        9) adjustStep: use quadratic initialization of line search (default: 0)
        10) bbInit: initialize sub-problem with Barzilai-Borwein step (default:
            0)
        11) SPGoptTol: optimality tolerance for SPG direction finding (default:
            1e-6)
        12) SPGiters: maximum number of iterations for SPG direction finding
            (default: 10)

    Returns
    -------
    x: optimal parameter values
    f: optimal objective value
    funEvals: number of function evaluations 
    """
    
    # number of variables/parameters
    nVars = len(x)
    
    # set default optimization settings
    options_default = {'verbose':2, 'numDiff':0, 'optTol':1e-5, 'progTol':1e-9, \
                'maxIter':500, 'maxProject':100000, 'suffDec':1e-4, \
                'corrections':10, 'adjustStep':0, 'bbInit':0, 'SPGoptTol':1e-6,\
                'SPGprogTol':1e-10, 'SPGiters':10, 'SPGtestOpt':0}
    options = setDefaultOptions(options, options_default)
    
    if options['verbose'] == 3:
        print('Running PQN...')
        print('Number of L-BFGS Corrections to store: ' +
              str(options['corrections']))
        print('Spectral initialization of SPG: ' + str(options['bbInit']))
        print('Maximum number of SPG iterations: ' + str(options['SPGiters']))
        print('SPG optimality tolerance: ' + str(options['SPGoptTol']))
        print('SPG progress tolerance: ' + str(options['SPGprogTol']))
        print('PQN optimality tolerance: ' + str(options['optTol']))
        print('PQN progress tolerance: ' + str(options['progTol']))
        print('Quadratic initialization of line search: ' +
              str(options['adjustStep']))
        print('Maximum number of function evaluations: ' +
              str(options['maxIter']))
        print('Maximum number of projections: ' + str(options['maxProject']))

    if options['verbose'] >= 2:
        print('{:10s}'.format('Iteration') +
              '{:10s}'.format('FunEvals') +
              '{:10s}'.format('Projections') +
              '{:15s}'.format('StepLength') +
              '{:15s}'.format('FunctionVal') +
              '{:15s}'.format('OptCond'))
    
    funEvalMultiplier = 1
    # project initial parameter vector
    # translate this function (Done!)
    x = funProj(x)
    projects = 1

    # evaluate initial parameters
    # translate this function (Done!)
    [f, g] = funObj(x)
    funEvals = 1

    # check optimality of initial point
    projects = projects + 1
    if np.max(np.abs(funProj(x-g)-x)) < options['optTol']:
        if options['verbose'] >= 1:
            print "First-Order Optimality Conditions Below optTol at Initial Point"
            return (x, f, funEvals)
    
    i = 1
    while funEvals <= options['maxIter']:
        # compute step direction
        # this is for initialization
        if i == 1:
            p = funProj(x-g)
            projects = projects + 1
            S = np.zeros((nVars, 0))
            Y = np.zeros((nVars, 0))
            Hdiag = 1
        else:
            y = g - g_old
            s = x - x_old

            # translate this function (Done!)
            [S, Y, Hdiag] = lbfgsUpdate(y, s, options['corrections'], \
                    options['verbose']==3, S, Y, Hdiag)

            # make compact representation
            k = Y.shape[1]
            L = np.zeros((k,k))
            for j in range(k):
                L[j+1:,j] = np.dot(np.transpose(S[:,j+1:]), Y[:,j])
            N = np.hstack((S/Hdiag, Y.reshape(Y.shape[0], Y.size//Y.shape[0])))
            M1 = np.hstack((np.dot(S.T,S)/Hdiag, L))
            M2 = np.hstack((L.T, -np.diag(np.diag(np.dot(S.T,Y)))))
            M = np.vstack((M1, M2))
            
            # translate this function (Done!)
            HvFunc = lambda v: v/Hdiag - np.dot(N,np.linalg.solve(M,np.dot(N.T,v)))
            
            if options['bbInit'] == True:
                # use Barzilai-Borwein step to initialize sub-problem
                alpha = np.dot(s,s)/np.dot(s,y)
                if alpha <= 1e-10 or alpha > 1e10:
                    alpha = min(1., 1./np.sum(np.abs(g)))
                # solve sub-problem
                xSubInit = x - alpha*g
                feasibleInit = 0
            else:
                xSubInit = x
                feasibleInit = 1

            # solve Sub-problem
            # translate this function (Done!)
            [p, subProjects] = solveSubProblem(x, g, HvFunc, funProj, \
                    options['SPGoptTol'], options['SPGprogTol'], \
                    options['SPGiters'], options['SPGtestOpt'], feasibleInit,\
                    xSubInit)
            projects = projects + subProjects

        d = p - x
        g_old = g
        x_old = x

        # check the progress can be made along the direction
        gtd = np.dot(g,d)
        if gtd > -options['progTol']:
            if options['verbose'] >= 1:
                print "Directional Derivative below progTol"
            break
        
        # select initial guess to step length
        if i == 1 or options['adjustStep'] == 0:
            t = 1.
        else:
            t = min(1., 2.*(f-f_old)/gtd)
        
        # bound step length on first iteration
        if i == 1:
            t = min(1., 1./np.sum(np.abs(g)))

        # evluate the objective and gradient at the initial step
        if t == 1:
            x_new = p
        else:
            x_new = x + t*d
        [f_new, g_new] = funObj(x_new)
        funEvals = funEvals + 1

        # backtracking line search
        f_old = f
        # translate isLegal (Done!)
        while f_new > f + options['suffDec']*np.dot(g,x_new-x) or \
                not isLegal(f_new):
            temp = t
            # backtrack to next trial value
            if not isLegal(f_new) or not isLegal(g_new):
                if options['verbose'] == 3:
                    print "Halving step size"
                t = t/2.
            else:
                if options['verbose'] == 3:
                    print "Cubic backtracking"
                # translate polyinterp (Done!)
                t = polyinterp(np.array([[0.,f,gtd],\
                                        [t,f_new,np.dot(g_new,d)]]))[0]

            # adjust if change is too small/large
            if t < temp*1e-3:
                if options['verbose'] == 3:
                    print "Interpolated value too small, Adjusting"
                t = temp*1e-3
            elif t > temp*0.6:
                if options['verbose'] == 3:
                    print "Interpolated value too large, Adjusting"
                t = temp*0.6

            # check whether step has become too small
            if np.sum(np.abs(t*d)) < options['progTol'] or t == 0:
                if options['verbose'] == 3:
                    print "Line search failed"
                t = 0
                f_new = f
                g_new = g
                break

            # evaluate new point
            f_prev = f_new
            t_prev = temp
            x_new = x + t*d
            [f_new, g_new] = funObj(x_new)
            funEvals = funEvals + 1

        # take step
        x = x_new
        f = f_new
        g = g_new

        optCond = np.max(np.abs(funProj(x-g)-x))
        projects = projects + 1

        # output log
        if options['verbose'] >= 2:
            print('{:10d}'.format(i) +
                  '{:10d}'.format(funEvals*funEvalMultiplier) +
                  '{:10d}'.format(projects) +
                  '{:15.5e}'.format(t) +
                  '{:15.5e}'.format(f) +
                  '{:15.5e}'.format(optCond))

        # check optimality
        if optCond < options['optTol']:
            print "First-order optimality conditions below optTol"
            break
        
        if np.max(np.abs(t*d)) < options['progTol']:
            if options['verbose'] >= 1:
                print "Step size below progTol"
            break

        if np.abs(f-f_old) < options['progTol']:
            if options['verbose'] >= 1:
                print "Function value changing by less than progTol"
            break

        if funEvals > options['maxIter']:
            if options['verbose'] >= 1:
                print "Function evaluation exceeds maxIter"
            break

        if projects > options['maxProject']:
            if options['verbose'] >= 1:
                print "Number of projections exceeds maxProject"
            break
        i = i + 1

    return (x, f, funEvals)
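
A hypothetical usage sketch: minimizing a simple quadratic over the nonnegative orthant. It assumes minConf_PQN and the helpers it calls (setDefaultOptions, lbfgsUpdate, solveSubProblem, isLegal, polyinterp) are all importable from the same module; the target vector and options below are illustrative.

import numpy as np

t = np.array([1.0, -2.0, 3.0])

def funObj(x):
    # objective value and gradient of ||x - t||^2
    return np.sum((x - t) ** 2), 2.0 * (x - t)

def funProj(x):
    # projection onto the feasible set {x : x >= 0}
    return np.maximum(x, 0.0)

x0 = np.zeros(3)
x_opt, f_opt, n_evals = minConf_PQN(funObj, x0, funProj, options={'verbose': 0})
# expected minimizer: roughly [1, 0, 3]
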
#!/usr/bin/python

import autograd.numpy as np
from autograd import grad

# Automatically find the gradient of a function
# Define a function Tr(W.T*A*W), we know that gradient = (A+A')*W
def trance_quad(W, A): 
	return np.trace(np.dot(np.dot(np.transpose(W),A), W))

#	Initial setup
n = 5
A = np.random.random((n,n))
W = np.random.random((n,1))



grad_foo = grad(trance_quad)       # Obtain its gradient function
print('Autogen Gradient : \n', grad_foo(W,A))
print('Theoretical Gradient : \n', np.dot((A+np.transpose(A)), W))

import pdb; pdb.set_trace()
Example #53
def KL_two_gaussians(params):
    mu = params[0:len(params)//2]
    Sigma = np.diag(np.exp(params[len(params)//2:]))
    d = len(mu)
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    return 1/2*(np.log(np.linalg.det(Sigma)/np.linalg.det(sigmaPrior))-d+np.trace(np.dot(np.linalg.inv(Sigma),sigmaPrior))+np.dot(np.transpose(mu-muPrior),np.dot(np.linalg.inv(Sigma),mu-muPrior)))