Code example #1
def full_conditional_Beta(y, X, sigma_iter, eta, inv_C):
    # Gibbs step for the regression coefficients Beta under a N(eta, C) prior.
    inv_Vb = inv_C + matfuncs.dot(X.T, X) / sigma_iter   # posterior precision
    Vb     = np.linalg.inv(inv_Vb)
    X_tild = matfuncs.dot(inv_C, eta) + matfuncs.dot(X.T, y) / sigma_iter
    mub    = matfuncs.dot(Vb, X_tild)                    # posterior mean
    Beta_iter = RanMNormalPrec(mub, inv_Vb)
    return Beta_iter
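
For reference, this is the standard conjugate Gaussian update: under the prior Beta ~ N(eta, C) (the code receives the prior precision inv_C directly) and likelihood y ~ N(X Beta, sigma^2 I), the full conditional being sampled is

\[ V_b = \Big(C^{-1} + \tfrac{1}{\sigma^2} X^\top X\Big)^{-1}, \qquad \mu_b = V_b\Big(C^{-1}\eta + \tfrac{1}{\sigma^2} X^\top y\Big), \qquad \beta \mid y, \sigma^2 \sim \mathcal{N}(\mu_b, V_b). \]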
Code example #2
def full_conditional_bu(num_zeros, num_splines, Knots, Z, Zt, sigma_e_iter, sigma_u_iter, y_tild):
    # Prior precision matrix over the linear and non-linear effects:
    # the linear effects get the prior 1/100, the splines get the
    # penalizing prior 1/sigma_u^2.
    D         = get_D(num_zeros, num_splines, 1, sigma_u_iter, Knots)
    inv_V_bu  = D + matfuncs.dot(Zt, Z) / sigma_e_iter
    V_bu      = np.linalg.inv(inv_V_bu)
    mu_bu     = matfuncs.dot(V_bu, matfuncs.dot(Zt, y_tild) / sigma_e_iter)
    bu        = RanMNormalPrec(mu_bu, inv_V_bu)
    return bu
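
This has the same conjugate structure as example #1, with the block prior precision D returned by get_D playing the role of C^{-1} and a zero prior mean (a reading of the code, assuming get_D returns a precision matrix):

\[ V_{bu} = \Big(D + \tfrac{1}{\sigma_e^2} Z^\top Z\Big)^{-1}, \qquad \mu_{bu} = \tfrac{1}{\sigma_e^2}\, V_{bu} Z^\top \tilde{y}, \qquad (b, u) \mid \tilde{y} \sim \mathcal{N}(\mu_{bu}, V_{bu}). \]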
Code example #3
def update_topics(Kmax, k_tild, K, U1, n_m_z, n_z_t, n_z, z_m_n, tau, m, n, t, gamma, alpha, Beta_iter, Beta_new, BtM):

    # Cap the sampled index so the number of topics cannot explode before convergence
    if k_tild >= Kmax:
        k_tild = Kmax - 1

    if k_tild < int(K):
        # An existing topic was sampled: map the index back into the used topics
        active = which(U1, 1)
        k_star = active[int(k_tild)]
        # and increase the counters
        n_m_z, n_z_t, n_z, z_m_n = increment(int(k_star), n_m_z, n_z_t, n_z, z_m_n, int(m), int(n), int(t))
        BtM[int(m)] += Beta_iter[int(k_star)]
    else:
        # A new topic was sampled: activate the first unused slot
        deactive   = which(U1, 0)
        k_star     = deactive[0]
        U1[int(k_star)] = 1.
        Beta_iter[int(k_star)] = Beta_new
        Beta_new = np.random.normal(0., 1.)
        # Increment the counters
        n_m_z, n_z_t, n_z, z_m_n = increment(int(k_star), n_m_z, n_z_t, n_z, z_m_n, int(m), int(n), int(t))
        K += 1
        tau = local_sample_tau(tau, gamma, K)

        # Update the scalar product
        BtM = matfuncs.dot(n_m_z, Beta_iter.T)
    return K, U1, n_m_z, n_z_t, n_z, z_m_n, tau, Beta_iter, Beta_new, BtM
Code example #4
def plot_spline_pure_effect(j):
    # Plot the centered effect of spline j, zeroing out all other splines
    # and the linear covariates (uses the globals xL, xS, p, Knots, time, thetaH, UH).
    xL_ = np.zeros((100, xL.shape[1]))
    xS_ = get_x_range(xS)
    for k in range(xS_.shape[1]):
        if k != j:
            xS_[:, k] = 0

    X = get_X(xL_, xS_, p)
    Z = get_Z(xS_, Knots, p)

    for t in range(np.max(time) + 1):
        yhat1 = matfuncs.dot(X, thetaH[:, t, :].T)
        yhat2 = matfuncs.dot(Z, UH.T)

        yhat = yhat1 + yhat2
        yhat = yhat - np.mean(yhat)
        sns.tsplot(data=yhat.T, ci=[95])
        plt.axhline(y=0, color='black', linestyle='--')
Code example #5
def full_conditional_sigma(Y, X, theta, T, time):
    # Inverse-gamma Gibbs step for the observation variance.
    a, b = 1., 1.

    N = len(Y)
    a1 = a + float(N) / 2
    b1 = 0.
    for t in range(1, int(T)):
        idxt = which(time, t)
        Yt   = sub_array(Y, idxt)
        Xt   = sub_row_matrix(X, idxt)
        b1  += np.sum(np.power(Yt - matfuncs.dot(Xt, theta[t]), 2))
    b1 = 1 / (b1 / 2. + 1 / b)          # scale parameter for np.random.gamma
    sigma_iter = 1 / np.random.gamma(a1, b1, 1)
    return sigma_iter[0]
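
As coded, this is the usual inverse-gamma full conditional. Since np.random.gamma is parameterized by shape and scale, the reciprocal of a Gamma(a1, b1) draw is inverse-gamma with rate 1/b1, i.e.

\[ \sigma^2 \mid \cdot \sim \mathrm{InvGamma}\Big(a + \tfrac{N}{2},\; \tfrac{1}{2}\sum_{t=1}^{T-1} \lVert y_t - X_t \theta_t \rVert^2 + \tfrac{1}{b}\Big). \]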
Code example #6
def plot_spline_pure_effect(j):
    # 3D variant: plot the centered effect of spline j across time periods.
    xL_ = np.zeros((100, xL.shape[1]))
    xS_ = get_x_range(xS)
    for k in range(xS_.shape[1]):
        if k != j:
            xS_[:, k] = 0

    X = get_X(xL_, xS_, p)
    Z = get_Z(xS_, Knots, p)

    ax = plt.subplot(projection='3d')
    for t in range(1, np.max(time) + 1):
        yhat1 = matfuncs.dot(X, thetaH[:, t, :].T)
        yhat2 = matfuncs.dot(Z, UH.T)

        yhat = yhat1 + yhat2
        yhat = yhat - np.mean(yhat)
        surf = np.mean(yhat, axis=1)
        ax.plot(xS_[:, j], np.ones(100) * t, surf, color='b')
        #ax.add_collection3d(pl.fill_between(x, 0.95*z, 1.05*z, color='r', alpha=0.3), zs=t, zdir='y')
        ax.set_ylabel('Decade')
        ax.set_zlabel('Success')
Code example #7
def gibbs_dlm_1iter(X, Y, time, theta, sigmae, W):
    # One Gibbs sweep for the dynamic linear model:
    # forward filtering, backward sampling, then the variance updates.
    T = np.max(time) + 1
    K = X.shape[1]

    # Forward filtering
    at = np.zeros((T, K))
    Rt = np.zeros((T, K, K))
    mt = np.zeros((T, K))
    Ct = np.zeros((T, K, K))
    # Initial state
    at[0] = np.zeros(K)
    Rt[0] = np.diag(np.ones(K))
    mt[0] = np.zeros(K)               # prior over the mean of the thetas
    Ct[0] = 100 * np.diag(np.ones(K)) # prior over the variance of the thetas
    for t in range(1, T):
        idxt = which(time, t)
        Xt   = sub_row_matrix(X, idxt)
        Yt   = sub_array(Y, idxt)
        at[t] = mt[t-1]
        Rt[t] = Ct[t-1] + W
        inv_Rt = np.linalg.inv(Rt[t])
        inv_Ct = inv_Rt + matfuncs.dot(Xt.T, Xt) / sigmae
        Ct[t] = np.linalg.inv(inv_Ct)
        mt[t] = matfuncs.dot(Ct[t], matfuncs.dot(inv_Rt, at[t]) + matfuncs.dot(Xt.T, Yt) / sigmae)

    # Backward sampling
    ht = np.zeros((T, K))
    Ht = np.zeros((T, K, K))
    theta[T-1] = RanMNormalPrec(mt[int(T-1)], np.linalg.inv(Ct[int(T-1)]))
    for t in range(T-2, -1, -1):
        Ht[t] = np.linalg.inv(np.linalg.inv(Ct[t]) + np.linalg.inv(W))
        ht[t] = matfuncs.dot(Ht[t], matfuncs.dot(np.linalg.inv(Ct[t]), mt[t]) + matfuncs.dot(np.linalg.inv(W), theta[t+1]))
        theta[t] = RanMNormalPrec(ht[t], np.linalg.inv(Ht[t]))

    # Observation and state variances
    sigmae = full_conditional_sigma(Y, X, theta, T, time)

    w = np.zeros(int(K))
    for k in range(K):
        thetat1 = theta[1:, k]
        thetat  = theta[:int(T-1), k]
        w[k] = full_conditional_W(thetat1, thetat)
    W = np.diag(w)

    return theta, sigmae, W
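
The structure above is forward filtering, backward sampling (FFBS) for the random-walk dynamic linear model y_t = X_t \theta_t + \varepsilon_t, \theta_t = \theta_{t-1} + \omega_t with \omega_t ~ N(0, W). The forward recursions the code implements are

\[ a_t = m_{t-1}, \quad R_t = C_{t-1} + W, \quad C_t^{-1} = R_t^{-1} + \tfrac{1}{\sigma_e^2} X_t^\top X_t, \quad m_t = C_t\Big(R_t^{-1} a_t + \tfrac{1}{\sigma_e^2} X_t^\top y_t\Big), \]

and the backward pass draws each state from

\[ H_t = \big(C_t^{-1} + W^{-1}\big)^{-1}, \qquad h_t = H_t\big(C_t^{-1} m_t + W^{-1} \theta_{t+1}\big), \qquad \theta_t \sim \mathcal{N}(h_t, H_t). \]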
Code example #8
def DSPsHDP(Iter, Y, time, data, max_len_doc, num_docs, V, xL, xS, Knots, p):
    """
    Non-parametric regression
    """
    
    """
    Dynamic part
    """
    maxT = np.max(time) + 1
    
    """
    Spline part
    """
    # We allow for an intercept here.
    # This is the linear part of the matrix
    X = get_X(xL, xS, p)
    # and this accounts for the fluctuation in the splines
    Z  = get_Z(xS, Knots, p)
    Zt = Z.T
    # We split the estimation into two parts, so we require the number
    # of linear-part entries (0 on the diagonal) to be exactly 0
    num_zeros   = 0
    num_splines = xS.shape[1]
    
    """
    Initializing parameters
    """
    ### Dynamic part
    theta  =  np.zeros((maxT,X.shape[1]))
    sigmae = 1
    W      = 1 * np.diag(np.ones(X.shape[1])) 
    ### Spline part
    sigma_u_iter = np.ones(int(num_splines)) * 1000 #np.random.random(num_splines)
    U_iter       = np.random.random(Z.shape[1])
    
    """
    sHDP
    """
    # Priors fixed
    alpha = 0.01
    beta  = 0.50 # Dispersion of topics
    gamma = 0.01
    
    # Initial number of topics K0; the maximum number of topics is Kmax
    K0   = 30
    Kmax = 200
    K    = K0
    
    # State of the sampler: K can still grow (False) or is fixed (True)
    Fixed_K = False
    
    # Lists of used (1) and unused topics (0)
    U1          =  np.zeros(int(Kmax)) # Activated topics
    U1[:int(K)] = 1.
    
    # Init 
    n_m_z, n_z_t, n_z, z_m_n = Init(data, K0, Kmax, V, num_docs, max_len_doc)
    n_m = init_nm(n_m_z)
    Zbar = get_active_Zbar(n_m_z , n_m, U1, K)
    
    # Initialization of the Dirichlet parameters tau
    tau   = np.ones(int(K)) / float(K)
    T,tau = sample_tau(tau, gamma, alpha, n_m_z, K, U1)
    gamma = sample_gamma(T, gamma, int(K))
    alpha = sample_alpha(T, alpha, n_m)
    
    # Regression history    
    Beta_iter          = np.zeros(int(Kmax))
    Beta_iter[int(K):] = 0
    Beta_iter_tmp      = np.zeros(int(K))
    Beta_new           = np.random.normal(0, 100.)
    
    """ History and caching """
    # History for saving the results    
    Hist = np.zeros(int(Iter))
    """
         Start the Markov chain
    """
    for iteration in range(Iter):
        """
        Sample the managerial, acoustic and sonic effects
        """
        
        # sample spline
        yhat0   = matfuncs.dot(Zbar, Beta_iter_tmp)
        yhat1   =  matfuncs.dot(X, theta.T)
        y_tild1 = np.zeros(Y.shape) 
        for t in range(Y.shape[0]):  
            y_tild1[t] = Y[t] - yhat1[t, time[t]] - yhat0[t]
           
        U_iter, sigma_u_iter = SP_Reg_iter(num_zeros, num_splines, Knots, Z, Zt, sigmae, sigma_u_iter, y_tild1, theta)
        
        # sample SSM
        yhat2   =  matfuncs.dot(Z, U_iter)
        y_tild2 = Y - yhat2 - yhat0
        theta, sigmae, W = gibbs_dlm_1iter(X, y_tild2, time, theta, sigmae, W)
        
        """
        Thematic effects
        """
        # Topic effects before sampling
        BtM = matfuncs.dot(n_m_z, Beta_iter)
        # Keep what is not explained by the acoustics
        yres = np.zeros(Y.shape[0])
        for t in range(Y.shape[0]):
            yres[t] = Y[t] - (yhat1[t, time[t]] + yhat2[t])

        for i in range(len(data)):
            m = int(data[i][0])
            n = int(data[i][1])
            t = int(data[i][2])
            
            # Remove the n-th word (term t, currently assigned topic z) from the counters
            z = z_m_n[m][n]
            n_m_z, n_z_t, n_z, z_m_n = decrement(int(z), n_m_z, n_z_t, n_z, z_m_n, int(m), int(n), int(t))
            
            # Number of words in the document and its residual response
            Nd = int(n_m[m])
            yd = yres[m]
            # Update scalar product
            BtM[m] -= Beta_iter[int(z)]
            BtMd    = BtM[m]
        
            # Token probabilities
            phdp   = get_prob(U1, n_z_t, n_m_z, n_z, alpha, beta, tau, K, V,int(m), int(t)) 
            active = which(U1, 1.)
            prob   = get_pshdp(phdp, BtMd, K, yd, Beta_iter, Beta_new, sigmae, Nd, active)

            if Fixed_K:
                # Do not allow for sampling new topics
                prob[int(K)] = 0.
                
            k_tild = multinomial_rvs(prob)
            K, U1, n_m_z, n_z_t, n_z, z_m_n, tau, Beta_iter, Beta_new, BtM = update_topics(int(Kmax), int(k_tild), int(K), U1, n_m_z, n_z_t, n_z, z_m_n, tau, int(m), int(n), int(t), gamma, alpha, Beta_iter, Beta_new, BtM)
    
        # Check which topics are still active
        del_idx    = which(n_z, 0.)
        if del_idx[0]  >= 0.:
            for k in del_idx:
                U1[int(k)] = 0.
                Beta_iter[int(k)] = 0.
            K = np.sum(U1)
        
        """
        Update HDP priors
        """
        # Update the priors
        T,tau = sample_tau(tau, gamma, alpha, n_m_z, int(K), U1)
        gamma = sample_gamma(T, gamma, int(K))
        alpha = sample_alpha(T, alpha, n_m)
        

        """
        Sample Thematic parameters
        """
        Zbar = get_active_Zbar(n_m_z , n_m, U1, K)
        # The priors are dynamic and adjust to the number of topics that are sampled        
        eta   = np.zeros(int(K))
        inv_C = np.diag(np.ones(int(K))) * 0.01      
        Beta_iter_tmp = full_conditional_Beta(yres, Zbar, sigmae, eta, inv_C)
        # Map the coefficients back to the active topics and redraw the new-topic proposal
        active = which(U1, 1.)
        for t, k in enumerate(active):
            Beta_iter[int(k)] = Beta_iter_tmp[int(t)]
        Beta_new = np.random.normal(0, 100.)
        
        """
        Checking convergence and caching the results
        """
        # Once a reasonable number of iterations is reached 
        # we fix the topics and start sampling
        if iteration >= int(0.9 * Iter):
            if Fixed_K == False:
                # Find the number of topics that was sampled most often
                # over the recorded history
                if Iter > 5:
                    # lock in the best number of topics as soon as the chain returns to it
                    best_K = find_max_topic(Hist[:iteration], 0.75)
                    if K == best_K:
                        iter_left = Iter - iteration
                        Fixed_K = True
                        # Update the priors accordingly
                        T,tau = sample_tau(tau, gamma, alpha, n_m_z, int(K), U1)
                        # Updating the hyperpriors
                        gamma = sample_gamma(T, gamma, int(K))
                        alpha = sample_alpha(T, alpha, n_m)
                        
                # Initialize the history of the Gibbs samples
                SIGMA_E = np.zeros( np.int64(Iter - iteration))
                SIGMA_U = np.zeros((np.int64(Iter - iteration), np.int64(num_splines)))
                #BU      = np.zeros((np.int64(Iter - iteration), np.int64(C.shape[1]))) 
                iter_hist = 0
                # Init phi and theta
                phi   = np.zeros((np.int64(K), np.int64(V)))
                theta_topic = np.zeros((np.int64(num_docs), np.int64(K)))
                # Regression results
                B     = np.zeros((np.int64(Iter - iteration), np.int64(Kmax)))
                # The splines 
                thetaH   = np.zeros((np.int64(Iter - iteration), np.int64(maxT), X.shape[1]))
                WH       = np.zeros((np.int64(Iter - iteration), X.shape[1], X.shape[1]))
                UH       = np.zeros((np.int64(Iter - iteration), Z.shape[1]))

            if Fixed_K == True:                            
                # Fix the theta prior
                fixed_alpha = alpha * tau[:-1]
                # The estimated phi/theta
                active = which(U1, 1.)
                phi   += Compute_phi(K, V, n_z_t, beta, active) / float(iter_left)
                theta_topic += Compute_theta(num_docs, K, n_m_z, active, fixed_alpha) / float(iter_left)
                
                #S     += sigma_iter/ float(iter_left)
                B[int(iter_hist)]          = Beta_iter
                #BU[int(iter_hist)]         = BetaU_iter
                SIGMA_E[int(iter_hist)]    = sigmae
                SIGMA_U[int(iter_hist)]    = sigma_u_iter
                
                # Saving the history of the state-space splines
                thetaH[int(iter_hist)]   = theta
                #sigmae_H[iteration] = sigmae
                #sigmau_H[iteration] = sigma_u_iter
                WH[int(iter_hist)]       = W
                UH[int(iter_hist)]       = U_iter
                
                iter_hist += 1
        Hist[iteration] = K
        
    return(Hist, phi, theta_topic, B, active, thetaH, WH, UH, SIGMA_E, SIGMA_U)
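
Pieced together from the residual updates inside the loop (a sketch inferred from the code, not stated explicitly in the source), the sampler targets an additive model combining the dynamic, spline, and topic parts:

\[ Y_i = x_i^\top \theta_{t(i)} + z_i^\top u + \bar{z}_i^\top \beta + \varepsilon_i, \qquad \varepsilon_i \sim \mathcal{N}(0, \sigma_e^2), \qquad \theta_t \sim \mathcal{N}(\theta_{t-1}, W), \]

where \bar{z}_i collects the active-topic proportions of document i as returned by get_active_Zbar.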
Code example #9
def full_conditional_sigma_e(y, X, Z, b, u):
    # Inverse-gamma Gibbs step for the residual variance of the spline model.
    ae, be = 1., 1.
    a1           = ae + len(y) * 0.5
    b1           = 1 / (be + 0.5 * np.sum(np.square(y - matfuncs.dot(X, b) - matfuncs.dot(Z, u))))
    sigma_e_iter = 1 / np.random.gamma(a1, b1, 1)
    return sigma_e_iter
Code example #10
def RanMNormalPrec(mub, inv_Vb):
    # Draw from N(mub, inv_Vb^{-1}) given the precision matrix inv_Vb:
    # with inv_Vb = c c^T (Cholesky), (c^T)^{-1} rv has covariance inv_Vb^{-1}.
    c      = np.linalg.cholesky(inv_Vb)
    rv     = np.random.normal(loc=0, scale=1, size=len(mub))
    draw   = mub + matfuncs.dot(np.linalg.inv(c.T), rv)
    return draw
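
A minimal self-contained sketch of the same trick, assuming matfuncs.dot behaves like np.dot; it replaces the explicit inverse with a triangular solve and checks the sample covariance empirically:

import numpy as np

def ran_mnormal_prec(mub, inv_Vb):
    # Sample from N(mub, inv_Vb^{-1}) without ever forming the covariance.
    L = np.linalg.cholesky(inv_Vb)
    z = np.random.normal(size=len(mub))
    return mub + np.linalg.solve(L.T, z)   # solve instead of inv, for stability

draws = np.array([ran_mnormal_prec(np.zeros(2), np.diag([2., 4.]))
                  for _ in range(20000)])
print(np.cov(draws.T))   # approaches diag(0.5, 0.25) = inv_Vb^{-1}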
Code example #11
        # Linear part
        x1 = np.random.uniform(0, 5, 1)[:, None]
        x2 = np.random.uniform(0, 5, 1)[:, None]
        # Non-linear part
        x3 = np.random.uniform(0, 5, 1)[:, None]
        x4 = np.random.uniform(0, 5, 1)[:, None]
        # Error term
        e  = np.random.normal(0, sigmae, 1)[:, None]

        # Topic of the document
        topic_idx = np.random.choice(3, 1)[0]
        data_text.append(rawdocs[topic_idx])

        # Dependent variable
        #y  =  x1 + x2 + 3 * x3 *np.cos((x3 - 3)*5) + 3*x4*np.cos(x4*5) + e
        #y  = y * np.power(-1, t)
        x = np.concatenate((x1, x2, x3, x4), axis=1)
        y = ytopics[topic_idx] + matfuncs.dot(x, true_theta[t]) + np.random.normal(0, sigmae, 1)[0]

        # The linear effects
        xl = np.concatenate((x1, x2), axis=1)
        # The non-linear effects
        xs = np.concatenate((x3, x4), axis=1)
        # Saving the variables
        xL.extend(xl)
        xS.extend(xs)
        X.extend(x)
        Y.extend(y)
        time.append(t)

#X = np.matrix(X)
xL = np.matrix(xL)
xS = np.matrix(xS)
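
The snippet above is only a fragment: judging from its indentation and the loop variable t, it presumably sits inside a nested simulation loop along the following lines. This is a hypothetical reconstruction; n_periods and n_per_period are made-up names.

xL, xS, X, Y, time, data_text = [], [], [], [], [], []
for t in range(n_periods):            # hypothetical: number of time periods
    for _ in range(n_per_period):     # hypothetical: observations per period
        ...                           # the fragment above goes here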