Example #1
def get_r_and_ll_old2(X, theta, pimix):
    # Combined function to obtain the loglikelihood and r in one step
    mus = theta[0]
    kappas = theta[1]
    N, D = X.shape
    D, K = mus.shape
    r_log = np.zeros((N, K))

    # The trick: precompute the log normalization constants
    cp_logs = []
    for k in range(K):
        cp_logs.append(Wad.get_cp_log(D, kappas[:, k]))

    ll = 0
    for i in range(N):  # For every sample
        # For every  component
        k_component_pdf = Wad.Watson_K_pdf_log(X[[i], :].T,
                                               mus[:, :],
                                               kappas[:, :],
                                               cps_log=cp_logs)
        r_log[i, :] = np.log(pimix[:, :]) + k_component_pdf

        # Marginal log-probability of the sample: log-sum over the components
        Marginal_xi_probability = gf.sum_logs(r_log[i, :])
        ll += Marginal_xi_probability  # Accumulate the incomplete loglikelihood
        # Normalize the probability of the sample being generated by the clusters
        r_log[i, :] = r_log[i, :] - Marginal_xi_probability
    return r_log, ll
Example #2
def get_r_and_ll(X, distribution, theta, pimix):
    # Combined function to obtain the loglikelihood and r in one step

    N, D = X.shape
    K = len(theta)
    r_log = np.zeros((N, K))

    # We can precompute the normalization constants if we have the functionality !
    if distribution.get_Cs_log is not None:
        Cs_log = []
        for k in range(K):
            Cs_log.append(distribution.get_Cs_log(theta[k]))
    else:
        Cs_log = None

    ll = 0

    # Compute the pdf for all samples and all clusters
    k_component_pdf = distribution.pdf_log_K(X.T, theta, Cs_log=Cs_log)
    r_log[:, :] = np.log(pimix[:, :]) + k_component_pdf

    for i in range(N):  # For every sample
        # Marginal log-probability of the sample: marginalize the clusters
        Marginal_xi_probability = gf.sum_logs(r_log[i, :])
        ll += Marginal_xi_probability  # Accumulate the incomplete loglikelihood
        # Normalize the probability of the sample being generated by the clusters
        r_log[i, :] = r_log[i, :] - Marginal_xi_probability
    return r_log, ll
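These examples rely on gf.sum_logs to marginalize in log space, but its implementation is not part of this listing. A minimal sketch of what it presumably does, judging from how it is called above (a numerically stable log-sum-exp, where byRow=True reduces each row and keeps a column vector):

import numpy as np

def sum_logs(log_values, byRow=False):
    # Stable log-sum-exp: log(sum(exp(v))) computed without overflow/underflow
    v = np.asarray(log_values, dtype=float)
    axis = 1 if byRow else None
    m = np.max(v, axis=axis, keepdims=byRow)
    return m + np.log(np.sum(np.exp(v - m), axis=axis, keepdims=byRow))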
Example #3
def get_loglikelihood(data ,distribution,theta, model_theta,alpha = None):

    N = len(data)
    pi, A = model_theta
    I = pi.size
    # Only compute alpha if it has not been given to us
    if alpha is None:
        cp_logs = distribution.get_Cs_log(theta)
        loglike = distribution.pdf_log_K(data, theta, Cs_logs = cp_logs)
        alpha = get_alfa_matrix_log(data, pi, A,theta,distribution, loglike = loglike)
#        print len(alpha),N
    new_ll = 0

    for n in range(N):    # For every HMM sequence
        ## Generate probabilities of being at the state qt = j in the last point
        # of the chain
        Nsam, Nd = data[n].shape
#        pi_end = get_final_probabilities(pi,A,Nsam)
        all_val = []
#        print pi_end.shape
#        print np.log(pi_end)
#        print I

        for i in range(I):
            all_val.append(alpha[n][i,-1]) # + np.log(pi_end)[0,i]  + np.log(pi_end)[0,i]
#            print np.log(pi_end)[0,i]
#        print all_val
        new_ll = new_ll +  gf.sum_logs(all_val)#  sum_logs(alpha[n][:,-1] + np.log(pi_end).T);
        
    return new_ll
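In other words, the incomplete loglikelihood accumulated above is, for each chain n, the log-sum-exp of the forward variables at the last time step: new_ll = sum over n of logsumexp_i( alpha[n][i, T_n - 1] ).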
Example #4
def get_responsabilityMatrix_log2(X, theta, pimix):
    mus = theta[0]
    kappas = theta[1]
    N, D = X.shape
    D, K = mus.shape
    r_log = np.zeros((N, K))

    # The trick: precompute the log normalization constants
    cp_logs = []
    for k in range(K):
        cp_logs.append(Wad.get_cp_log(D, kappas[:, k]))

    def get_custersval(k):
        return Wad.Watson_pdf_log(
            X[i, :], mus[:, k], kappas[:, k], cp_log=cp_logs[k]) + np.log(
                pimix[:, k])

    krange = range(K)
    for i in range(N):  # For every sample
        # For every  component
        r_log[i, :] = np.array(list(map(get_custersval, krange))).flatten()
        # Normalize the probability of the sample being generated by the clusters
        Marginal_xi_probability = gf.sum_logs(r_log[i, :])
        r_log[i, :] = r_log[i, :] - Marginal_xi_probability

    return r_log
Example #5
def get_responsabilityMatrix_log(X, theta, pimix):
    mus = theta[0]
    kappas = theta[1]
    N, D = X.shape
    D, K = mus.shape
    r_log = np.zeros((N, K))

    # The trick: precompute the log normalization constants
    cp_logs = []
    for k in range(K):
        cp_logs.append(Wad.get_cp_log(D, kappas[:, k]))

    # For every component
    k_component_pdf = Wad.Watson_K_pdf_log(X[:, :].T,
                                           mus[:, :],
                                           kappas[:, :],
                                           cps_log=cp_logs)
    r_log = k_component_pdf + np.log(pimix[:, :])

    for i in range(N):  # For every sample
        # Normalize the probability of the sample being generated by the clusters
        Marginal_xi_probability = gf.sum_logs(r_log[i, :])
        r_log[i, :] = r_log[i, :] - Marginal_xi_probability

    return r_log
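The per-sample normalization loop above (and the "can this not be done without a for?" TODO that appears in later examples) can be vectorized. A possible equivalent, assuming SciPy is available and letting scipy.special.logsumexp play the role of gf.sum_logs:

import numpy as np
from scipy.special import logsumexp

def normalize_responsibilities_log(r_log):
    # Subtract each sample's marginal log-probability so that every row of
    # r_log sums to one after exponentiation (same effect as the loop above)
    return r_log - logsumexp(r_log, axis=1, keepdims=True)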
Example #6
def get_r_and_ll(data,distribution,theta, model_theta, loglike = None):
    # Combined function to obtain the loglikelihood and r in one step
    
    X = preprocess_data(data)
    N, D = X.shape
    K = len(theta)
    pimix = model_theta[0]
                        
    # Compute the pdf for all samples and all clusters, unless it was given
    if loglike is None:
        loglike = get_samples_loglikelihood(X, theta, distribution)
    
    
    r_log= np.log(pimix) + loglike
    samples_ll =  gf.sum_logs(r_log, byRow = True)
    ll = np.sum(samples_ll)
    

#    if(0):
#        for i in range(N):  # For every sample
#        #TODO: Can this not be done without a for ?
#            ll += gf.sum_logs(r_log[i,:])  # Marginalize clusters and product of samples probabilities!!
#            # Normalize the probability of the sample being generated by the clusters
#            Marginal_xi_probability = gf.sum_logs(r_log[i,:])
#            r_log[i,:] = r_log[i,:]- Marginal_xi_probability
#    else:

#        print samples_ll.shape

    ## Compute the responsibilities
    r = get_responsibilities(X, distribution, theta, model_theta, loglike=loglike)

    return r, ll
Example #7
def get_EM_Incomloglike_log(X, distribution, theta, pimix):

    N, D = X.shape
    K = len(theta)
    r_log = np.zeros((N, K))

    # We can precompute the normalization constants if we have the functionality !
    if distribution.get_Cs_log is not None:
        Cs_log = []
        for k in range(K):
            Cs_log.append(distribution.get_Cs_log(theta[k]))
    else:
        Cs_log = None

    ll = 0

    # For every  component
    k_component_pdf = Wad.Watson_K_pdf_log(X.T, theta, Cs_log=Cs_log)
    r_log = np.log(pimix[:, :]) + k_component_pdf

    for i in range(N):  # For every sample
        #        print "r_log"
        #        print r_log[i,:]
        ll += gf.sum_logs(
            r_log[i, :]
        )  # Marginalize clusters and product of samples probabilities!!
        # Normalize the probability of the sample being generated by the clusters
#        Marginal_xi_probability = gf.sum_logs(r_log[i,:])
#        r_log[i,:] = r_log[i,:]- Marginal_xi_probability
    return ll
Example #8
def get_responsibilities(X,distribution,theta, model_theta, loglike = None):
    X = preprocess_data(X)
    N, D = X.shape
    K = len(theta)
    pimix = model_theta[0]
    
    if loglike is None:
        loglike = get_samples_loglikelihood(X, theta, distribution)
        
    
    
#    loglike[:,0] =  loglike[:,0] - np.log(5)
    
    r_log = np.log(pimix) + loglike
    
    samples_ll =  gf.sum_logs(r_log, byRow = True)
    r_log = r_log - samples_ll
    
    ## Turn into hard decision
    if (0):
        rmax = np.argmax(r_log, axis = 1)
        N,K = r_log.shape
        hard_r = np.zeros(r_log.shape)
        for i in range(N):
            hard_r[i,rmax[i]] = 1
        hard_r = hard_r + 1e-200
        r_log = np.log(hard_r)
    
    
    r = np.exp(r_log)
    return r
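A hypothetical usage sketch (the names and shapes are assumptions inferred from the code above: X is an (N, D) data array, theta is a list of K parameter sets for `distribution`, and model_theta[0] holds the (1, K) mixing weights):

r = get_responsibilities(X, distribution, theta, model_theta)
print(r.shape)                          # (N, K)
print(np.allclose(r.sum(axis=1), 1.0))  # every row is a normalized responsibility vector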
Example #9
def  get_beta_matrix_log( data, pi, A,theta, distribution, loglike = None):
    I = A.shape[0]
    N = len(data)
    D = data[0].shape[1]
    T = [len(x) for x in data]
    
    beta = [];
    
    # Calculate the last sample
    for n in range(N): # For every chain
        beta.append(np.zeros((I,T[n])));
        Nsam, Nd = data[n].shape
#        pi_end = get_final_probabilities(pi,A,Nsam)
        
        for i in range(I):
            beta[n][i,-1] = 0 # np.log( pi_end[0,i])
#        print beta[n][:,-1]
#            beta[n][i,-1] = np.log( pi_end[0,i]) + Wad.Watson_pdf_log(data[n][-1,:], B[0][:,i], B[1][:,i], cp_log = cp_logs[i]);

#            aux_vec = []
#            for j in range(J):
#                aux_vec.append(pi_end[:,i])
#                np.log( pi_end[:,i]) + Wad.Watson_pdf_log(data[n][0,:], B[0][:,i], B[1][:,i], cp_log = cp_logs[i]);

    # Calculate the rest of the betas recursively
    for n in range(N):     # For every chain
        for t in range(T[n]-2,-1,-1):  # For every time instant backwards
            aux_vec = np.log(A[:,:]) +  beta[n][:,[t+1]].T + \
            loglike[n][[t+1], :]
            #distribution.pdf_log_K(data[n][[t+1],:].T, theta ,cp_logs)
            beta[n][:,[t]] = gf.sum_logs(aux_vec, byRow = True)
                
    return beta
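For reference, the backward recursion implemented above is, in log space: beta[n][i, t] = logsumexp_j( log A[i, j] + loglike[n][t+1, j] + beta[n][j, t+1] ), initialized with beta[n][i, T_n - 1] = 0.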
Example #10
def get_alpha_responsibilities(data, distribution, theta, model_theta,loglike = None):
    N = len(data)
    pi = model_theta[0]
    A = model_theta[1]
    
    if loglike is None:
        cp_logs = distribution.get_Cs_log(theta)
        loglike = get_samples_loglikelihood(data, theta, distribution, Cs_logs=cp_logs)
        
    alpha = get_alfa_matrix_log(data, pi, A,theta,distribution, loglike = loglike)
    gamma = alpha  # Note: this aliases alpha, which is then modified in place below

    T = [len(x) for x in data]
    for n in range(N):
        for t in range(T[n]):
            #Normalize to get the actual gamma
            gamma[n][:,t] = gamma[n][:,t] - gf.sum_logs(gamma[n][:,t]);  
    
    ## Reconvert to natural units
    for n in range(N):
        gamma[n] = np.exp(gamma[n])
        gamma[n]  =  gamma[n].T
    
    return gamma
Example #11
def kummer_own_log(a,b,x):
    # Default tolerance is tol = 1e-10.  Feel free to change this as needed.
    print ("$$$$$$$$$$$$$  Needed to use own Kummer func $$$$$$$$$$$$$$$$$$$$")
    tol = 1e-10;
    log_tol = np.log(tol)
    # Estimates the value by summing powers of the generalized hypergeometric
    # series:
    #      sum_{n=0..Inf} (a)_n * x^n / ((b)_n * n!)
    # until the specified tolerance is achieved.
    
    log_term = np.log(x) + np.log(a) - np.log(b)
#    print a,b,x
#    f_log =  HMMl.sum_logs([0, log_term])
    
    n = 1;
    an = a;
    bn = b;
    nmin = 5;
    
    terms_list = []
    
    terms_list.extend([0,log_term])
    d = 0
    while((n < nmin) or (log_term > log_tol)):
      # Advance the series five terms per pass to reduce the overhead of the while loop
      n = n + d;
#      print "puto n %i"%(n)
#      print f_log
      an = an + d;
      bn = bn + d;
      
      d = 1
#      term = (x*term*an)/(bn*n);
      log_term1 = np.log(x) + log_term  + np.log(an+d) - np.log(bn+d) - np.log(n+d)
      d += 1
      log_term2 = np.log(x) + log_term1  + np.log(an+d) - np.log(bn+d) - np.log(n+d)
      d += 1
      log_term3 = np.log(x) + log_term2  + np.log(an+d) - np.log(bn+d) - np.log(n+d)
      d += 1
      log_term4 = np.log(x) + log_term3  + np.log(an+d) - np.log(bn+d) - np.log(n+d)
      d += 1
      log_term = np.log(x) + log_term4  + np.log(an+d) - np.log(bn+d) - np.log(n+d)
  
      terms_list.extend([log_term1,log_term2,log_term3,log_term4,log_term] )
      
      if(n > 10000):  # We f****d up
#        print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$4"
#        print " Not converged "
#        print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$4"
        # If we could not compute it, we raise an error...
        raise RuntimeError("Kummer_is_inf: Kummer function did not converge after 10000 iterations. Kappa = %f" % x)
    f_log = gf.sum_logs(terms_list);
#    print "f_log success %f " % f_log
#    print "-----------------------------------------"
#    print n
#    print "-----------------------------------------"
    return f_log
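As an optional sanity check (an assumption, not part of the original code), the series result can be compared against SciPy's confluent hypergeometric function for moderate arguments where hyp1f1 does not overflow:

import numpy as np
from scipy.special import hyp1f1

a, b, x = 0.5, 1.5, 10.0
print(kummer_own_log(a, b, x))   # log of 1F1(a; b; x) from the series above
print(np.log(hyp1f1(a, b, x)))   # reference value; the two should agree closely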
Example #12
def get_EM_Incomloglike_byCluster_log(theta, pimix, X):
    # Gets the incomplete loglikelihood of all the samples, split by cluster

    mus = theta[0]
    kappas = theta[1]
    N = X.shape[0]  # Number of IID samples
    D = X.shape[1]  # Number of dimensions of the samples
    K = kappas.size  # Number of clusters
    # Calculate log-likelihood
    # The trick: precompute the log normalization constants
    cp_logs = []
    for k in range(K):
        cp_logs.append(Wad.get_cp_log(D, kappas[:, k]))

    k_component_pdf = Wad.Watson_K_pdf_log(X[:, :].T,
                                           mus[:, :],
                                           kappas[:, :],
                                           cps_log=cp_logs)
    r_log = np.log(pimix[:, :]) + k_component_pdf

    # Normalize probabilities first ?
    for i in range(N):  # For every sample
        Marginal_xi_probability = gf.sum_logs(r_log[i, :])
        r_log[i, :] = r_log[i, :] - Marginal_xi_probability

    clusters = np.argmax(r_log, axis=1)  # Implemented already
    #    print clusters
    pi_estimated = []

    for i in range(K):
        pi_i = np.where(clusters == i)[0].size
        pi_estimated.append(np.array(pi_i) / float(N))


#    For ll in logll
#
#        Logglikelihoods[clusters[i]] += ll

#    r_log = np.exp(r_log)
    ll = np.sum(r_log, axis=0).reshape(1, K)

    # Mmm filter ?
    #    ll = []
    #    for i in range (K):
    #        ll_i = np.sum(r_log[np.where(clusters == i)[0],i])
    #        ll.append(ll_i)
    #    ll = np.array(ll).reshape(1,K)
    #    pi_estimated = np.array(pi_estimated).reshape(1,K)
    #    ll = np.exp(ll)
    #    print pi_estimated
    return ll  #ll
Example #13
def get_gamma_matrix_log( alpha,beta ):
    I = alpha[0].shape[0]
    N = len(alpha)
    T = [x.shape[1] for x in alpha]

    gamma = []
    
    for n in range(N):
        gamma.append(np.zeros((I,T[n])))
        for t in range (0, T[n]):
            gamma[n][:,t] = alpha[n][:,t] + beta[n][:,t];
    
    for n in range(N):
        for t in range(T[n]):
            #Normalize to get the actual gamma
            gamma[n][:,t] = gamma[n][:,t] - gf.sum_logs(gamma[n][:,t]);  

    return gamma
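In other words: gamma[n][i, t] = alpha[n][i, t] + beta[n][i, t] - logsumexp_j( alpha[n][j, t] + beta[n][j, t] ), i.e. the smoothed state posterior p(q_t = i | x_1:T) of chain n in log space.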
Example #14
def get_alfa_matrix_log(data, pi, A,theta,distribution, loglike = None):
    I = A.shape[0]
    N = len(data)
    D = data[0].shape[1]
    T = [len(x) for x in data]
    
    alfa = []
#    print(I,N,D,T)
    # Calculate first sample
    
    if loglike is None:
        cp_logs = distribution.get_Cs_log(theta)
        loglike = get_samples_loglikelihood(data, theta, distribution, Cs_logs=cp_logs)
        
    for n in range(N): # For every chain
        alfa.append(np.zeros((I,T[n])));
        for i in range(I):  # For every state
            alfa[n][i,0] = np.log(pi[:,i]) + loglike[n][[0], i]  # distribution.pdf_log_K(data[n][[0],:].T,theta,  [cp_logs[i]]);  # Maybe need to transpose

    # Calculate the rest of the alfas recursively
    for n in range(N):          # For every chain
        for t in range(1, T[n]):           # For every time instant
            aux_vec = np.log(A[:,:]) + alfa[n][:,[t-1]]
            alfa[n][:,[t]] = gf.sum_logs(aux_vec.T,  byRow = True)
#            print sum_logs(aux_vec.T,  byRow = True).shape
#            print alfa[n][:,[t]].shape
            alfa[n][:,[t]] +=  loglike[n][[t], :].T  # distribution.pdf_log_K(data[n][[t],:].T,theta, cp_logs).T
#                print np.log(Wad.Watson_pdf(data[n][t,:], B[0][:,i], B[1][:,i]))
#                print    np.log(Wad.Watson_pdf(data[n][t,:], B[0][:,i], B[1][:,i]))# alfa[i,n,t] 

#            for i in range(I):      # For every state
#                aux_vec = np.log(A[:,[i]]) + alfa[n][:,[t-1]]
#                alfa[n][i,t] = sum_logs(aux_vec)
#                alfa[n][i,t] =  Wad.Watson_pdf_log(data[n][[t],:].T, B[0][:,i], B[1][:,i], cp_log = cp_logs[i]) + alfa[n][i,t] ;
                
    return alfa
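For reference, the forward recursion implemented above is: alpha[n][j, 0] = log pi[j] + loglike[n][0, j], and for t > 0, alpha[n][j, t] = loglike[n][t, j] + logsumexp_i( alpha[n][i, t-1] + log A[i, j] ).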
Example #15
def  get_fi_matrix_log( data, A, theta, alpha,beta, distribution, loglike = None):
    I = A.shape[0]
    N = len(data)
    D = data[0].shape[1]
    T = [len(x) for x in data]

    fi = []
    
    for n in range(N):
        fi.append(np.zeros((I,I,T[n]-1)))

        # log A(i, j), broadcast over the T[n]-1 transitions of the chain
        zurullo = np.log(A[:,:])
        zurullo = zurullo.reshape(zurullo.shape[0],zurullo.shape[1],1)
        zurullo = np.repeat(zurullo,T[n]-1,axis = 2)

        # beta_{t+1}(j), broadcast over the first index i
        mierda1 = beta[n][:,1:]
        mierda1 = mierda1.reshape(1,mierda1.shape[0],mierda1.shape[1])
        mierda1 = np.repeat(mierda1,I,axis = 0)

        # alpha_t(i), broadcast over the second index j
        mierda2 = alpha[n][:,:-1]
        mierda2 = mierda2.reshape(mierda2.shape[0],1,mierda2.shape[1])
        mierda2 = np.repeat(mierda2,I,axis = 1)

        # log p(x_{t+1} | theta_j), broadcast over the first index i
        # caca = distribution.pdf_log_K(data[n][1:,:].T, theta, cp_logs).T
        caca = loglike[n][1:,:].T
        caca = caca.reshape(1,caca.shape[0],caca.shape[1])
        caca = np.repeat(caca,I,axis = 0)

        # Unnormalized xi: sum of the four log factors
        fi[n][:,:,:] = zurullo + caca + mierda1 + mierda2
        
    for n in range(N):
        if(1):
            for t in range (0, T[n]-1):
                # Normalize to get the actual fi
                fi[n][:,:,t] = fi[n][:,:,t] - gf.sum_logs(fi[n][:,:,t]);  

    return fi
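In other words, before the normalization step: fi[n][i, j, t] = alpha[n][i, t] + log A[i, j] + loglike[n][t+1, j] + beta[n][j, t+1], which is the pairwise state posterior xi_t(i, j) of the Baum-Welch E-step once it is normalized over (i, j) at every transition.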
Example #16
def get_loglikelihood(data,distribution,theta, model_theta,loglike = None):
    # Obtains the incomplete loglikelihood of the mixture in one step
    # The shape of X is (N,D)
    
    X = preprocess_data(data)
    N, D = X.shape
    K = len(theta)
    r_log = np.zeros((N,K))
    
    pimix = model_theta[0]
    ll = 0
#    print pimix
     # Compute the pdf for all samples and all clusters
    if loglike is None:
        loglike = get_samples_loglikelihood(X, theta, distribution)
        
    r_log[:,:] = np.log(pimix[:,:]) + loglike
    samples_ll =  gf.sum_logs(r_log[:,:], byRow = True)
#        print samples_ll.shape
    ll = np.sum(samples_ll)
        
    return ll
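In other words, the quantity returned here is the incomplete-data loglikelihood of the mixture: ll = sum over samples i of logsumexp_k( log pimix[k] + loglike[i, k] ), where loglike[i, k] = log p(x_i | theta_k).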
Example #17
def generate_images_iterations_ll(Xs, mus, covs, Ks, myDManager, logl,
                                  theta_list, model_theta_list,
                                  folder_images_gif):
    #    os.remove(folder_images_gif) # Remove previous images if existing
    """
    WARNING: MEANT FOR ONLY 3 Distributions due to the color RGB
    """
    import shutil
    ul.create_folder_if_needed(folder_images_gif)
    shutil.rmtree(folder_images_gif)
    ul.create_folder_if_needed(folder_images_gif)
    ######## Plot the original data #####

    Xdata = np.concatenate(Xs, axis=1).T
    colors = ["r", "b", "g"]
    K_G, K_W, K_vMF = Ks

    ### FOR EACH ITERATION
    for i in range(len(theta_list)):  # theta_list
        indx = i
        gl.init_figure()
        ax1 = gl.subplot2grid((1, 2), (0, 0), rowspan=1, colspan=1)

        ## Get the relative ll of the Gaussian denoising cluster.
        ll = myDManager.pdf_log_K(Xdata, theta_list[indx])
        N, K = ll.shape
        #        print ll.shape
        for j in range(N):  # For every sample
            #TODO: Can this not be done without a for ?
            # Normalize the probability of the sample being generated by the clusters
            Marginal_xi_probability = gf.sum_logs(ll[j, :])
            ll[j, :] = ll[j, :] - Marginal_xi_probability

            ax1 = gl.scatter(
                Xdata[j, 0],
                Xdata[j, 1],
                labels=[
                    'EM Evolution. Kg:' + str(K_G) + ', Kw:' + str(K_W) +
                    ', K_vMF:' + str(K_vMF), "X1", "X2"
                ],
                color=(np.exp(ll[j, 1]), np.exp(ll[j, 0]),
                       np.exp(ll[j, 2])),  ###  np.exp(ll[j,2])
                alpha=1,
                nf=0)

        # Only doable if the clusters don't die
        for k_c in myDManager.clusterk_to_Dname.keys():
            k = myDManager.clusterk_to_thetak[k_c]
            distribution_name = myDManager.clusterk_to_Dname[k_c]  # G W

            if (distribution_name == "Gaussian"):
                ## Plot the evolution of the mu
                #### Plot the Covariance of the clusters !
                mean, w, h, theta = bMA.get_gaussian_ellipse_params(
                    mu=theta_list[indx][k][0],
                    Sigma=theta_list[indx][k][1],
                    Chi2val=2.4477)
                r_ellipse = bMA.get_ellipse_points(mean, w, h, theta)
                gl.plot(r_ellipse[:, 0],
                        r_ellipse[:, 1],
                        ax=ax1,
                        ls="-.",
                        lw=3,
                        AxesStyle="Normal2",
                        legend=[
                            "Kg(%i). pi:%0.2f" %
                            (k, float(model_theta_list[indx][0][0, k]))
                        ])

            elif (distribution_name == "Watson"):
                #### Plot the pdf of the distribution !
                ## Distribution parameters for Watson
                kappa = float(theta_list[indx][k][1])
                mu = theta_list[-1][k][0]
                Nsa = 1000
                # Draw 2D samples as transformation of the angle
                Xalpha = np.linspace(0, 2 * np.pi, Nsa)
                Xgrid = np.array([np.cos(Xalpha), np.sin(Xalpha)])

                probs = []  # Vector with probabilities
                for i in range(Nsa):
                    probs.append(
                        np.exp(Wad.Watson_pdf_log(Xgrid[:, i], [mu, kappa])))

                probs = np.array(probs)
                # Plot it in polar coordinates
                X1_w = (1 + probs) * np.cos(Xalpha)
                X2_w = (1 + probs) * np.sin(Xalpha)

                gl.plot(X1_w,
                        X2_w,
                        alpha=1,
                        lw=3,
                        ls="-.",
                        legend=[
                            "Kw(%i). pi:%0.2f" %
                            (k, float(model_theta_list[indx][0][0, k]))
                        ])

            elif (distribution_name == "vonMisesFisher"):
                #### Plot the pdf of the distribution !
                ## Distribution parameters for von Mises-Fisher
                kappa = float(theta_list[indx][k][1])
                mu = theta_list[indx][k][0]
                Nsa = 1000
                # Draw 2D samples as transformation of the angle
                Xalpha = np.linspace(0, 2 * np.pi, Nsa)
                Xgrid = np.array([np.cos(Xalpha), np.sin(Xalpha)])

                probs = []  # Vector with probabilities
                for i in range(Nsa):
                    probs.append(
                        np.exp(
                            vMFd.vonMisesFisher_pdf_log(
                                Xgrid[:, i], [mu, kappa])))

                probs = np.array(probs)
                probs = probs.reshape((probs.size, 1)).T
                # Plot it in polar coordinates
                X1_w = (1 + probs) * np.cos(Xalpha)
                X2_w = (1 + probs) * np.sin(Xalpha)

                #            print X1_w.shape, X2_w.shape
                gl.plot(X1_w,
                        X2_w,
                        alpha=1,
                        lw=3,
                        ls="-.",
                        legend=[
                            "Kvmf(%i). pi:%0.2f" %
                            (k, float(model_theta_list[indx][0][0, k]))
                        ])

        gl.set_zoom(xlim=[-6, 6], ylim=[-6, 6], ax=ax1)
        ax2 = gl.subplot2grid((1, 2), (0, 1), rowspan=1, colspan=1)
        if (indx == 0):
            gl.add_text(positionXY=[0.1, .5],
                        text=r' Initialization Incomplete LogLike: %.2f' %
                        (logl[0]),
                        fontsize=15)
            pass
        elif (indx >= 1):

            gl.plot(
                range(1,
                      np.array(logl).flatten()[1:].size + 1),
                np.array(logl).flatten()[1:(indx + 1)],
                ax=ax2,
                legend=["Iteration %i, Incom LL: %.2f" % (indx, logl[indx])],
                labels=[
                    "Convergence of LL with generated data", "Iterations", "LL"
                ],
                lw=2)
            gl.scatter(1, logl[1], lw=2)
            pt = 0.05
            gl.set_zoom(xlim=[0, len(logl)],
                        ylim=[
                            logl[1] - (logl[-1] - logl[1]) * pt,
                            logl[-1] + (logl[-1] - logl[1]) * pt
                        ],
                        ax=ax2)

        gl.subplots_adjust(left=.09,
                           bottom=.10,
                           right=.90,
                           top=.95,
                           wspace=.2,
                           hspace=0.01)

        gl.savefig(folder_images_gif + 'gif_' + str(indx) + '.png',
                   dpi=100,
                   sizeInches=[16, 8],
                   close="yes",
                   bbox_inches=None)

        gl.close("all")
Example #18
def generate_images_iterations_ll(Xs,mus,covs, Ks ,myDManager, logl,theta_list,model_theta_list,folder_images_gif):
#    os.remove(folder_images_gif) # Remove previous images if existing
    """
    WARNING: MEANT FOR ONLY 3 Distributions due to the color RGB
    """
    import shutil
    ul.create_folder_if_needed(folder_images_gif)
    shutil.rmtree(folder_images_gif)
    ul.create_folder_if_needed(folder_images_gif)
    ######## Plot the original data #####

    Xdata = np.concatenate(Xs,axis = 1).T
    colors = ["r","b","g"]
    K_G,K_W,K_vMF = Ks
    
    ### FOR EACH ITERATION 
    for i in range(len(theta_list)):  # theta_list
        indx = i
        gl.init_figure()
        ax1 = gl.subplot2grid((1,2), (0,0), rowspan=1, colspan=1)
        
        ## Get the relative ll of the Gaussian denoising cluster.
        ll = myDManager.pdf_log_K(Xdata,theta_list[indx])
        N,K = ll.shape
#        print ll.shape
        for j in range(N):  # For every sample
        #TODO: Can this not be done without a for ?
            # Normalize the probability of the sample being generated by the clusters
            Marginal_xi_probability = gf.sum_logs(ll[j,:])
            ll[j,:] = ll[j,:]- Marginal_xi_probability
        
            ax1 = gl.scatter(Xdata[j,0],Xdata[j,1], labels = ['EM Evolution. Kg:'+str(K_G)+ ', Kw:' + str(K_W) + ', K_vMF:' + str(K_vMF), "X1","X2"], 
                              color = (np.exp(ll[j,1]), np.exp(ll[j,0]), np.exp(ll[j,2])) ,  ###  np.exp(ll[j,2])
                              alpha = 1, nf = 0)
            
        # Only doable if the clusters don't die
        for k_c in myDManager.clusterk_to_Dname.keys():
            k = myDManager.clusterk_to_thetak[k_c]
            distribution_name = myDManager.clusterk_to_Dname[k_c] # G W
            
            if (distribution_name == "Gaussian"):
                ## Plot the evolution of the mu
                #### Plot the Covariance of the clusters !
                mean,w,h,theta = bMA.get_gaussian_ellipse_params( mu = theta_list[indx][k][0], Sigma = theta_list[indx][k][1], Chi2val = 2.4477)
                r_ellipse = bMA.get_ellipse_points(mean,w,h,theta)
                gl.plot(r_ellipse[:,0], r_ellipse[:,1], ax = ax1, ls = "-.", lw = 3,
                        AxesStyle = "Normal2",
                       legend = ["Kg(%i). pi:%0.2f"%(k,  float(model_theta_list[indx][0][0,k]))]) 
            
            elif(distribution_name == "Watson"):
                #### Plot the pdf of the distribution !
                ## Distribution parameters for Watson
                kappa = float(theta_list[indx][k][1]);  mu = theta_list[-1][k][0]
                Nsa = 1000
                # Draw 2D samples as transformation of the angle
                Xalpha = np.linspace(0, 2*np.pi, Nsa)
                Xgrid= np.array([np.cos(Xalpha), np.sin(Xalpha)])
                
                probs = []  # Vector with probabilities
                for i in range(Nsa):
                    probs.append(np.exp(Wad.Watson_pdf_log(Xgrid[:,i],[mu,kappa]) ))
                
                probs = np.array(probs)
                # Plot it in polar coordinates
                X1_w = (1 + probs) * np.cos(Xalpha)
                X2_w = (1 + probs) * np.sin(Xalpha)
                
                gl.plot(X1_w,X2_w, 
                     alpha = 1, lw = 3, ls = "-.", legend = ["Kw(%i). pi:%0.2f"%(k,  float(model_theta_list[indx][0][0,k]))]) 
                
            elif(distribution_name == "vonMisesFisher"):
                #### Plot the pdf of the distribution !
                ## Distribution parameters for von Mises-Fisher
                kappa = float(theta_list[indx][k][1]); mu = theta_list[indx][k][0]
                Nsa = 1000
                # Draw 2D samples as transformation of the angle
                Xalpha = np.linspace(0, 2*np.pi, Nsa)
                Xgrid= np.array([np.cos(Xalpha), np.sin(Xalpha)])
                
                probs = []  # Vector with probabilities
                for i in range(Nsa):
                    probs.append(np.exp(vMFd.vonMisesFisher_pdf_log(Xgrid[:,i],[mu,kappa]) ))
                    
                probs = np.array(probs)
                probs = probs.reshape((probs.size,1)).T
                # Plot it in polar coordinates
                X1_w = (1 + probs) * np.cos(Xalpha)
                X2_w = (1 + probs) * np.sin(Xalpha)
                
    #            print X1_w.shape, X2_w.shape
                gl.plot(X1_w,X2_w, 
                     alpha = 1, lw = 3, ls = "-.", legend = ["Kvmf(%i). pi:%0.2f"%(k,  float(model_theta_list[indx][0][0,k]))]) 
                
            
        gl.set_zoom(xlim = [-6,6], ylim = [-6,6], ax = ax1)     
        ax2 = gl.subplot2grid((1,2), (0,1), rowspan=1, colspan=1)
        if (indx == 0):
            gl.add_text(positionXY = [0.1,.5], text = r' Initialization Incomplete LogLike: %.2f'%(logl[0]),fontsize = 15)
            pass
        elif (indx >= 1):
           
            gl.plot(range(1,np.array(logl).flatten()[1:].size +1),np.array(logl).flatten()[1:(indx+1)], ax = ax2, 
                    legend = ["Iteration %i, Incom LL: %.2f"%(indx, logl[indx])], labels = ["Convergence of LL with generated data","Iterations","LL"], lw = 2)
            gl.scatter(1, logl[1], lw = 2)
            pt = 0.05
            gl.set_zoom(xlim = [0,len(logl)], ylim = [logl[1] - (logl[-1]-logl[1])*pt,logl[-1] + (logl[-1]-logl[1])*pt], ax = ax2)
            
        gl.subplots_adjust(left=.09, bottom=.10, right=.90, top=.95, wspace=.2, hspace=0.01)
        
        gl.savefig(folder_images_gif +'gif_'+ str(indx) + '.png', 
               dpi = 100, sizeInches = [16, 8], close = "yes",bbox_inches = None)
        
        gl.close("all")
Example #19
def kummer_own_log(a, b, x):
    # Default tolerance is tol = 1e-10.  Feel free to change this as needed.
    print("$$$$$$$$$$$$$  Needed to use own Kummer func $$$$$$$$$$$$$$$$$$$$")
    tol = 1e-10
    log_tol = np.log(tol)
    # Estimates the value by summing powers of the generalized hypergeometric
    # series:
    #      sum_{n=0..Inf} (a)_n * x^n / ((b)_n * n!)
    # until the specified tolerance is achieved.

    log_term = np.log(x) + np.log(a) - np.log(b)
    #    print a,b,x
    #    f_log =  HMMl.sum_logs([0, log_term])

    n = 1
    an = a
    bn = b
    nmin = 5

    terms_list = []

    terms_list.extend([0, log_term])
    d = 0
    while ((n < nmin) or (log_term > log_tol)):
        # Advance the series five terms per pass to reduce the overhead of the while loop
        n = n + d
        #      print "puto n %i"%(n)
        #      print f_log
        an = an + d
        bn = bn + d

        d = 1
        #      term = (x*term*an)/(bn*n);
        log_term1 = np.log(x) + log_term + np.log(an + d) - np.log(
            bn + d) - np.log(n + d)
        d += 1
        log_term2 = np.log(x) + log_term1 + np.log(an + d) - np.log(
            bn + d) - np.log(n + d)
        d += 1
        log_term3 = np.log(x) + log_term2 + np.log(an + d) - np.log(
            bn + d) - np.log(n + d)
        d += 1
        log_term4 = np.log(x) + log_term3 + np.log(an + d) - np.log(
            bn + d) - np.log(n + d)
        d += 1
        log_term = np.log(x) + log_term4 + np.log(an + d) - np.log(
            bn + d) - np.log(n + d)

        terms_list.extend(
            [log_term1, log_term2, log_term3, log_term4, log_term])

        if (n > 10000):  # We f****d up
            #        print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$4"
            #        print " Not converged "
            #        print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$4"
            # If we could not compute it, we raise an error...
            raise RuntimeError(
                "Kummer_is_inf: Kummer function did not converge after 10000 iterations. Kappa = %f" % x)
    f_log = gf.sum_logs(terms_list)
    #    print "f_log success %f " % f_log
    #    print "-----------------------------------------"
    #    print n
    #    print "-----------------------------------------"
    return f_log