# NOTE(review): the statements down to `data[d, :] = doc.w` look like the tail
# of a definition whose header lies above this span (presumably the
# `resample_data` routine called below) -- confirm and restore its indentation.
# One row of `data` per document, filled from the `w` attribute of a freshly
# generated document.
data = np.zeros((D, model.V), dtype=int)
for d in xrange(D):
    doc = model.generate(N=N, keep=True)
    data[d, :] = doc.w


# def test_geweke_lda():
if __name__ == "__main__":
    # Experiment settings
    N_iter = 50000
    T = 3               # Number of topics
    D = 10              # Number of documents
    V = 10              # Number of words
    N = 20              # Number of words per document
    alpha_beta = 1.0

    # Build the stick-breaking correlated LDA model
    model = StickbreakingCorrelatedLDA(T, V, alpha_beta=alpha_beta)

    # Run a Geweke test: alternate between regenerating the data and
    # resampling the model, keeping a copy of the parameters after each sweep.
    thetas, betas = [], []
    for itr in progprint_xrange(N_iter):
        # Resample the data
        resample_data(model, D, N)

        # Resample the parameters
        model.resample_model()

        # Record this sweep's parameter draws
        thetas.append(model.thetas.copy())
        betas.append(model.beta.copy())
## DEBUG switch: when True, every model starts from true_lda's beta and is
## trained with true_lda's thetas; when False, each model keeps its own beta
## and train() receives thetas=None.
init_to_true = False


def _init_and_train(model, **train_kwargs):
    """Optionally seed `model` from `true_lda`, then return train()'s result.

    Extra keyword arguments (e.g. `method`) are forwarded to `train` unchanged.
    """
    model.beta = true_lda.beta if init_to_true else model.beta
    start_thetas = true_lda.thetas if init_to_true else None
    return train(model, thetas=start_thetas, **train_kwargs)


# Standard LDA, trained with train()'s default method
std_model = StandardLDA(T, V, alpha_beta, alpha_theta)
std_results = _init_and_train(std_model)

# Standard LDA, trained with the 'resample_model_collapsed' method
std_collapsed_model = StandardLDA(T, V, alpha_beta, alpha_theta)
std_collapsed_results = _init_and_train(
    std_collapsed_model, method='resample_model_collapsed')

# Stick-breaking correlated LDA
sb_model = StickbreakingCorrelatedLDA(T, V, alpha_beta)
sb_results = _init_and_train(sb_model)

# Logistic-normal correlated LDA
ln_model = LogisticNormalCorrelatedLDA(T, V, alpha_beta)
ln_results = _init_and_train(ln_model)

# Results and their display labels, kept in matching order
all_results = [sb_results, ln_results, std_results, std_collapsed_results]
all_labels = ["SB Corr. LDA", "LN Corr. LDA", "Std. LDA", "Collapsed LDA"]

# Alternative subsets (uncomment one pair to restrict the comparison):
# all_results = [std_results, std_collapsed_results]
# all_labels = ["Std. LDA", "Collapsed LDA"]
# all_results = [ln_results]
# all_labels = ["LN Corr. LDA"]
# def test_geweke_lda():
if __name__ == "__main__":
    # Experiment settings
    N_iter = 5000
    T = 3               # Number of topics
    D = 10              # Number of documents
    V = 20              # Number of words
    N = 20              # Number of words per document
    alpha_beta = 1.0

    # Generate synthetic data: a D x V matrix of Poisson(2) draws,
    # stored in CSR sparse format.
    data = csr_matrix(np.random.poisson(2, (D, V)))

    # Build the stick-breaking correlated LDA model on that data
    model = StickbreakingCorrelatedLDA(data, T, alpha_beta=alpha_beta)

    # Run a Geweke test: alternate between regenerating the data and
    # resampling the model, keeping a copy of the parameters after each sweep.
    thetas, betas = [], []
    for itr in progprint_xrange(N_iter):
        # Resample the data
        model.generate(N, keep=True)

        # Resample the parameters
        model.resample()

        # Record this sweep's parameter draws
        thetas.append(model.theta.copy())
        betas.append(model.beta.copy())