Beispiel #1
0
    data = np.zeros((D, model.V),dtype=int)
    for d in xrange(D):
        doc = model.generate(N=N, keep=True)
        data[d,:] = doc.w

# def test_geweke_lda():
if __name__ == "__main__":
    # Geweke-style check: alternate between regenerating the documents
    # from the model and resampling the model, recording the model's
    # thetas and beta after every sweep.
    N_iter = 50000      # Number of sweeps
    T, V = 3, 10        # Number of topics, number of words
    D, N = 10, 20       # Number of documents, words per document
    alpha_beta = 1.0    # Forwarded to the model as `alpha_beta`

    # Build the stick-breaking correlated LDA model
    model = StickbreakingCorrelatedLDA(T, V, alpha_beta=alpha_beta)

    # One recorded entry per sweep
    thetas, betas = [], []
    for itr in progprint_xrange(N_iter):
        # Step 1: draw D fresh documents of N words each from the model
        resample_data(model, D, N)

        # Step 2: resample the model
        model.resample_model()

        # Step 3: store copies so later sweeps cannot overwrite them
        thetas.append(model.thetas.copy())
        betas.append(model.beta.copy())
Beispiel #2
0
    ## DEBUG! Set beta and theta to true values
    # When init_to_true is True, every model below starts from the
    # parameters of true_lda: its beta is copied in here and its thetas are
    # handed to train(). When False, each model keeps the beta it was
    # constructed with and train() receives thetas=None.
    # NOTE(review): true_lda, train, T, V, alpha_beta and alpha_theta are
    # defined outside this fragment.
    init_to_true = False

    # Standard LDA, trained without an explicit `method` argument.
    std_model = StandardLDA(T,V,alpha_beta,alpha_theta)
    std_model.beta = true_lda.beta if init_to_true else std_model.beta
    std_results = \
        train(std_model, thetas=true_lda.thetas if init_to_true else None)

    # Same model class, but trained with method='resample_model_collapsed'.
    std_collapsed_model = StandardLDA(T,V,alpha_beta,alpha_theta)
    std_collapsed_model.beta = true_lda.beta if init_to_true else std_collapsed_model.beta
    std_collapsed_results = \
        train(std_collapsed_model,
              method='resample_model_collapsed',
              thetas=true_lda.thetas if init_to_true else None)

    # Stick-breaking correlated LDA (constructed without alpha_theta).
    sb_model = StickbreakingCorrelatedLDA(T, V, alpha_beta)
    sb_model.beta = true_lda.beta if init_to_true else sb_model.beta
    sb_results = \
        train(sb_model, thetas=true_lda.thetas if init_to_true else None)

    # Logistic-normal correlated LDA (constructed without alpha_theta).
    ln_model = LogisticNormalCorrelatedLDA(T, V, alpha_beta)
    ln_model.beta = true_lda.beta if init_to_true else ln_model.beta
    ln_results = \
        train(ln_model, thetas=true_lda.thetas if init_to_true else None)

    # Results and their display labels, listed in the same order.
    # Presumably consumed together by code after this fragment -- confirm.
    all_results = [sb_results, ln_results, std_results, std_collapsed_results]
    all_labels = ["SB Corr. LDA", "LN Corr. LDA", "Std. LDA", "Collapsed LDA"]
    # Alternative subsets, kept for quick switching while debugging:
    # all_results = [std_results, std_collapsed_results]
    # all_labels = ["Std. LDA", "Collapsed LDA"]
    # all_results = [ln_results]
    # all_labels = ["LN Corr. LDA"]
Beispiel #3
0
# def test_geweke_lda():
if __name__ == "__main__":
    # Geweke-style check: alternate between regenerating data from the
    # model and resampling the model, recording the model's theta and beta
    # after every sweep.
    N_iter = 5000       # Number of sweeps
    T = 3               # Number of topics
    D, V = 10, 20       # Number of documents, number of words
    N = 20              # Number of words per document
    alpha_beta = 1.0    # Forwarded to the model as `alpha_beta`

    # Synthetic starting data: a D x V matrix of Poisson(2) draws, stored
    # as a CSR sparse matrix
    data = csr_matrix(np.random.poisson(2, (D, V)))

    # Build the stick-breaking correlated LDA model on that data
    model = StickbreakingCorrelatedLDA(data, T, alpha_beta=alpha_beta)

    # One recorded entry per sweep
    thetas, betas = [], []
    for itr in progprint_xrange(N_iter):
        # Step 1: regenerate the data from the model
        model.generate(N, keep=True)

        # Step 2: resample the model
        model.resample()

        # Step 3: store copies so later sweeps cannot overwrite them
        thetas.append(model.theta.copy())
        betas.append(model.beta.copy())
Beispiel #4
0
    data = np.zeros((D, model.V),dtype=int)
    for d in xrange(D):
        doc = model.generate(N=N, keep=True)
        data[d,:] = doc.w

# def test_geweke_lda():
if __name__ == "__main__":
    # Geweke-style check: alternate between regenerating the documents
    # from the model and resampling the model, recording the model's
    # thetas and beta after every sweep.
    N_iter = 50000      # Number of sweeps
    T, V = 3, 10        # Number of topics, number of words
    D, N = 10, 20       # Number of documents, words per document
    alpha_beta = 1.0    # Forwarded to the model as `alpha_beta`

    # Build the stick-breaking correlated LDA model
    model = StickbreakingCorrelatedLDA(T, V, alpha_beta=alpha_beta)

    # One recorded entry per sweep
    thetas, betas = [], []
    for itr in progprint_xrange(N_iter):
        # Step 1: draw D fresh documents of N words each from the model
        resample_data(model, D, N)

        # Step 2: resample the model
        model.resample_model()

        # Step 3: store copies so later sweeps cannot overwrite them
        thetas.append(model.thetas.copy())
        betas.append(model.beta.copy())