Ejemplo n.º 1
0
def _save_line_plot(x, y, ylabel, path):
    """Draw *y* against *x* as a single line chart and save it to *path*.

    The x-axis label ("Iterations") and the title ("MG-LDA training") are
    fixed; only the y-axis label varies between the training plots.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.plot(x, y)
    ax.set_xlabel("Iterations")
    ax.set_ylabel(ylabel)
    ax.set_title("MG-LDA training")
    ax.grid(True)
    fig.savefig(path)
    # Release the figure once it has been written to disk.
    plt.close(fig)


def train_model():
    """Train an MG-LDA model on ``./data/small/`` and persist the results.

    The corpus is parsed with ``data.parse_dir`` and handed to ``MgLda``
    together with ``n_local_topics`` and ``n_global_topics``; ``update()`` is
    then called ``iterations`` times (all three names come from the enclosing
    scope).  After every call the model's ``updates`` and ``log_lik``
    attributes and the wall-clock duration of the call are recorded.

    A new directory named after the current timestamp is created below
    ``models/`` and receives:

    * ``mglda_model_2.pkl`` -- the pickled model (highest pickle protocol),
    * ``updates.txt``, ``log_lik.txt``, ``times.txt`` -- the recorded traces,
    * ``log_lik.png``, ``updates.png`` -- line plots of the first two traces.

    Returns:
        The trained ``MgLda`` instance.

    Raises:
        OSError: if the timestamped output directory already exists or
            cannot be created (propagated from ``os.makedirs``).
    """
    updates = []
    log_lik = []
    # Kept local like the other traces: the function previously appended to a
    # ``times`` name it never created, which fails unless a global happens to
    # exist (and would then keep growing across calls).
    times = []

    # Train model
    print("Parsing dir..")
    docs, vocab, word_idx, sent_idx, sentences = data.parse_dir("./data/small/")
    model = MgLda(n_local_topics,
                  n_global_topics,
                  docs, vocab, word_idx, sentences, sent_idx)
    print("Done! Running %d iterations..." % iterations)
    for i in range(iterations):
        start = time.time()
        model.update()
        duration = time.time() - start
        updates.append(model.updates)
        log_lik.append(model.log_lik)
        times.append(duration)
        print("Iteration %d, Duration: %f" % (i, duration))

    # Save model and data
    out_dir = os.path.join(
        "models/", datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'mglda_model_2.pkl'), 'wb') as output:
        # -1 selects the highest pickle protocol available.
        pickle.dump(model, output, -1)

    updates = np.asarray(updates)
    log_lik = np.asarray(log_lik)
    np.savetxt(os.path.join(out_dir, 'updates.txt'), updates)
    np.savetxt(os.path.join(out_dir, 'log_lik.txt'), log_lik)
    np.savetxt(os.path.join(out_dir, 'times.txt'), np.asarray(times))
    print("Model and data saved!")

    # Add some plots
    print("Adding some plots..")
    x = np.arange(0, iterations)
    # Float divisor so the scaling never floor-divides an integer array.
    _save_line_plot(x, log_lik / 100000.0, "Log likelihood x 10^5",
                    os.path.join(out_dir, "log_lik.png"))
    _save_line_plot(x, updates, "Updates",
                    os.path.join(out_dir, "updates.png"))

    # Return the model
    return model
Ejemplo n.º 2
0
                new_z = np.random.multinomial(1, p_z).argmax()

                loglikelihood += log(p_z[new_z])

                self.assignment[i][w] = new_z
                self.nwz[cur_w, new_z] += 1
                self.ndz[i, new_z] += 1
                self.nz[new_z] += 1

        print "Loglikelihood %f" % loglikelihood
        self.loglikelihood.append(loglikelihood)


# Example run: parse the full corpus, fit a 10-topic LDA model for 200
# iterations, print the top words per topic and rescale the recorded
# log-likelihood trace.
print("Parsing dir..")
docs, vocab, word_idx, _, _ = data.parse_dir("./data/all/")
# To train on another corpus, point parse_dir at a different directory.
print("Done")
print(" ====== ")
print("Setting up LDA..")
l = Lda(10, docs, vocab, word_idx)

print("Training for 200 iterations...")
for i in range(200):
    l.update()
    # Report progress on every tenth iteration (0, 10, 20, ...).
    if not i % 10:
        print(" -- iteration %d ---" % i)
print("Done. Top words for reach topic:")
l.top_words(10)

# Scale each recorded log-likelihood value down by 10^5.
loglikelihood = [item / 100000 for item in l.loglikelihood]