# Map each document's corpus index to its identifier string.
doc2id = {idx: doc for idx, doc in enumerate(id_list)}




# Root directory that holds all saved run outputs (relative to the cwd).
RUNDIRS_ROOT = "../runs/"



# Load the main model: build an LdaModel over the NIPS corpus, allocate its
# arrays, read the document-word data, then hydrate state from a run dir.
mstar = LdaModel(
    numT=NUM_TOPICS,
    corpus=NIPS_corpus,
    alpha=0.01,
    beta=0.01,
)
mstar.allocate_arrays()
mstar.read_dw_alphabetical()
# alternate run dir: os.path.join(RUNDIRS_ROOT, "lab7-24/run0021/")
rd = os.path.join(RUNDIRS_ROOT, "reduced1/")
mstar.load_from_rundir(rd)


# load the merged model
#mrgd = LdaModel(numT=NUM_TOPICS, corpus=NIPS_corpus, alpha=0.005, beta=0.01)
#mrgd.allocate_arrays()
#mrgd.read_dw_alphabetical()
#rd = os.path.join( RUNDIRS_ROOT, "merge40-a0_005-b0_01/" )
#mrgd.load_from_rundir(rd)



# setup the dirs models to be merged
#m_dir_list = []
#for num in range(22,41):
#    rd = os.path.join( RUNDIRS_ROOT, "lab7-24/run00"+str(num)+"/" )
# --- Example #2 ("예제 #2": separator between concatenated script fragments) ---
# vocab, model and doc2id
# Build the test corpus: load its vocabulary, run word counts, and map each
# test-document index to its identifier string.
arXiv_test_corpus = Low2Corpus(TEST_DOCS_FILE)
# Fix: the originals leaked file handles (open(...).readlines() with no close);
# context managers guarantee the files are closed.
with open(VOCAB_FILE, 'r') as vocab_file:
    arXiv_test_corpus.setVocabFromList([w.strip() for w in vocab_file])
arXiv_test_corpus.doCounts()
with open(TEST_IDS_FILE, 'r') as ids_file:
    test_id_list = [w.strip() for w in ids_file]
test_doc2id = dict(enumerate(test_id_list))

# the original to compare with
#phiT60_1   = np.load("../runs/repeatedT60-1/phi.npy")
#thetaT60_1 = np.load("../runs/repeatedT60-1/theta.npy")
#zT60_1     = np.load("../runs/repeatedT60-1/z.npy")

# Mon 29 Aug 2011 12:02:14 EDT
# testing log like

# Hydrate the original T=60 model from its saved run directory so its
# likelihood can be compared against the merged model below.
morig = LdaModel(
    numT=60,
    corpus=arXiv_corpus,
    alpha=0.01,
    beta=0.01,
)
morig.allocate_arrays()
morig.read_dw_alphabetical()
# NOTE(review): hard-coded absolute path; a portable alternative was
# os.path.join(RUNDIRS_ROOT, "../runs/repeatedT60-1/")
rd = "/Users/ivan/Homes/master/Documents/Projects/runs/repeatedT60-1/"
morig.load_from_rundir(rd)

# Same procedure for the merged topic model: build, allocate, read the
# document-word data, then hydrate from the merged-run directory.
mstar = LdaModel(
    numT=60,
    corpus=arXiv_corpus,
    alpha=0.01,
    beta=0.01,
)
mstar.allocate_arrays()
mstar.read_dw_alphabetical()
# NOTE(review): hard-coded absolute path — verify it exists on this machine.
rd = "/Users/ivan/Homes/master/Documents/Projects/runs/new_merging_gibbs0"
mstar.load_from_rundir(rd)