# Map row index in the doc-word matrix -> document id string.
# (assumes id_list is ordered the same way the corpus rows are — TODO confirm)
doc2id = dict( enumerate(id_list) )

# Root directory holding Gibbs-sampler run outputs.
RUNDIRS_ROOT = "../runs/"

# load the main model
# NOTE(review): LdaModel / NIPS_corpus / NUM_TOPICS are defined elsewhere in
# this file; the three calls below are order-dependent (arrays must be
# allocated and the doc-word data read before a run dir can be loaded).
mstar = LdaModel(numT=NUM_TOPICS, corpus=NIPS_corpus, alpha=0.01, beta=0.01)
mstar.allocate_arrays()
mstar.read_dw_alphabetical()
rd = os.path.join( RUNDIRS_ROOT, "reduced1/" )
#rd = os.path.join( RUNDIRS_ROOT, "lab7-24/run0021/" )
# Hydrate the model state from a previous sampler run directory
# (presumably phi/theta/z arrays — verify against LdaModel.load_from_rundir).
mstar.load_from_rundir(rd)

# load the merged model (kept for reference; disabled experiment variant)
#mrgd = LdaModel(numT=NUM_TOPICS, corpus=NIPS_corpus, alpha=0.005, beta=0.01)
#mrgd.allocate_arrays()
#mrgd.read_dw_alphabetical()
#rd = os.path.join( RUNDIRS_ROOT, "merge40-a0_005-b0_01/" )
#mrgd.load_from_rundir(rd)

# setup the dirs models to be merged (disabled experiment variant)
#m_dir_list = []
#for num in range(22,41):
#    rd = os.path.join( RUNDIRS_ROOT, "lab7-24/run00"+str(num)+"/" )
# vocab, model and doc2id for the arXiv test corpus
arXiv_test_corpus = Low2Corpus(TEST_DOCS_FILE)
# Read the vocabulary with a `with` block so the handle is closed
# deterministically (the original open(...).readlines() leaked it).
with open(VOCAB_FILE, 'r') as vocab_file:
    arXiv_test_corpus.setVocabFromList([w.strip() for w in vocab_file])
arXiv_test_corpus.doCounts()
with open(TEST_IDS_FILE, 'r') as ids_file:
    test_id_list = [w.strip() for w in ids_file]
# Map row index in the test doc-word matrix -> document id string.
test_doc2id = dict(enumerate(test_id_list))

# the original to compare with (kept for reference; disabled)
#phiT60_1 = np.load("../runs/repeatedT60-1/phi.npy")
#thetaT60_1 = np.load("../runs/repeatedT60-1/theta.npy")
#zT60_1 = np.load("../runs/repeatedT60-1/z.npy")

# Mon 29 Aug 2011 12:02:14 EDT
# testing log like

# hydrate from dir
# NOTE(review): the three calls are order-dependent — arrays must be
# allocated and doc-word data read before a run dir can be loaded.
morig = LdaModel(numT=60, corpus=arXiv_corpus, alpha=0.01, beta=0.01)
morig.allocate_arrays()
morig.read_dw_alphabetical()
#rd = os.path.join( RUNDIRS_ROOT, "../runs/repeatedT60-1/" )
# HACK: absolute, machine-specific path — parameterize before sharing.
rd = "/Users/ivan/Homes/master/Documents/Projects/runs/repeatedT60-1/"
morig.load_from_rundir(rd)

# same for merged topic model
# NOTE(review): this rebinds `mstar`; if an earlier `mstar` in this file is
# still needed afterwards, rename one of them.
mstar = LdaModel(numT=60, corpus=arXiv_corpus, alpha=0.01, beta=0.01)
mstar.allocate_arrays()
mstar.read_dw_alphabetical()
rd = "/Users/ivan/Homes/master/Documents/Projects/runs/new_merging_gibbs0"
mstar.load_from_rundir(rd)