# Single-process training driver.
import random


def main():
    options = parse_args()
    random.seed()

    sampler = None
    model = None
    corpus = None
    word_id_map = None
    next_iteration = 0
    likelihoods = []
    checkpointer = CheckPointer()

    if options.restart_by_checkpoint:
        # Resume the full training state from the latest checkpoint.
        (model, sampler, corpus, word_id_map,
         likelihoods, next_iteration) = checkpointer.load()
        print "Restart at iteration:", next_iteration
    else:
        corpus, word_id_map = load_corpus(options.train_name,
                                          options.num_topics)
        sampler = Sampler(options.alpha, options.beta)
        model = Model()
        model.init_model(len(corpus), options.num_topics, len(word_id_map))
        sampler.init_model_given_corpus(corpus, model)

    for i in range(next_iteration, options.total_iterations):
        print "Iteration:", i
        sampler.sample_loop(corpus, model)
        if options.compute_likelihood:
            likelihood = sampler.compute_log_likelihood(corpus, model)
            print " Loglikelihood:", likelihood
            likelihoods.append(likelihood)
        if i >= options.burn_in_iterations:
            # After burn-in, accumulate samples for the averaged model.
            model.accumulate_model()
        if options.checkpoint_interval > 0:
            interval = options.checkpoint_interval
            if i % interval == interval - 1:
                checkpointer.dump(model, sampler, corpus, word_id_map,
                                  likelihoods, i + 1)
                print " Save checkpoint."

    model.average_accumulative_model()
    model.save_model(options.model_name, word_id_map)
    if options.compute_likelihood:
        # NOTE: the original referenced an undefined `likelihood_name`;
        # `options.likelihood_name` is assumed here.
        likelihood_file = open(options.likelihood_name, 'w')
        likelihood_file.writelines([str(x) + '\n' for x in likelihoods])
        likelihood_file.close()
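# A standard entry-point guard, assumed rather than shown in the original
# listing, so the driver can be run directly, e.g. `python lda.py ...`.
if __name__ == '__main__':
    main()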
# MPI-parallel training driver.
import random

from mpi4py import MPI


def main():
    options = parse_args()
    random.seed()

    comm = MPI.COMM_WORLD
    pnum = comm.Get_size()
    myid = comm.Get_rank()
    # Custom reduction operators for summing 1-D and 2-D count lists
    # across ranks.
    op = MPI.Op.Create(list_sum)
    op2 = MPI.Op.Create(list2d_sum)

    sampler = None
    model = None
    corpus_local = None
    word_id_map = None
    next_iteration = 0
    likelihoods = []
    checkpointer = CheckPointer()

    if options.restart_by_checkpoint:
        (model, sampler, corpus_local, word_id_map,
         likelihoods, next_iteration) = checkpointer.load()
        if myid == 0:
            print "Restart at iteration:", next_iteration
    else:
        # Each rank loads only its own shard of the corpus.
        corpus_local, word_id_map = distributely_load_corpus(
            options.train_name, options.num_topics, myid, pnum)
        sampler = Sampler(options.alpha, options.beta)

    for i in range(next_iteration, options.total_iterations):
        if myid == 0:
            print "Iteration:", i
        # Rebuild the model from the local corpus, then merge the per-rank
        # counts into a global model with allreduce.
        model = ParallelModel()
        model.init_model(len(corpus_local), options.num_topics,
                         len(word_id_map))
        sampler.init_model_given_corpus(corpus_local, model)
        model.allreduce_model(comm, op, op2)

        sampler.sample_loop(corpus_local, model)
        if options.compute_likelihood:
            loglikelihood_local = \
                sampler.compute_log_likelihood(corpus_local, model)
            # NOTE: the original passed a receive object positionally, which
            # only old mpi4py releases accept; the modern pickle-based
            # `reduce(sendobj, op, root)` signature is used here. The result
            # is only meaningful on the root rank.
            loglikelihood_global = comm.reduce(loglikelihood_local,
                                               op=MPI.SUM, root=0)
            if myid == 0:
                print " Loglikelihood:", loglikelihood_global
                likelihoods.append(loglikelihood_global)
        if options.checkpoint_interval > 0:
            interval = options.checkpoint_interval
            if i % interval == interval - 1:
                checkpointer.dump(model, sampler, corpus_local, word_id_map,
                                  likelihoods, i + 1)
                if myid == 0:
                    print " Save checkpoint."

    # Build the final global model once more before saving.
    model = ParallelModel()
    model.init_model(len(corpus_local), options.num_topics, len(word_id_map))
    sampler.init_model_given_corpus(corpus_local, model)
    model.allreduce_model(comm, op, op2)

    if myid == 0:
        model.save_model(options.model_name, word_id_map, False)
        # Write likelihoods only when they were computed, matching the
        # single-process driver.
        if options.compute_likelihood:
            # NOTE: the original referenced an undefined `likelihood_name`;
            # `options.likelihood_name` is assumed here.
            likelihood_file = open(options.likelihood_name, 'w')
            likelihood_file.writelines([str(x) + '\n' for x in likelihoods])
            likelihood_file.close()

    op.Free()
    op2.Free()
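# The callables passed to MPI.Op.Create above are defined elsewhere in this
# project; the sketch below is an assumption, not the original code. For
# mpi4py's pickle-based reductions, a user-defined op receives the two
# partial results plus a datatype handle and returns the combined value.
def list_sum(list1, list2, datatype):
    # Element-wise sum of two equal-length 1-D lists of counts.
    return [a + b for a, b in zip(list1, list2)]


def list2d_sum(mat1, mat2, datatype):
    # Element-wise sum of two 2-D lists (equal-shaped lists of rows).
    return [[a + b for a, b in zip(row1, row2)]
            for row1, row2 in zip(mat1, mat2)]


# Assumed entry point; a typical launch would be something like
# `mpiexec -n 4 python <this file> ...`.
if __name__ == '__main__':
    main()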