Example #1
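# Serial LDA training loop: collapsed Gibbs sampling with burn-in
# averaging and periodic checkpointing (Python 2 syntax). parse_args,
# load_corpus, Sampler, Model and CheckPointer are assumed to come from
# the surrounding module; only random is a standard-library import.
import random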
def main():
    options = parse_args()
    random.seed()

    sampler = None
    model = None
    corpus = None
    word_id_map = None
    next_iteration = 0
    likelihoods = []
    checkpointer = CheckPointer()
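
    # Resume from the latest checkpoint if requested; otherwise build
    # the corpus, sampler and model from scratch.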
    if options.restart_by_checkpoint:
        (model, sampler, corpus, word_id_map, likelihoods,
         next_iteration) = checkpointer.load()
        print "Restart at iteration:", next_iteration
    else:
        corpus, word_id_map = load_corpus(options.train_name,
                                          options.num_topics)
        sampler = Sampler(options.alpha, options.beta)
        model = Model()
        model.init_model(len(corpus), options.num_topics, len(word_id_map))
        sampler.init_model_given_corpus(corpus, model)
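
    # Gibbs sampling iterations: each pass resamples the topic
    # assignments for the whole corpus.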
    for i in range(next_iteration, options.total_iterations):
        print "Iteration:", i
        sampler.sample_loop(corpus, model)
        if options.compute_likelihood:
            likelihood = sampler.compute_log_likelihood(corpus, model)
            print "    Loglikehood:", likelihood
            likelihoods.append(likelihood)

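        # After burn-in, accumulate the per-iteration counts so the
        # saved model averages the post-burn-in samples.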
        if i >= options.burn_in_iterations:
            model.accumulate_model()
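        # Periodically dump the full training state so an interrupted
        # run can resume via restart_by_checkpoint.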
        if options.checkpoint_interval > 0:
            interval = options.checkpoint_interval
            if i % interval == interval - 1:
                checkpointer.dump(model, sampler, corpus, word_id_map,
                                  likelihoods, i + 1)
                print "    Save check point."

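    # Average the accumulated counts and write out the final model.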
    model.average_accumulative_model()
    model.save_model(options.model_name, word_id_map)

    if options.compute_likelihood:
        # options.likelihood_name is assumed to be defined by parse_args,
        # alongside model_name and train_name.
        likelihood_file = open(options.likelihood_name, 'w')
        likelihood_file.writelines([str(x) + '\n' for x in likelihoods])
        likelihood_file.close()
Example #2
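# Distributed variant of the same training loop using mpi4py (Python 2
# syntax): each rank Gibbs-samples its own shard of the corpus and the
# count tables are merged with custom allreduce ops. parse_args,
# distributely_load_corpus, Sampler, ParallelModel, CheckPointer,
# list_sum and list2d_sum are assumed to come from the surrounding module.
import random
from mpi4py import MPI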
def main():
	options = parse_args()
	random.seed()

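	# Communicator plus custom reduction ops that element-wise sum the
	# 1-D and 2-D count lists across ranks.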
	comm = MPI.COMM_WORLD
	pnum = comm.Get_size()
	myid = comm.Get_rank()

	op = MPI.Op.Create(list_sum)
	op2 = MPI.Op.Create(list2d_sum)

	sampler = None
	model = None
	corpus_local = None
	word_id_map = None
	next_iteration = 0
	likelihoods = []
	checkpointer = CheckPointer()

	if options.restart_by_checkpoint:
		(model, sampler, corpus_local, word_id_map, likelihoods,
				next_iteration) = checkpointer.load()
		if myid == 0:
			print "Restart at iteration:", next_iteration
	else:
		corpus_local, word_id_map = distributely_load_corpus(
				options.train_name, options.num_topics, myid, pnum)
		sampler = Sampler(options.alpha, options.beta)

	for i in range(next_iteration, options.total_iterations):
		if myid == 0:
			print "Iteration:", i
		model = ParallelModel()
		model.init_model(len(corpus_local), options.num_topics, len(word_id_map))
		sampler.init_model_given_corpus(corpus_local, model)
		model.allreduce_model(comm, op, op2)
		sampler.sample_loop(corpus_local, model)
		if options.compute_likelihood:
			# Each rank scores its own shard; rank 0 receives the global sum.
			loglikelihood_local = \
					sampler.compute_log_likelihood(corpus_local, model)
			loglikelihood_global = comm.reduce(
					loglikelihood_local, op=MPI.SUM, root=0)
			if myid == 0:
				print "    Loglikelihood:", loglikelihood_global
				likelihoods.append(loglikelihood_global)

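		# Periodically dump the full training state for restarts.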
		if options.checkpoint_interval > 0:
			interval = options.checkpoint_interval
			if i % interval == interval - 1:
				checkpointer.dump(model, sampler, corpus_local, word_id_map,
									likelihoods, i + 1)
				if myid == 0:
					print "    Save check point."
	model = ParallelModel()
	model.init_model(len(corpus_local), options.num_topics, len(word_id_map))
	sampler.init_model_given_corpus(corpus_local, model)
	model.allreduce_model(comm, op, op2)
		
	if myid == 0:
		model.save_model(options.model_name, word_id_map, False)
		if options.compute_likelihood:
			# options.likelihood_name is assumed to be defined by
			# parse_args, as in the serial example.
			likelihood_file = open(options.likelihood_name, 'w')
			likelihood_file.writelines([str(x) + '\n' for x in likelihoods])
			likelihood_file.close()

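	# Release the custom MPI reduction ops.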
	op.Free()
	op2.Free()