class LDARunner:
    """Couple a ``FixedTopicModel`` with a ``BaseSampler`` and save the output.

    Construction builds the model, feeds it every document and prepares the
    sampler; :meth:`run` performs inference and then hands the model to
    ``save_topic_matrix``.
    """

    def __init__(self, output_path, docs, vocab, topic_count=20, alpha=0.1,
                 beta=0.1, total_iteration=1000):
        """Build the model from ``docs`` and attach a sampler to it.

        Args:
            output_path: Passed to ``save_topic_matrix`` by :meth:`run`.
            docs: Iterable of documents; each one is given to
                ``model.add_new_document``.
            vocab: Vocabulary; its length is passed to the model and the
                object itself is passed to ``save_topic_matrix``.
            topic_count: First argument of ``FixedTopicModel``; kept as
                ``self.k``.
            alpha: Forwarded to ``FixedTopicModel`` (presumably a prior
                hyperparameter -- confirm against the model class).
            beta: Forwarded to ``FixedTopicModel`` (presumably a prior
                hyperparameter -- confirm against the model class).
            total_iteration: Second argument of ``BaseSampler``.
        """
        self.output_path = output_path
        self.vocab = vocab
        self.k = topic_count
        self.alpha = alpha
        self.beta = beta

        vocab_size = len(vocab)
        self.model = FixedTopicModel(topic_count, vocab_size, alpha, beta)
        for document in docs:
            self.model.add_new_document(document)

        # The sampler is created only after every document has been added.
        self.sampler = BaseSampler(self.model, total_iteration)

    def run(self):
        """Run the sampler's inference, then save the topic matrix."""
        self.sampler.inference()
        save_topic_matrix(self.model, self.vocab, self.output_path)
def __init__(self, output_path, docs, vocab, topic_count=20, alpha=0.1,
             beta=0.1, total_iteration=1000):
    """Create the fixed-topic model, load the documents, attach a sampler.

    Args:
        output_path: Stored unchanged on the instance.
        docs: Iterable of documents, each passed to
            ``model.add_new_document``.
        vocab: Vocabulary; stored on the instance, and ``len(vocab)`` is
            passed to ``FixedTopicModel``.
        topic_count: Passed to ``FixedTopicModel``; stored as ``self.k``.
        alpha: Passed to ``FixedTopicModel`` and stored on the instance.
        beta: Passed to ``FixedTopicModel`` and stored on the instance.
        total_iteration: Passed to ``BaseSampler``.
    """
    self.output_path = output_path
    self.vocab = vocab

    # Keep the hyperparameters on the instance alongside the model.
    self.k = topic_count
    self.alpha = alpha
    self.beta = beta

    # ``self.model`` is bound first; ``topic_model`` is a local alias.
    self.model = topic_model = FixedTopicModel(
        topic_count, len(vocab), alpha, beta
    )
    for entry in docs:
        topic_model.add_new_document(entry)

    self.sampler = BaseSampler(topic_model, total_iteration)
def __init__(
    self,
    output_path,
    docs,
    vocab,
    topic_count=20,
    alpha=0.1,
    beta=0.1,
    total_iteration=1000,
):
    """Set up a ``FixedTopicModel`` over ``docs`` and a sampler for it.

    The arguments ``topic_count``, ``len(vocab)``, ``alpha`` and ``beta``
    are passed to ``FixedTopicModel`` in that order. Every element of
    ``docs`` is then registered through ``add_new_document``, and finally a
    ``BaseSampler`` is created from the model and ``total_iteration``.

    ``output_path`` and ``vocab`` are stored as given; ``topic_count`` is
    stored under the attribute name ``k``.
    """
    # Plain configuration kept on the instance.
    self.output_path = output_path
    self.vocab = vocab
    self.k = topic_count
    self.alpha = alpha
    self.beta = beta

    # Model construction followed by document registration.
    n_words = len(vocab)
    self.model = FixedTopicModel(topic_count, n_words, alpha, beta)
    for item in docs:
        self.model.add_new_document(item)

    # Sampler creation comes last, once the model holds all documents.
    self.sampler = BaseSampler(self.model, total_iteration)
class LDARunner:
    """Driver that prepares a fixed-topic model and runs sampling on it.

    ``__init__`` assembles the model and sampler; ``run`` triggers
    inference and then calls ``save_topic_matrix`` with the model, the
    vocabulary and the output path.
    """

    def __init__(
        self,
        output_path,
        docs,
        vocab,
        topic_count=20,
        alpha=0.1,
        beta=0.1,
        total_iteration=1000,
    ):
        """Store the settings, build the model and create the sampler.

        Args:
            output_path: Destination handed to ``save_topic_matrix``.
            docs: Iterable whose items are added to the model one at a time.
            vocab: Vocabulary; only its length is given to the model.
            topic_count: Topic-count argument of ``FixedTopicModel``; also
                stored as ``self.k``.
            alpha: Forwarded unchanged to ``FixedTopicModel``.
            beta: Forwarded unchanged to ``FixedTopicModel``.
            total_iteration: Forwarded unchanged to ``BaseSampler``.
        """
        self.output_path = output_path
        self.vocab = vocab
        self.k = topic_count
        self.alpha = alpha
        self.beta = beta

        self.model = model = FixedTopicModel(
            topic_count, len(vocab), alpha, beta
        )
        for doc_item in docs:
            model.add_new_document(doc_item)

        self.sampler = BaseSampler(model, total_iteration)

    def run(self):
        """Perform inference and write out the resulting topic matrix."""
        self.sampler.inference()
        save_topic_matrix(self.model, self.vocab, self.output_path)
def __init__(
    self,
    output_path,
    docs,
    vocab,
    alpha_table=1.0,
    alpha_topic=1.0,
    beta=0.5,
    initial_topic=1,
    initial_table=1,
    total_iteration=1000,
):
    """Build an ``HDPTopicModel`` over ``docs`` and attach a sampler.

    Args:
        output_path: Stored unchanged on the instance.
        docs: Iterable of documents, each passed to
            ``model.add_new_document``.
        vocab: Vocabulary; stored on the instance, and ``len(vocab)`` is
            the first argument of ``HDPTopicModel``.
        alpha_table: Forwarded to ``HDPTopicModel`` (presumably a
            concentration parameter -- confirm against the model class).
        alpha_topic: Forwarded to ``HDPTopicModel`` (presumably a
            concentration parameter -- confirm against the model class).
        beta: Forwarded to ``HDPTopicModel``.
        initial_topic: Forwarded to ``HDPTopicModel``.
        initial_table: Forwarded to ``HDPTopicModel``.
        total_iteration: Second argument of ``BaseSampler``.
    """
    self.output_path = output_path
    self.vocab = vocab

    vocab_size = len(vocab)
    self.model = HDPTopicModel(
        vocab_size,
        alpha_table,
        alpha_topic,
        beta,
        initial_topic,
        initial_table,
    )
    for document in docs:
        self.model.add_new_document(document)

    # The sampler is given ``_hdp_show_statistics`` as its callback.
    self.sampler = BaseSampler(
        self.model,
        total_iteration,
        callback=_hdp_show_statistics,
    )