def apply(self):
    """Fit an LDA model on the current corpus and publish the results.

    The topic list is cleared and the info label refreshed first.  When no
    (or an empty) corpus is present, ``None`` is sent on both outputs.
    Otherwise a model with ``self.num_topics`` topics is fitted on
    ``self.corpus.tokens``, the topic list is repopulated, the table
    returned by ``insert_topics_into_corpus`` is sent on ``Output.DATA``
    and topic 0 is selected and sent on ``Output.TOPICS``.

    Any exception raised while fitting propagates to the caller, but the
    topic-count control is re-enabled regardless.
    """
    self.topic_desc.clear()
    self.refresh_gui()

    if not self.corpus:
        self.send(Output.DATA, None)
        self.send(Output.TOPICS, None)
        return

    # Lock the controls while the model is being fitted.
    self.commit.setEnabled(False)
    self.enabled(False)
    try:
        # The progress bar is sized by the number of corpus entries and
        # advanced through the callback handed to LDA.
        with self.progressBar(len(self.corpus)) as bar:
            self.lda = LDA(self.corpus.tokens,
                           num_topics=self.num_topics,
                           callback=bar.advance)
            table = self.lda.insert_topics_into_corpus(self.corpus)
            self.update_topics()
        self.send(Output.DATA, table)
        self.send_topic_by_id(0)
    finally:
        # Without this, a failure above would leave the spin box disabled
        # for the rest of the widget's lifetime.
        self.enabled(True)
def apply(self):
    """Preprocess the corpus, fit an LDA model and publish the results.

    The topic list is cleared and the info label refreshed first.  When no
    (or an empty) corpus is present, ``None`` is sent on both outputs.
    Otherwise the documents are run through ``self.preprocessor``, a model
    with ``self.num_topics`` topics is fitted on the result, the topic
    list is repopulated, the table returned by
    ``insert_topics_into_corpus`` is sent on ``Output.DATA`` and topic 0
    is selected and sent on ``Output.TOPICS``.

    Any exception raised along the way propagates to the caller, but the
    progress bar is closed and the topic-count control re-enabled
    regardless.
    """
    self.topic_desc.clear()
    self.refresh_gui()

    if not self.corpus:
        self.send(Output.DATA, None)
        self.send(Output.TOPICS, None)
        return

    # Lock the controls while the model is being fitted.
    self.commit.setEnabled(False)
    self.enabled(False)
    try:
        preprocessed = self.preprocessor(self.corpus.documents)

        self.progressBarInit()
        try:
            self.lda = LDA(preprocessed,
                           num_topics=self.num_topics,
                           callback=self.progress)
            table = self.lda.insert_topics_into_corpus(self.corpus)
            self.update_topics()
        finally:
            # Every progressBarInit() needs a matching finish, even when
            # fitting fails.
            self.progressBarFinished()

        self.send(Output.DATA, table)
        self.send_topic_by_id(0)
    finally:
        # Without this, a failure above would leave the spin box disabled
        # for the rest of the widget's lifetime.
        self.enabled(True)
class LDATests(unittest.TestCase):
    # Tests for the tables and word lists produced by a fitted LDA model.
    #
    # The corpus and the model are built once in setUpClass rather than in
    # the class body, so nothing is loaded or trained when the module is
    # merely imported, and a broken fixture is reported as a test error
    # instead of an import failure.

    @classmethod
    def setUpClass(cls):
        # One 5-topic model on the whitespace-tokenised 'deerwester'
        # corpus, shared by all tests below.
        cls.corp = Corpus.from_file('deerwester')
        cls.text = [d.split() for d in cls.corp.documents]
        cls.model = LDA(cls.text, num_topics=5)

    def test_insert_topic_into_corpus(self):
        # The resulting table has one row per document and one attribute
        # column per topic.
        corp_topics = self.model.insert_topics_into_corpus(self.corp)
        self.assertEqual(len(corp_topics), len(self.corp))
        self.assertEqual(len(corp_topics.domain.attributes), 5)
        self.assertEqual(corp_topics.X.shape, (len(self.corp), 5))

    def test_get_topic_table_by_id(self):
        # 45 rows with two meta columns are expected for this corpus.
        topic1 = self.model.get_topics_table_by_id(1)
        self.assertEqual(len(topic1), 45)
        self.assertEqual(topic1.metas.shape, (45, 2))

    def test_top_words_by_topic(self):
        # Ten top words are returned when no count is given.
        words = self.model.get_top_words_by_id(1)
        self.assertEqual(len(words), 10)

    def test_too_large_id(self):
        # An id beyond the five fitted topics must be rejected.
        with self.assertRaises(ValueError):
            self.model.get_topics_table_by_id(6)
class OWLDA(OWWidget):
    """Widget that fits an LDA topic model on an input corpus.

    Inputs: a ``Corpus`` and, optionally, a ``Preprocessor`` (a default
    ``Preprocessor()`` is used when none is connected).

    Outputs: ``Output.DATA`` carries the table returned by
    ``LDA.insert_topics_into_corpus``; ``Output.TOPICS`` carries the table
    for the topic currently selected in the main-area list.
    """

    # Basic widget info
    name = "Topic Discovery"
    description = "Latent Dirichlet Allocation topic model."
    icon = "icons/LDA.svg"
    priority = 50

    settingsHandler = DomainContextHandler()

    # Input/output
    inputs = [("Corpus", Corpus, "set_data"),
              ("Preprocessor", Preprocessor, "set_preprocessor")]
    outputs = [(Output.DATA, Table),
               (Output.TOPICS, Topics)]
    want_main_area = True

    # Settings
    num_topics = Setting(5)

    def __init__(self):
        """Build the control area (info, settings, apply) and topic list."""
        super().__init__()

        self.lda = None
        self.corpus = None
        self.preprocessor = Preprocessor()

        # Info.
        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.label(info_box, self, '')

        # Settings.
        topic_box = gui.widgetBox(self.controlArea, "Settings")
        hbox = gui.widgetBox(topic_box, orientation=0)
        self.topics_label = gui.label(hbox, self, 'Number of topics: ')
        self.topics_label.setMaximumSize(self.topics_label.sizeHint())
        self.topics_input = gui.spin(hbox, self, "num_topics",
                                     minv=1, maxv=2**31 - 1,
                                     callback=self.num_topics_changed)

        # Commit button; it stays disabled until the topic count differs
        # from the one the current model was fitted with.
        self.commit = gui.button(self.controlArea, self, "&Apply",
                                 callback=self.apply, default=True)
        self.commit.setEnabled(False)
        gui.rubber(self.controlArea)

        # Topics description
        self.cols = ['Topic', 'Topic keywords']
        self.topic_desc = QtGui.QTreeWidget()
        self.topic_desc.setColumnCount(len(self.cols))
        self.topic_desc.setHeaderLabels(self.cols)
        #self.topic_desc.setSelectionMode(QtGui.QTreeView.ExtendedSelection)
        self.topic_desc.itemSelectionChanged.connect(
            self.selected_topic_changed)
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)
        self.mainArea.layout().addWidget(self.topic_desc)

        self.refresh_gui()

    def set_preprocessor(self, data):
        """Input handler: store the preprocessor (or a default) and refit."""
        self.preprocessor = Preprocessor() if data is None else data
        self.apply()

    def set_data(self, data=None):
        """Input handler: store the corpus (may be ``None``) and refit."""
        self.corpus = data
        self.apply()

    def refresh_gui(self):
        """Show the number of input entries, or "(None)" without a corpus."""
        ndoc = "(None)" if self.corpus is None else len(self.corpus)
        self.info_label.setText("Input text entries: {}".format(ndoc))

    def update_topics(self):
        """Repopulate the topic list from the current model."""
        self.topic_desc.clear()
        for topic_id in range(self.lda.num_topics):
            words = self.lda.get_top_words_by_id(topic_id)
            item = LDATreeWidgetItem(topic_id, words, self.topic_desc)
            self.topic_desc.addTopLevelItem(item)
        # Same column count as configured in __init__.
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)

    def num_topics_changed(self):
        """Enable Apply only when refitting would change the model."""
        nothing_to_do = self.corpus is None or (
            self.lda is not None
            and self.lda.num_topics == self.num_topics)
        self.commit.setEnabled(not nothing_to_do)

    def selected_topic_changed(self):
        """Send the first selected topic, if any, on the topics output."""
        selected = self.topic_desc.selectedItems()
        if selected:
            self.send_topic_by_id(selected[0].topic_id)

    def send_topic_by_id(self, topic_id):
        """Make ``topic_id`` the current list item and send its table."""
        self.topic_desc.setCurrentItem(
            self.topic_desc.topLevelItem(topic_id))
        self.send(Output.TOPICS, self.lda.get_topics_table_by_id(topic_id))

    # The parameter name shadows the builtin; it is kept so that existing
    # keyword callers continue to work.
    def enabled(self, bool):
        """Enable or disable the topic-count spin box."""
        self.topics_input.setEnabled(bool)

    def progress(self, p):
        """Forward ``p`` to the widget's progress bar (LDA callback)."""
        self.progressBarSet(p)

    def apply(self):
        """Preprocess the corpus, fit an LDA model and publish the results.

        Without a (non-empty) corpus, ``None`` is sent on both outputs.
        Exceptions raised while preprocessing or fitting propagate, but
        the progress bar is closed and the spin box re-enabled regardless.
        """
        self.topic_desc.clear()
        self.refresh_gui()

        if not self.corpus:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
            return

        # Lock the controls while the model is being fitted.
        self.commit.setEnabled(False)
        self.enabled(False)
        try:
            preprocessed = self.preprocessor(self.corpus.documents)

            self.progressBarInit()
            try:
                self.lda = LDA(preprocessed,
                               num_topics=self.num_topics,
                               callback=self.progress)
                table = self.lda.insert_topics_into_corpus(self.corpus)
                self.update_topics()
            finally:
                # Every progressBarInit() needs a matching finish, even
                # when fitting fails.
                self.progressBarFinished()

            self.send(Output.DATA, table)
            self.send_topic_by_id(0)
        finally:
            # Without this, a failure above would leave the spin box
            # disabled for the rest of the widget's lifetime.
            self.enabled(True)
class OWLDA(OWWidget):
    """Topic-modelling widget built around Latent Dirichlet Allocation.

    Inputs: a ``Corpus`` and, optionally, a ``Preprocessor`` (a default
    ``Preprocessor()`` is used when none is connected).

    Outputs: ``Output.DATA`` carries the table returned by
    ``LDA.insert_topics_into_corpus``; ``Output.TOPICS`` carries the table
    for the topic currently selected in the main-area list.
    """

    # Basic widget info
    name = "Topic Modelling"
    description = "Topic modelling with Latent Dirichlet Allocation."
    icon = "icons/LDA.svg"
    priority = 50

    settingsHandler = DomainContextHandler()

    # Input/output
    inputs = [("Corpus", Corpus, "set_data"),
              ("Preprocessor", Preprocessor, "set_preprocessor")]
    outputs = [(Output.DATA, Table),
               (Output.TOPICS, Topics)]
    want_main_area = True

    # Settings
    num_topics = Setting(5)

    def __init__(self):
        """Create the controls on the left and the topic list on the right."""
        super().__init__()

        self.lda = None
        self.corpus = None
        self.preprocessor = Preprocessor()

        # Info.
        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.label(info_box, self, '')

        # Settings.
        topic_box = gui.widgetBox(self.controlArea, "Settings")
        hbox = gui.widgetBox(topic_box, orientation=0)
        self.topics_label = gui.label(hbox, self, 'Number of topics: ')
        self.topics_label.setMaximumSize(self.topics_label.sizeHint())
        self.topics_input = gui.spin(hbox, self, "num_topics",
                                     minv=1, maxv=2 ** 31 - 1,
                                     callback=self.num_topics_changed)

        # Commit button; it stays disabled until the topic count differs
        # from the one the current model was fitted with.
        self.commit = gui.button(self.controlArea, self, "&Apply",
                                 callback=self.apply, default=True)
        self.commit.setEnabled(False)
        gui.rubber(self.controlArea)

        # Topics description
        self.cols = ['Topic', 'Topic keywords']
        self.topic_desc = QtGui.QTreeWidget()
        self.topic_desc.setColumnCount(len(self.cols))
        self.topic_desc.setHeaderLabels(self.cols)
        #self.topic_desc.setSelectionMode(QtGui.QTreeView.ExtendedSelection)
        self.topic_desc.itemSelectionChanged.connect(self.selected_topic_changed)
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)
        self.mainArea.layout().addWidget(self.topic_desc)

        self.refresh_gui()

    def set_preprocessor(self, data):
        """Input handler: use ``data`` (or a default preprocessor) and refit."""
        self.preprocessor = Preprocessor() if data is None else data
        self.apply()

    def set_data(self, data=None):
        """Input handler: remember the corpus (may be ``None``) and refit."""
        self.corpus = data
        self.apply()

    def refresh_gui(self):
        """Update the info label with the entry count, or "(None)"."""
        ndoc = "(None)" if self.corpus is None else len(self.corpus)
        self.info_label.setText("Input text entries: {}".format(ndoc))

    def update_topics(self):
        """Rebuild the topic list: one row per topic of the current model."""
        self.topic_desc.clear()
        for topic_id in range(self.lda.num_topics):
            words = self.lda.get_top_words_by_id(topic_id)
            item = LDATreeWidgetItem(topic_id, words, self.topic_desc)
            self.topic_desc.addTopLevelItem(item)
        # Same column count as configured in __init__.
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)

    def num_topics_changed(self):
        """Enable Apply only when refitting would change the model."""
        nothing_to_do = self.corpus is None or (
            self.lda is not None
            and self.lda.num_topics == self.num_topics)
        self.commit.setEnabled(not nothing_to_do)

    def selected_topic_changed(self):
        """Send the first selected topic, if any, on the topics output."""
        selected = self.topic_desc.selectedItems()
        if selected:
            self.send_topic_by_id(selected[0].topic_id)

    def send_topic_by_id(self, topic_id):
        """Make ``topic_id`` the current list item and send its table."""
        self.topic_desc.setCurrentItem(self.topic_desc.topLevelItem(topic_id))
        self.send(Output.TOPICS, self.lda.get_topics_table_by_id(topic_id))

    # The parameter name shadows the builtin; it is kept so that existing
    # keyword callers continue to work.
    def enabled(self, bool):
        """Enable or disable the topic-count spin box."""
        self.topics_input.setEnabled(bool)

    def progress(self, p):
        """Forward ``p`` to the widget's progress bar (LDA callback)."""
        self.progressBarSet(p)

    def apply(self):
        """Preprocess the corpus, fit an LDA model and publish the results.

        Without a (non-empty) corpus, ``None`` is sent on both outputs.
        Exceptions raised while preprocessing or fitting propagate, but
        the progress bar is closed and the spin box re-enabled regardless.
        """
        self.topic_desc.clear()
        self.refresh_gui()

        if not self.corpus:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
            return

        # Lock the controls while the model is being fitted.
        self.commit.setEnabled(False)
        self.enabled(False)
        try:
            preprocessed = self.preprocessor(self.corpus.documents)

            self.progressBarInit()
            try:
                self.lda = LDA(preprocessed,
                               num_topics=self.num_topics,
                               callback=self.progress)
                table = self.lda.insert_topics_into_corpus(self.corpus)
                self.update_topics()
            finally:
                # Every progressBarInit() needs a matching finish, even
                # when fitting fails.
                self.progressBarFinished()

            self.send(Output.DATA, table)
            self.send_topic_by_id(0)
        finally:
            # Without this, a failure above would leave the spin box
            # disabled for the rest of the widget's lifetime.
            self.enabled(True)