Ejemplo n.º 1
0
    def apply(self):
        """Train an LDA model on the current corpus and send the results.

        The topic list is cleared and the GUI refreshed first. When
        ``self.corpus`` is falsy, ``None`` is sent on both outputs. Otherwise
        a model is fitted on ``self.corpus.tokens`` under a progress bar, the
        table returned by ``insert_topics_into_corpus`` is sent on
        ``Output.DATA`` and topic 0 is sent via ``send_topic_by_id``.

        Any exception raised while training or sending propagates to the
        caller, but the ``self.enabled(False)`` call is always undone.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if not self.corpus:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
            return

        self.commit.setEnabled(False)
        self.enabled(False)
        try:
            with self.progressBar(len(self.corpus)) as bar:
                self.lda = LDA(self.corpus.tokens,
                               num_topics=self.num_topics,
                               callback=bar.advance)
                table = self.lda.insert_topics_into_corpus(self.corpus)
                self.update_topics()

            self.send(Output.DATA, table)
            self.send_topic_by_id(0)
        finally:
            # Undo enabled(False) even if training or sending failed, so the
            # widget is not left in its disabled state.
            self.enabled(True)
Ejemplo n.º 2
0
    def apply(self):
        """Preprocess the corpus, train an LDA model and send the results.

        The topic list is cleared and the GUI refreshed first. When
        ``self.corpus`` is falsy, ``None`` is sent on both outputs. Otherwise
        ``self.preprocessor`` is applied to ``self.corpus.documents``, a model
        is fitted on the result while the progress bar is active, the table
        returned by ``insert_topics_into_corpus`` is sent on ``Output.DATA``
        and topic 0 is sent via ``send_topic_by_id``.

        Exceptions propagate to the caller, but the progress bar is always
        finished and the ``self.enabled(False)`` call is always undone.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if not self.corpus:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
            return

        self.commit.setEnabled(False)
        self.enabled(False)
        try:
            preprocessed = self.preprocessor(self.corpus.documents)

            self.progressBarInit()
            try:
                self.lda = LDA(preprocessed,
                               num_topics=self.num_topics,
                               callback=self.progress)
                table = self.lda.insert_topics_into_corpus(self.corpus)
                self.update_topics()
            finally:
                # Pair every progressBarInit() with progressBarFinished(),
                # even when model training raises.
                self.progressBarFinished()

            self.send(Output.DATA, table)
            self.send_topic_by_id(0)
        finally:
            # Undo enabled(False) on both the success and the failure path.
            self.enabled(True)
Ejemplo n.º 3
0
    def apply(self):
        """Fit LDA on the corpus tokens and publish the model's outputs.

        Always clears the topic list and refreshes the GUI. With a truthy
        ``self.corpus`` the model is trained under a progress bar sized to
        ``len(self.corpus)``; the resulting table goes to ``Output.DATA`` and
        topic 0 is sent through ``send_topic_by_id``. With a falsy corpus,
        ``None`` is sent on ``Output.DATA`` and ``Output.TOPICS``.

        Errors raised during training or sending are not swallowed; the
        ``finally`` clause only guarantees that ``self.enabled(True)`` runs.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if self.corpus:
            self.commit.setEnabled(False)
            self.enabled(False)
            try:
                with self.progressBar(len(self.corpus)) as bar:
                    self.lda = LDA(self.corpus.tokens,
                                   num_topics=self.num_topics,
                                   callback=bar.advance)
                    table = self.lda.insert_topics_into_corpus(self.corpus)
                    self.update_topics()

                self.send(Output.DATA, table)
                self.send_topic_by_id(0)
            finally:
                # Without this, a failure above would skip enabled(True).
                self.enabled(True)
        else:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
Ejemplo n.º 4
0
    def apply(self):
        """Run preprocessing and LDA on the corpus and publish the outputs.

        Always clears the topic list and refreshes the GUI. With a truthy
        ``self.corpus`` the documents are passed through
        ``self.preprocessor``, a model is trained on the result with
        ``self.progress`` as its callback, the resulting table goes to
        ``Output.DATA`` and topic 0 is sent through ``send_topic_by_id``.
        With a falsy corpus, ``None`` is sent on both outputs.

        Errors are not swallowed; the ``finally`` clauses only guarantee that
        ``progressBarFinished()`` and ``self.enabled(True)`` still run.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if self.corpus:
            self.commit.setEnabled(False)
            self.enabled(False)
            try:
                preprocessed = self.preprocessor(self.corpus.documents)

                self.progressBarInit()
                try:
                    self.lda = LDA(preprocessed,
                                   num_topics=self.num_topics,
                                   callback=self.progress)
                    table = self.lda.insert_topics_into_corpus(self.corpus)
                    self.update_topics()
                finally:
                    # Close the progress bar opened by progressBarInit()
                    # whether or not training succeeded.
                    self.progressBarFinished()

                self.send(Output.DATA, table)
                self.send_topic_by_id(0)
            finally:
                # Without this, a failure above would skip enabled(True).
                self.enabled(True)
        else:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
Ejemplo n.º 5
0
class LDATests(unittest.TestCase):
    # Tests for the LDA wrapper, run against a 5-topic model fitted on the
    # 'deerwester' corpus.

    @classmethod
    def setUpClass(cls):
        # Build the shared fixture when the tests actually run rather than as
        # a side effect of importing the module: a failure while loading the
        # corpus or fitting the model is then reported as a test error
        # instead of breaking test collection.
        super().setUpClass()
        cls.corp = Corpus.from_file('deerwester')
        cls.text = [d.split() for d in cls.corp.documents]
        cls.model = LDA(cls.text, num_topics=5)

    def test_insert_topic_into_corpus(self):
        # The returned table keeps one row per document and has one attribute
        # column per topic.
        corp_topics = self.model.insert_topics_into_corpus(self.corp)
        self.assertEqual(len(corp_topics), len(self.corp))
        self.assertEqual(len(corp_topics.domain.attributes), 5)
        self.assertEqual(corp_topics.X.shape, (len(self.corp), 5))

    def test_get_topic_table_by_id(self):
        # NOTE(review): 45 is presumably the number of distinct words in the
        # fixture corpus -- confirm against the data set.
        topic1 = self.model.get_topics_table_by_id(1)
        self.assertEqual(len(topic1), 45)
        self.assertEqual(topic1.metas.shape, (45, 2))

    def test_top_words_by_topic(self):
        # With default arguments, ten words are returned for a topic.
        words = self.model.get_top_words_by_id(1)
        self.assertEqual(len(words), 10)

    def test_too_large_id(self):
        # The model was built with 5 topics, so id 6 is out of range.
        with self.assertRaises(ValueError):
            self.model.get_topics_table_by_id(6)
Ejemplo n.º 6
0
class OWLDA(OWWidget):
    """Widget that fits an LDA topic model to an input corpus.

    Inputs are a ``Corpus`` and an optional ``Preprocessor``. The widget
    sends the table returned by ``LDA.insert_topics_into_corpus`` on
    ``Output.DATA`` and the table for the currently selected topic on
    ``Output.TOPICS``.
    """
    # Basic widget info
    name = "Topic Discovery"
    description = "Latent Dirichlet Allocation topic model."
    icon = "icons/LDA.svg"
    priority = 50

    settingsHandler = DomainContextHandler()

    # Input/output
    inputs = [("Corpus", Corpus, "set_data"),
              ("Preprocessor", Preprocessor, "set_preprocessor")]
    outputs = [(Output.DATA, Table), (Output.TOPICS, Topics)]
    want_main_area = True

    # Settings
    num_topics = Setting(5)

    def __init__(self):
        """Build the control area (info, settings, Apply) and the topic list."""
        super().__init__()

        self.lda = None
        self.corpus = None
        self.preprocessor = Preprocessor()

        # Info.
        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.label(info_box, self, '')

        # Settings.
        topic_box = gui.widgetBox(self.controlArea, "Settings")
        hbox = gui.widgetBox(topic_box, orientation=0)
        self.topics_label = gui.label(hbox, self, 'Number of topics: ')
        self.topics_label.setMaximumSize(self.topics_label.sizeHint())
        self.topics_input = gui.spin(hbox,
                                     self,
                                     "num_topics",
                                     minv=1,
                                     maxv=2**31 - 1,
                                     callback=self.num_topics_changed)

        # Commit button; stays disabled until there is something to apply
        # (see num_topics_changed).
        self.commit = gui.button(self.controlArea,
                                 self,
                                 "&Apply",
                                 callback=self.apply,
                                 default=True)
        self.commit.setEnabled(False)
        gui.rubber(self.controlArea)

        # Topics description
        self.cols = ['Topic', 'Topic keywords']
        self.topic_desc = QtGui.QTreeWidget()
        self.topic_desc.setColumnCount(len(self.cols))
        self.topic_desc.setHeaderLabels(self.cols)
        #self.topic_desc.setSelectionMode(QtGui.QTreeView.ExtendedSelection)
        self.topic_desc.itemSelectionChanged.connect(
            self.selected_topic_changed)
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)
        self.mainArea.layout().addWidget(self.topic_desc)

        self.refresh_gui()

    def set_preprocessor(self, data):
        """Input handler: store the preprocessor (default if None) and re-apply."""
        self.preprocessor = Preprocessor() if data is None else data
        self.apply()

    def set_data(self, data=None):
        """Input handler: store the new corpus (may be None) and re-apply."""
        self.corpus = data
        self.apply()

    def refresh_gui(self):
        """Show the number of documents in the current corpus in the info box."""
        ndoc = len(self.corpus) if self.corpus is not None else "(None)"
        self.info_label.setText("Input text entries: {}".format(ndoc))

    def update_topics(self):
        """Repopulate the topic list from the current model."""
        self.topic_desc.clear()
        for topic_id in range(self.lda.num_topics):
            words = self.lda.get_top_words_by_id(topic_id)
            item = LDATreeWidgetItem(topic_id, words, self.topic_desc)
            self.topic_desc.addTopLevelItem(item)
        # Resize every configured column, matching the loop in __init__,
        # rather than assuming there are exactly two.
        for column in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(column)

    def num_topics_changed(self):
        """Enable Apply only when a corpus exists and the setting differs
        from the topic count of the current model (or no model exists yet).
        """
        model_is_current = (self.lda is not None
                            and self.lda.num_topics == self.num_topics)
        self.commit.setEnabled(
            self.corpus is not None and not model_is_current)

    def selected_topic_changed(self):
        """Send the first selected topic, if any, on the topics output."""
        selected = self.topic_desc.selectedItems()
        if selected:
            self.send_topic_by_id(selected[0].topic_id)

    def send_topic_by_id(self, topic_id):
        """Make ``topic_id`` the current list item and send its topic table."""
        self.topic_desc.setCurrentItem(self.topic_desc.topLevelItem(topic_id))
        self.send(Output.TOPICS, self.lda.get_topics_table_by_id(topic_id))

    def enabled(self, bool):
        """Enable or disable the number-of-topics spin box."""
        self.topics_input.setEnabled(bool)

    def progress(self, p):
        """Progress callback handed to LDA; forwards ``p`` to the progress bar."""
        self.progressBarSet(p)

    def apply(self):
        """Preprocess the corpus, fit the LDA model and send both outputs.

        With a falsy corpus, ``None`` is sent on both outputs. Exceptions
        raised while preprocessing, training or sending propagate to the
        caller, but the progress bar is always finished and the spin box is
        always re-enabled.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if not self.corpus:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)
            return

        self.commit.setEnabled(False)
        self.enabled(False)
        try:
            preprocessed = self.preprocessor(self.corpus.documents)

            self.progressBarInit()
            try:
                self.lda = LDA(preprocessed,
                               num_topics=self.num_topics,
                               callback=self.progress)
                table = self.lda.insert_topics_into_corpus(self.corpus)
                self.update_topics()
            finally:
                # Pair progressBarInit() with progressBarFinished() even
                # when training raises.
                self.progressBarFinished()

            self.send(Output.DATA, table)
            self.send_topic_by_id(0)
        finally:
            # Never leave the spin box disabled after a failure.
            self.enabled(True)
Ejemplo n.º 7
0
class OWLDA(OWWidget):
    """Topic-modelling widget based on Latent Dirichlet Allocation.

    Accepts a ``Corpus`` and an optional ``Preprocessor`` as inputs. After a
    model is fitted, the table returned by ``LDA.insert_topics_into_corpus``
    is sent on ``Output.DATA`` and the table of the selected topic on
    ``Output.TOPICS``.
    """
    # Basic widget info
    name = "Topic Modelling"
    description = "Topic modelling with Latent Dirichlet Allocation."
    icon = "icons/LDA.svg"
    priority = 50

    settingsHandler = DomainContextHandler()

    # Input/output
    inputs = [("Corpus", Corpus, "set_data"),
              ("Preprocessor", Preprocessor, "set_preprocessor")]
    outputs = [(Output.DATA, Table),
               (Output.TOPICS, Topics)]
    want_main_area = True

    # Settings
    num_topics = Setting(5)

    def __init__(self):
        """Create the info box, settings, Apply button and topic tree."""
        super().__init__()

        self.lda = None
        self.corpus = None
        self.preprocessor = Preprocessor()

        # Info.
        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.label(info_box, self, '')

        # Settings.
        topic_box = gui.widgetBox(self.controlArea, "Settings")
        hbox = gui.widgetBox(topic_box, orientation=0)
        self.topics_label = gui.label(hbox, self, 'Number of topics: ')
        self.topics_label.setMaximumSize(self.topics_label.sizeHint())
        self.topics_input = gui.spin(hbox, self, "num_topics",
                                     minv=1, maxv=2 ** 31 - 1,
                                     callback=self.num_topics_changed)

        # Commit button; initially disabled, toggled by num_topics_changed.
        self.commit = gui.button(self.controlArea, self, "&Apply",
                                 callback=self.apply, default=True)
        self.commit.setEnabled(False)
        gui.rubber(self.controlArea)

        # Topics description
        self.cols = ['Topic', 'Topic keywords']
        self.topic_desc = QtGui.QTreeWidget()
        self.topic_desc.setColumnCount(len(self.cols))
        self.topic_desc.setHeaderLabels(self.cols)
        #self.topic_desc.setSelectionMode(QtGui.QTreeView.ExtendedSelection)
        self.topic_desc.itemSelectionChanged.connect(self.selected_topic_changed)
        for col in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(col)
        self.mainArea.layout().addWidget(self.topic_desc)

        self.refresh_gui()

    def set_preprocessor(self, data):
        """Handle the Preprocessor input; ``None`` restores the default one."""
        self.preprocessor = data if data is not None else Preprocessor()
        self.apply()

    def set_data(self, data=None):
        """Handle the Corpus input and recompute the model."""
        self.corpus = data
        self.apply()

    def refresh_gui(self):
        """Update the info label with the current document count."""
        if self.corpus is not None:
            ndoc = len(self.corpus)
        else:
            ndoc = "(None)"
        self.info_label.setText("Input text entries: {}".format(ndoc))

    def update_topics(self):
        """Rebuild the topic tree with the top words of every topic."""
        self.topic_desc.clear()
        for topic_id in range(self.lda.num_topics):
            top_words = self.lda.get_top_words_by_id(topic_id)
            row = LDATreeWidgetItem(topic_id, top_words, self.topic_desc)
            self.topic_desc.addTopLevelItem(row)
        # Use the configured column list instead of a hard-coded count so
        # this stays in step with the setup done in __init__.
        for col in range(len(self.cols)):
            self.topic_desc.resizeColumnToContents(col)

    def num_topics_changed(self):
        """Toggle the Apply button after the number-of-topics setting changes.

        The button is enabled only if there is a corpus and the current
        model (if any) was built with a different number of topics.
        """
        up_to_date = (self.lda is not None
                      and self.lda.num_topics == self.num_topics)
        self.commit.setEnabled(self.corpus is not None and not up_to_date)

    def selected_topic_changed(self):
        """Send the topic of the first selected tree item, if there is one."""
        selected = self.topic_desc.selectedItems()
        if selected:
            self.send_topic_by_id(selected[0].topic_id)

    def send_topic_by_id(self, topic_id):
        """Select the tree item for ``topic_id`` and send that topic's table."""
        self.topic_desc.setCurrentItem(self.topic_desc.topLevelItem(topic_id))
        self.send(Output.TOPICS, self.lda.get_topics_table_by_id(topic_id))

    def enabled(self, bool):
        """Enable or disable the number-of-topics spin box."""
        self.topics_input.setEnabled(bool)

    def progress(self, p):
        """Callback passed to LDA; sets the progress bar to ``p``."""
        self.progressBarSet(p)

    def apply(self):
        """Fit the LDA model on the preprocessed corpus and send the outputs.

        With a falsy corpus, ``None`` is sent on both outputs. Errors from
        preprocessing, training or sending are not swallowed; the
        ``finally`` clauses only guarantee that the progress bar is finished
        and the spin box re-enabled.
        """
        self.topic_desc.clear()
        self.refresh_gui()
        if self.corpus:
            self.commit.setEnabled(False)
            self.enabled(False)
            try:
                preprocessed = self.preprocessor(self.corpus.documents)

                self.progressBarInit()
                try:
                    self.lda = LDA(preprocessed,
                                   num_topics=self.num_topics,
                                   callback=self.progress)
                    table = self.lda.insert_topics_into_corpus(self.corpus)
                    self.update_topics()
                finally:
                    # Always close the progress bar opened above.
                    self.progressBarFinished()

                self.send(Output.DATA, table)
                self.send_topic_by_id(0)
            finally:
                # Re-enable the spin box on success and on failure alike.
                self.enabled(True)
        else:
            self.send(Output.DATA, None)
            self.send(Output.TOPICS, None)