예제 #1
0
class OWTweetProfiler(OWWidget):
    """Widget that runs the input corpus through ``TweetProfiler``.

    The selected string meta attribute, model name and output mode are
    passed to ``TweetProfiler.transform`` and the result is sent on the
    corpus output.
    """
    name = "Tweet Profiler"
    description = "Detect Ekman's, Plutchik's or Profile of Mood States's " \
                  "emotions in tweets."
    icon = "icons/TweetProfiler.svg"
    priority = 46

    inputs = [(IO.CORPUS, Corpus, 'set_corpus')]
    outputs = [(IO.CORPUS, Corpus)]

    want_main_area = False
    resizing_enabled = False

    # Persisted widget settings.
    token = Setting('')
    model_name = Setting('')
    output_mode = Setting('')
    tweet_attr = Setting(0)     # index into ``strings_attrs``
    auto_commit = Setting(True)

    class Error(OWWidget.Error):
        server_down = Msg('Our servers are not responding. '
                          'Please try again later.')
        invalid_token = Msg('This token is invalid')
        no_credit = Msg('Too little credits for this data set')

    def __init__(self):
        super().__init__()
        self.corpus = None
        self.strings_attrs = []
        # The widget's error messages are handed to the profiler as callbacks.
        self.profiler = TweetProfiler(
            token=self.token,
            on_server_down=self.Error.server_down,
            on_invalid_token=self.Error.invalid_token,
            on_too_little_credit=self.Error.no_credit,
        )

        # Info box
        self.n_documents = ''
        self.credit = 0
        box = gui.widgetBox(self.controlArea, "Info")
        gui.label(box, self, 'Documents: %(n_documents)s')
        gui.label(box, self, 'Credits: %(credit)s')

        # Settings
        self.controlArea.layout().addWidget(self.generate_grid_layout())

        # Server token
        box = gui.vBox(self.controlArea, 'Server Token')
        gui.lineEdit(box, self, 'token', callback=self.token_changed,
                     controlWidth=300)
        gui.button(box, self, 'Get Token', callback=self.get_new_token)

        # Auto commit
        buttons_layout = QtGui.QHBoxLayout()
        buttons_layout.addWidget(self.report_button)
        buttons_layout.addSpacing(15)
        buttons_layout.addWidget(
            gui.auto_commit(None, self, 'auto_commit', 'Commit', box=False)
        )
        self.controlArea.layout().addLayout(buttons_layout)

        self.refresh_token_info()

    @staticmethod
    def _stored_or_first(stored, available):
        """Return ``stored`` if it is in ``available``, else the first item.

        ``available`` must be non-empty.
        """
        return stored if stored in available else available[0]

    def _selected_attr(self):
        """Return the selected string attribute.

        Returns None when ``tweet_attr`` does not index ``strings_attrs``
        (e.g. no string meta attributes are available).
        """
        if 0 <= self.tweet_attr < len(self.strings_attrs):
            return self.strings_attrs[self.tweet_attr]
        return None

    def generate_grid_layout(self):
        """Build the 'Options' group box holding the three combo boxes.

        Returns:
            The ``QGroupBox`` with the attribute, model and output-mode
            selectors laid out in a grid.
        """
        box = QtGui.QGroupBox(title='Options')

        layout = QGridLayout()
        layout.setSpacing(10)
        row = 0

        self.tweet_attr_combo = gui.comboBox(None, self, 'tweet_attr',
                                             callback=self.apply)
        layout.addWidget(QLabel('Attribute:'))
        layout.addWidget(self.tweet_attr_combo, row, 1)

        row += 1
        model_names = self.profiler.model_names
        self.model_name_combo = gui.comboBox(None, self, 'model_name',
                                             items=model_names,
                                             sendSelectedValue=True,
                                             callback=self.apply)
        if model_names:
            # Keep a stored choice that is still offered; otherwise fall
            # back to the first model instead of always overwriting it.
            self.model_name = self._stored_or_first(self.model_name,
                                                    model_names)
        layout.addWidget(QLabel('Emotions:'))
        layout.addWidget(self.model_name_combo, row, 1)

        row += 1
        output_modes = self.profiler.output_modes
        self.output_mode_combo = gui.comboBox(None, self, 'output_mode',
                                              items=output_modes,
                                              sendSelectedValue=True,
                                              callback=self.apply)
        if output_modes:
            # Same policy as for the model: prefer the stored output mode.
            self.output_mode = self._stored_or_first(self.output_mode,
                                                     output_modes)
        layout.addWidget(QLabel('Output:'))
        layout.addWidget(self.output_mode_combo, row, 1)

        box.setLayout(layout)
        return box

    def set_corpus(self, corpus):
        """Input handler: store ``corpus`` and refresh attribute choices.

        Args:
            corpus: the incoming ``Corpus`` or None when the input is removed.
        """
        self.corpus = corpus

        if corpus is None:
            # Forget the previous input so the info box and the report do
            # not keep showing data of a corpus that is no longer connected.
            self.strings_attrs = []
            self.tweet_attr_combo.setModel(
                VariableListModel(self.strings_attrs))
            self.n_documents = ''
        else:
            self.strings_attrs = [a for a in self.corpus.domain.metas
                                  if isinstance(a, StringVariable)]
            self.tweet_attr_combo.setModel(
                VariableListModel(self.strings_attrs))
            self.tweet_attr_combo.currentIndexChanged.emit(self.tweet_attr)

            # select the first feature from 'text_features' if present
            ind = [self.strings_attrs.index(tf)
                   for tf in corpus.text_features
                   if tf in self.strings_attrs]
            if ind:
                self.tweet_attr = ind[0]
            elif not 0 <= self.tweet_attr < len(self.strings_attrs):
                # A stale index (from settings or a previous corpus) would
                # point past the new attribute list.
                self.tweet_attr = 0

            self.n_documents = len(corpus)
        self.commit()

    def apply(self):
        """Callback of the option combo boxes; delegates to ``commit``."""
        self.commit()

    def commit(self):
        """Profile the corpus and send the result (or None) to the output."""
        self.Error.clear()

        attr = self._selected_attr()
        # Without a valid string attribute there is nothing to profile;
        # indexing ``strings_attrs`` blindly would raise IndexError.
        if self.corpus is not None and attr is not None:
            with self.progressBar(iterations=len(self.corpus)) as pb:
                out = self.profiler.transform(
                    self.corpus, attr,
                    self.model_name, self.output_mode,
                    on_advance=pb.advance)
            self.send(IO.CORPUS, out)
        else:
            self.send(IO.CORPUS, None)

        self.refresh_token_info()

    def get_new_token(self):
        """Request a new token from the profiler, store it and recommit."""
        self.Warning.clear()
        self.profiler.new_token()
        self.token = self.profiler.token
        self.refresh_token_info()
        self.commit()

    def token_changed(self):
        """Callback of the token line edit: push the token to the profiler."""
        self.profiler.token = self.token
        self.refresh_token_info()
        self.commit()

    def refresh_token_info(self):
        """Update the displayed credit from ``profiler.get_credit()``."""
        self.credit = str(self.profiler.get_credit())

    def send_report(self):
        """Report the document count and the currently selected options."""
        attr = self._selected_attr()
        self.report_items([
            ('Documents', self.n_documents),
            ('Attribute', attr if attr is not None else ''),
            ('Emotions', self.model_name),
            ('Output', self.output_mode),
        ])
예제 #2
0
class TestTweetProfiler(unittest.TestCase):
    """Tests for ``TweetProfiler`` run against patched server helpers."""

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def setUp(self):
        # Work on the first 100 documents only.
        corpus = Corpus.from_file('Election-2016-Tweets.tab')
        self.data = corpus[:100]
        self.profiler = TweetProfiler()

    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_get_server_address(self):
        address = self.profiler.get_server_address()
        self.assertTrue(address.startswith('http'))

    @patch(SERVER_CALL, MockServerCall())
    def test_get_configuration(self):
        profiler = self.profiler
        self.assertEqual(profiler.model_names, MODELS)
        self.assertEqual(profiler.output_modes, MODES)

    @patch(SERVER_CALL, MockServerCall())
    def test_get_token(self):
        profiler = self.profiler
        # No token until one is explicitly requested.
        self.assertIsNone(profiler.token)
        profiler.new_token()
        self.assertEqual(profiler.token, TOKEN)

    @patch(SERVER_CALL, MockServerCall())
    def test_is_token_valid(self):
        is_valid = self.profiler.is_token_valid()
        self.assertEqual(is_valid, VALID)

    @patch(SERVER_CALL, MockServerCall())
    def test_get_credit(self):
        credit = self.profiler.get_credit()
        self.assertEqual(credit, COINS)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_embeddings(self):
        on_advance = Mock()
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, 'model-mc', 'Embeddings',
            on_advance=on_advance)

        self.assertIsInstance(profiled, Corpus)
        self.assertEqual(on_advance.call_count, 2)

        # The output gains EMBEDDINGS_NUM attributes / X columns.
        n_new_attrs = (len(profiled.domain.attributes)
                       - len(self.data.domain.attributes))
        self.assertEqual(n_new_attrs, EMBEDDINGS_NUM)
        n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
        self.assertEqual(n_new_cols, EMBEDDINGS_NUM)

        # NOTE(review): this indexes a single column (the first added one),
        # not a slice of all added columns -- confirm that is intended.
        first_added = profiled.X[:, -EMBEDDINGS_NUM]
        self.assertEqual(np.sum(first_added), 0)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_probabilities_and_ml_classes(self):
        tweet_var = self.data.domain.metas[1]
        n_classes = len(PROFILE_CLASSES)
        for output_mode in ('Probabilities', 'Classes'):
            profiled = self.profiler.transform(
                self.data, tweet_var, 'model-ml', output_mode)
            self.assertIsInstance(profiled, Corpus)

            # One new attribute / X column per profile class.
            n_new_attrs = (len(profiled.domain.attributes)
                           - len(self.data.domain.attributes))
            self.assertEqual(n_new_attrs, n_classes)
            n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
            self.assertEqual(n_new_cols, n_classes)

            # NOTE(review): single column index, not a slice -- confirm.
            first_added = profiled.X[:, -n_classes]
            self.assertEqual(np.sum(first_added), 0)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_mc_classes(self):
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, 'model-mc', 'Classes')
        self.assertIsInstance(profiled, Corpus)

        # Exactly one attribute / X column is added.
        n_new_attrs = (len(profiled.domain.attributes)
                       - len(self.data.domain.attributes))
        self.assertEqual(n_new_attrs, 1)
        n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
        self.assertEqual(n_new_cols, 1)
        self.assertEqual(np.sum(profiled.X[:, -1]), 0)

    @patch(CHECK_ALIVE, Mock(return_value=False))
    def test_transform_probabilities(self):
        # The alive check is patched to return False; None is expected.
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, MODELS[0], 'Classes')
        self.assertIsNone(profiled)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_empty_corpus(self):
        tweet_var = self.data.domain.metas[1]
        empty = self.data[:0]
        profiled = self.profiler.transform(
            empty, tweet_var, 'model-mc', 'Classes')
        self.assertIsNone(profiled)

    @patch(SERVER_CALL, MockServerCallSuddenlyUnavailable())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_server_call_error(self):
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, 'model-mc', 'Classes')
        self.assertIsNone(profiled)
예제 #3
0
class TestTweetProfiler(unittest.TestCase):
    """Tests for ``TweetProfiler`` run against patched server helpers."""

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def setUp(self):
        # Work on the first 100 documents only.
        corpus = Corpus.from_file('Election-2016-Tweets.tab')
        self.data = corpus[:100]
        self.profiler = TweetProfiler()

    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_get_server_address(self):
        address = self.profiler.get_server_address()
        self.assertTrue(address.startswith('http'))

    @patch(SERVER_CALL, MockServerCall())
    def test_get_configuration(self):
        profiler = self.profiler
        self.assertEqual(profiler.model_names, MODELS)
        self.assertEqual(profiler.output_modes, MODES)

    @patch(SERVER_CALL, MockServerCall())
    def test_get_token(self):
        profiler = self.profiler
        # No token until one is explicitly requested.
        self.assertIsNone(profiler.token)
        profiler.new_token()
        self.assertEqual(profiler.token, TOKEN)

    @patch(SERVER_CALL, MockServerCall())
    def test_is_token_valid(self):
        is_valid = self.profiler.is_token_valid()
        self.assertEqual(is_valid, VALID)

    @patch(SERVER_CALL, MockServerCall())
    def test_get_credit(self):
        credit = self.profiler.get_credit()
        self.assertEqual(credit, COINS)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_embeddings(self):
        on_advance = Mock()
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, 'model-mc', 'Embeddings',
            on_advance=on_advance)

        self.assertIsInstance(profiled, Corpus)
        self.assertEqual(on_advance.call_count, 2)

        # The output gains EMBEDDINGS_NUM attributes / X columns.
        n_new_attrs = (len(profiled.domain.attributes)
                       - len(self.data.domain.attributes))
        self.assertEqual(n_new_attrs, EMBEDDINGS_NUM)
        n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
        self.assertEqual(n_new_cols, EMBEDDINGS_NUM)

        # NOTE(review): this indexes a single column (the first added one),
        # not a slice of all added columns -- confirm that is intended.
        first_added = profiled.X[:, -EMBEDDINGS_NUM]
        self.assertEqual(np.sum(first_added), 0)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_probabilities_and_ml_classes(self):
        tweet_var = self.data.domain.metas[1]
        n_classes = len(PROFILE_CLASSES)
        for output_mode in ('Probabilities', 'Classes'):
            profiled = self.profiler.transform(
                self.data, tweet_var, 'model-ml', output_mode)
            self.assertIsInstance(profiled, Corpus)

            # One new attribute / X column per profile class.
            n_new_attrs = (len(profiled.domain.attributes)
                           - len(self.data.domain.attributes))
            self.assertEqual(n_new_attrs, n_classes)
            n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
            self.assertEqual(n_new_cols, n_classes)

            # NOTE(review): single column index, not a slice -- confirm.
            first_added = profiled.X[:, -n_classes]
            self.assertEqual(np.sum(first_added), 0)

    @patch(SERVER_CALL, MockServerCall())
    @patch(CHECK_ALIVE, Mock(return_value=True))
    def test_transform_mc_classes(self):
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, 'model-mc', 'Classes')
        self.assertIsInstance(profiled, Corpus)

        # Exactly one attribute / X column is added.
        n_new_attrs = (len(profiled.domain.attributes)
                       - len(self.data.domain.attributes))
        self.assertEqual(n_new_attrs, 1)
        n_new_cols = profiled.X.shape[1] - self.data.X.shape[1]
        self.assertEqual(n_new_cols, 1)
        self.assertEqual(np.sum(profiled.X[:, -1]), 0)

    @patch(CHECK_ALIVE, Mock(return_value=False))
    def test_transform_probabilities(self):
        # The alive check is patched to return False; the very same input
        # corpus object is expected back.
        tweet_var = self.data.domain.metas[1]
        profiled = self.profiler.transform(
            self.data, tweet_var, MODELS[0], 'Classes')
        self.assertIs(profiled, self.data)
예제 #4
0
class OWTweetProfiler(OWWidget):
    """Widget that runs the input corpus through ``TweetProfiler``.

    The selected string meta attribute, model name and output mode are
    passed to ``TweetProfiler.transform`` and the result is sent on the
    corpus output.
    """
    name = "Tweet Profiler"
    description = "Detect Ekman's, Plutchik's or Profile of Mood States's " \
                  "emotions in tweets."
    icon = "icons/TweetProfiler.svg"
    priority = 46

    inputs = [(IO.CORPUS, Corpus, 'set_corpus')]
    outputs = [(IO.CORPUS, Corpus)]

    want_main_area = False
    resizing_enabled = False

    # Persisted widget settings.
    token = Setting('')
    model_name = Setting('')
    output_mode = Setting('')
    tweet_attr = Setting(0)  # index into ``strings_attrs``
    auto_commit = Setting(True)

    class Error(OWWidget.Error):
        server_down = Msg('Our servers are not responding. '
                          'Please try again later.')
        invalid_token = Msg('This token is invalid')
        no_credit = Msg('Too little credits for this data set')

    def __init__(self):
        super().__init__()
        self.corpus = None
        self.strings_attrs = []
        # The widget's error messages are handed to the profiler as callbacks.
        self.profiler = TweetProfiler(
            token=self.token,
            on_server_down=self.Error.server_down,
            on_invalid_token=self.Error.invalid_token,
            on_too_little_credit=self.Error.no_credit,
        )

        # Info box
        self.n_documents = ''
        self.credit = 0
        box = gui.widgetBox(self.controlArea, "Info")
        gui.label(box, self, 'Documents: %(n_documents)s')
        gui.label(box, self, 'Credits: %(credit)s')

        # Settings
        self.controlArea.layout().addWidget(self.generate_grid_layout())

        # Server token
        box = gui.vBox(self.controlArea, 'Server Token')
        gui.lineEdit(box,
                     self,
                     'token',
                     callback=self.token_changed,
                     controlWidth=300)
        gui.button(box, self, 'Get Token', callback=self.get_new_token)

        # Auto commit
        buttons_layout = QtGui.QHBoxLayout()
        buttons_layout.addWidget(self.report_button)
        buttons_layout.addSpacing(15)
        buttons_layout.addWidget(
            gui.auto_commit(None, self, 'auto_commit', 'Commit', box=False))
        self.controlArea.layout().addLayout(buttons_layout)

        self.refresh_token_info()

    @staticmethod
    def _stored_or_first(stored, available):
        """Return ``stored`` if it is in ``available``, else the first item.

        ``available`` must be non-empty.
        """
        return stored if stored in available else available[0]

    def _selected_attr(self):
        """Return the selected string attribute.

        Returns None when ``tweet_attr`` does not index ``strings_attrs``
        (e.g. no string meta attributes are available).
        """
        if 0 <= self.tweet_attr < len(self.strings_attrs):
            return self.strings_attrs[self.tweet_attr]
        return None

    def generate_grid_layout(self):
        """Build the 'Options' group box holding the three combo boxes.

        Returns:
            The ``QGroupBox`` with the attribute, model and output-mode
            selectors laid out in a grid.
        """
        box = QtGui.QGroupBox(title='Options')

        layout = QGridLayout()
        layout.setSpacing(10)
        row = 0

        self.tweet_attr_combo = gui.comboBox(None,
                                             self,
                                             'tweet_attr',
                                             callback=self.apply)
        layout.addWidget(QLabel('Attribute:'))
        layout.addWidget(self.tweet_attr_combo, row, 1)

        row += 1
        model_names = self.profiler.model_names
        self.model_name_combo = gui.comboBox(None,
                                             self,
                                             'model_name',
                                             items=model_names,
                                             sendSelectedValue=True,
                                             callback=self.apply)
        if model_names:
            # Keep a stored choice that is still offered; otherwise fall
            # back to the first model instead of always overwriting it.
            self.model_name = self._stored_or_first(self.model_name,
                                                    model_names)
        layout.addWidget(QLabel('Emotions:'))
        layout.addWidget(self.model_name_combo, row, 1)

        row += 1
        output_modes = self.profiler.output_modes
        self.output_mode_combo = gui.comboBox(None,
                                              self,
                                              'output_mode',
                                              items=output_modes,
                                              sendSelectedValue=True,
                                              callback=self.apply)
        if output_modes:
            # Same policy as for the model: prefer the stored output mode.
            self.output_mode = self._stored_or_first(self.output_mode,
                                                     output_modes)
        layout.addWidget(QLabel('Output:'))
        layout.addWidget(self.output_mode_combo, row, 1)

        box.setLayout(layout)
        return box

    def set_corpus(self, corpus):
        """Input handler: store ``corpus`` and refresh attribute choices.

        Args:
            corpus: the incoming ``Corpus`` or None when the input is removed.
        """
        self.corpus = corpus

        if corpus is None:
            # Forget the previous input so the info box and the report do
            # not keep showing data of a corpus that is no longer connected.
            self.strings_attrs = []
            self.tweet_attr_combo.setModel(
                VariableListModel(self.strings_attrs))
            self.n_documents = ''
        else:
            self.strings_attrs = [
                a for a in self.corpus.domain.metas
                if isinstance(a, StringVariable)
            ]
            self.tweet_attr_combo.setModel(
                VariableListModel(self.strings_attrs))
            self.tweet_attr_combo.currentIndexChanged.emit(self.tweet_attr)

            # select the first feature from 'text_features' if present
            ind = [
                self.strings_attrs.index(tf) for tf in corpus.text_features
                if tf in self.strings_attrs
            ]
            if ind:
                self.tweet_attr = ind[0]
            elif not 0 <= self.tweet_attr < len(self.strings_attrs):
                # A stale index (from settings or a previous corpus) would
                # point past the new attribute list.
                self.tweet_attr = 0

            self.n_documents = len(corpus)
        self.commit()

    def apply(self):
        """Callback of the option combo boxes; delegates to ``commit``."""
        self.commit()

    def commit(self):
        """Profile the corpus and send the result (or None) to the output."""
        self.Error.clear()

        attr = self._selected_attr()
        # Without a valid string attribute there is nothing to profile;
        # indexing ``strings_attrs`` blindly would raise IndexError.
        if self.corpus is not None and attr is not None:
            with self.progressBar(iterations=len(self.corpus)) as pb:
                out = self.profiler.transform(
                    self.corpus,
                    attr,
                    self.model_name,
                    self.output_mode,
                    on_advance=pb.advance)
            self.send(IO.CORPUS, out)
        else:
            self.send(IO.CORPUS, None)

        self.refresh_token_info()

    def get_new_token(self):
        """Request a new token from the profiler, store it and recommit."""
        self.Warning.clear()
        self.profiler.new_token()
        self.token = self.profiler.token
        self.refresh_token_info()
        self.commit()

    def token_changed(self):
        """Callback of the token line edit: push the token to the profiler."""
        self.profiler.token = self.token
        self.refresh_token_info()
        self.commit()

    def refresh_token_info(self):
        """Update the displayed credit from ``profiler.get_credit()``."""
        self.credit = str(self.profiler.get_credit())

    def send_report(self):
        """Report the document count and the currently selected options."""
        attr = self._selected_attr()
        self.report_items([
            ('Documents', self.n_documents),
            ('Attribute', attr if attr is not None else ''),
            ('Emotions', self.model_name),
            ('Output', self.output_mode),
        ])