Exemple #1
0
    def test_remove_stop_words(self):
        """
        Check that removing English stop words from a small token list
        leaves exactly the words 'test' and 'string'.
        """
        manager = StopWordsManager()
        filtered = manager.remove_stopwords(
            'english',
            ['this', 'is', 'a', 'test', 'string'],
        )

        # The five input tokens should be reduced to exactly two words
        self.assertEqual(len(filtered), 2)
        for expected in ('test', 'string'):
            self.assertIn(expected, list(filtered))
Exemple #2
0
    def test_remove_stop_words(self):
        """
        Remove English stop words from a sample token list and verify
        that only 'test' and 'string' remain.
        """
        sample_tokens = ['this', 'is', 'a', 'test', 'string']
        remaining = StopWordsManager().remove_stopwords('english', sample_tokens)

        # Only two of the sample tokens are expected to survive
        self.assertEqual(len(remaining), 2)
        for kept_word in ('test', 'string'):
            self.assertIn(kept_word, list(remaining))
Exemple #3
0
class StopWordsTestCase(TestCase):
    """Test case exercising ``StopWordsManager.remove_stopwords``."""

    def setUp(self):
        """Create a fresh ``StopWordsManager`` for each test."""
        super(StopWordsTestCase, self).setUp()
        # Imported inside setUp rather than at module level
        from chatterbot.utils.stop_words import StopWordsManager

        self.stopwords_manager = StopWordsManager()

    def test_remove_stop_words(self):
        """
        Removing English stop words from the sample tokens should leave
        exactly 'test' and 'string'.
        """
        filtered = self.stopwords_manager.remove_stopwords(
            'english',
            ['this', 'is', 'a', 'test', 'string'],
        )

        # The five input tokens should be reduced to exactly two words
        self.assertEqual(len(filtered), 2)
        for expected in ('test', 'string'):
            self.assertIn(expected, list(filtered))
class StopWordsTestCase(TestCase):
    """Checks stop word removal through ``StopWordsManager``."""

    def setUp(self):
        """Build the ``StopWordsManager`` instance used by the tests."""
        super(StopWordsTestCase, self).setUp()
        # Imported inside setUp rather than at module level
        from chatterbot.utils.stop_words import StopWordsManager

        self.stopwords_manager = StopWordsManager()

    def test_remove_stop_words(self):
        """
        The sample token list should shrink to 'test' and 'string' once
        English stop words are removed.
        """
        sample_tokens = ['this', 'is', 'a', 'test', 'string']
        remaining = self.stopwords_manager.remove_stopwords(
            'english', sample_tokens
        )

        # Only two of the sample tokens are expected to survive
        self.assertEqual(len(remaining), 2)
        for kept_word in ('test', 'string'):
            self.assertIn(kept_word, list(remaining))
    def get_tokens(self, text, language='english', exclude_stop_words=True):
        """
        Split the lowercased ``text`` into word tokens using NLTK.

        When ``exclude_stop_words`` is true, the tokens are passed to
        ``StopWordsManager.remove_stopwords`` for ``language`` (dropping
        common words such as "is", "the", "a") and that result is
        returned. Otherwise the tokens from ``word_tokenize`` are
        returned unchanged.
        """
        from chatterbot.utils.stop_words import StopWordsManager
        from nltk import word_tokenize

        manager = StopWordsManager()
        words = word_tokenize(text.lower())

        if not exclude_stop_words:
            return words

        # Filter the stop words of the requested language out of the tokens
        return manager.remove_stopwords(language, words)
    def get_tokens(self, text, language='english', exclude_stop_words=True):
        """
        Tokenize ``text`` (after lowercasing it) with NLTK's
        ``word_tokenize``.

        If ``exclude_stop_words`` is true, the return value is the result
        of ``StopWordsManager.remove_stopwords(language, tokens)``, which
        skips common stop words such as "is", "the", "a". If it is false,
        the raw token sequence is returned.
        """
        from chatterbot.utils.stop_words import StopWordsManager
        from nltk import word_tokenize

        stop_word_filter = StopWordsManager()
        lowered = text.lower()
        result = word_tokenize(lowered)

        if exclude_stop_words:
            # Replace the raw tokens with the stop-word-free version
            result = stop_word_filter.remove_stopwords(language, result)

        return result