Example 1
import unittest

from nose.tools import assert_equal

# Assumed import path: SentenceTokenizer is taken here from textblob.tokenizers
# (the original snippet does not show its imports).
from textblob.tokenizers import SentenceTokenizer


class TestSentenceTokenizer(unittest.TestCase):
    def setUp(self):
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        assert_equal(self.tokenizer.tokenize(self.text), [
            "Beautiful is better than ugly.", "Simple is better than complex."
        ])

    def test_tokenize_with_multiple_punctuation(self):
        # Trailing punctuation such as "?!" and "..." should stay attached to its sentence.
        text = "Hello world. How do you do?! My name's Steve..."
        assert_equal(self.tokenizer.tokenize(text),
                     ["Hello world.", "How do you do?!", "My name's Steve..."])
        text2 = 'OMG! I am soooo LOL!!!'
        tokens = self.tokenizer.tokenize(text2)
        assert_equal(len(tokens), 2)
        assert_equal(tokens, ["OMG!", "I am soooo LOL!!!"])
Example 2
import unittest

from nose.tools import assert_equal

from textblob.tokenizers import SentenceTokenizer  # assumed import path, as in Example 1


class TestSentenceTokenizer(unittest.TestCase):

    def setUp(self):
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        assert_equal(self.tokenizer.tokenize(self.text),
            ["Beautiful is better than ugly.", "Simple is better than complex."])
Example 3
import unittest

from nose.tools import assert_equal

from textblob.tokenizers import SentenceTokenizer  # assumed import path, as in Example 1


class TestSentenceTokenizer(unittest.TestCase):

    def setUp(self):
        self.tokenizer = SentenceTokenizer()
        self.text = "Beautiful is better than ugly. Simple is better than complex."

    def test_tokenize(self):
        assert_equal(self.tokenizer.tokenize(self.text),
            ["Beautiful is better than ugly.", "Simple is better than complex."])

    def test_tokenize_with_multiple_punctuation(self):
        text = "Hello world. How do you do?! My name's Steve..."
        assert_equal(self.tokenizer.tokenize(text),
            ["Hello world.", "How do you do?!", "My name's Steve..."])
        text2 = 'OMG! I am soooo LOL!!!'
        tokens = self.tokenizer.tokenize(text2)
        assert_equal(len(tokens), 2)
        assert_equal(tokens,
            ["OMG!", "I am soooo LOL!!!"])