# Example #1
# 0
class Test(unittest.TestCase):
    """Unit tests for ``Tokenizer.tokenize``, ``gentokenize`` and ``genclasstokenize``.

    The cases expect that:

    * ``tokenize`` returns a ``list`` and the two generator variants return
      iterables of token objects exposing ``string`` and ``position``;
    * tokens from ``genclasstokenize`` additionally expose ``category``
      (values checked here: "alpha", "space", "punct", "digit", "other");
    * every method raises ``ValueError`` for the non-string inputs used below.
    """

    # Sample inputs shared by the ``tokenize`` and ``gentokenize`` cases.  Each
    # one is expected to yield exactly three tokens, the first being 'test'
    # and the last being 'string'; only the surrounding punctuation differs.
    _BOTH_ALPHA = 'test !!!111some ,.,. string'
    _BOTH_NOTALPHA = ':)test !!!111some ,.,. string**'
    _ALPHA_NONALPHA = 'test !!!111some ,.,. string$^)'
    _NONALPHA_ALPHA = '{test !!!111some ,.,. string'

    def setUp(self):
        # A fresh tokenizer is built before every test case.
        self.Tokenizer = Tokenizer()

    # ------------------------------------------------------------------
    # shared assertion helpers (not collected as tests: no ``test`` prefix)
    # ------------------------------------------------------------------
    def _check_token(self, tokens, index, string, position, category=None):
        """Assert the attributes of ``tokens[index]``.

        ``string`` and ``position`` are always compared, in that order;
        ``category`` is compared last and only when one is given.
        """
        self.assertEqual(tokens[index].string, string)
        self.assertEqual(tokens[index].position, position)
        if category is not None:
            self.assertEqual(tokens[index].category, category)

    def _check_outer_words(self, tokens, first_position, last_position):
        """Assert three tokens, with 'test' first and 'string' last."""
        self.assertEqual(len(tokens), 3)
        self._check_token(tokens, 0, 'test', first_position)
        self._check_token(tokens, 2, 'string', last_position)

    # ------------------------------------------------------------------
    # unittests for tokenize method
    # ------------------------------------------------------------------
    def test_list_type_output(self):
        # The result must be a real list.
        self.assertIsInstance(self.Tokenizer.tokenize('some string'), list)

    def test_list_type_number(self):
        # An integer argument is rejected.
        self.assertRaises(ValueError, self.Tokenizer.tokenize, 13)

    def test_list_type_notlist(self):
        # A list argument is rejected.
        self.assertRaises(
            ValueError, self.Tokenizer.tokenize, [15, 'abc', 'stream'])

    def test_list_result_empty(self):
        # An empty string produces no tokens.
        tokens = self.Tokenizer.tokenize('')
        self.assertEqual(len(tokens), 0)

    def test_list_result_both_alpha(self):
        # Input starts and ends with a letter.
        tokens = self.Tokenizer.tokenize(self._BOTH_ALPHA)
        self._check_outer_words(tokens, 0, 21)

    def test_list_result_both_notalpha(self):
        # Input starts and ends with punctuation; the leading ':)' shifts
        # every position by two.
        tokens = self.Tokenizer.tokenize(self._BOTH_NOTALPHA)
        self._check_outer_words(tokens, 2, 23)

    def test_list_result_alpha_nonalpha(self):
        # Input starts with a letter and ends with punctuation.
        tokens = self.Tokenizer.tokenize(self._ALPHA_NONALPHA)
        self._check_outer_words(tokens, 0, 21)

    def test_list_result_nonalpha_alpha(self):
        # Input starts with punctuation and ends with a letter; the leading
        # '{' shifts every position by one.
        tokens = self.Tokenizer.tokenize(self._NONALPHA_ALPHA)
        self._check_outer_words(tokens, 1, 22)

    # ------------------------------------------------------------------
    # unittests for gentokenize method
    # ------------------------------------------------------------------
    def test_gen_type_number(self):
        # Both the call and the first ``next`` sit inside the context, so the
        # error may be raised at either point.
        with self.assertRaises(ValueError):
            next(self.Tokenizer.gentokenize(13))

    def test_gen_type_notlist(self):
        # A list argument is rejected (at call time or on first ``next``).
        with self.assertRaises(ValueError):
            next(self.Tokenizer.gentokenize([15, 'abc', 'stream']))

    def test_gen_result_empty(self):
        # An empty string produces no tokens.
        tokens = list(self.Tokenizer.gentokenize(''))
        self.assertEqual(len(tokens), 0)

    def test_gen_result_both_alpha(self):
        # Input starts and ends with a letter.
        tokens = list(self.Tokenizer.gentokenize(self._BOTH_ALPHA))
        self._check_outer_words(tokens, 0, 21)

    def test_gen_result_both_notalpha(self):
        # Input starts and ends with punctuation.
        tokens = list(self.Tokenizer.gentokenize(self._BOTH_NOTALPHA))
        self._check_outer_words(tokens, 2, 23)

    def test_gen_result_alpha_nonalpha(self):
        # Input starts with a letter and ends with punctuation.
        tokens = list(self.Tokenizer.gentokenize(self._ALPHA_NONALPHA))
        self._check_outer_words(tokens, 0, 21)

    def test_gen_result_nonalpha_alpha(self):
        # Input starts with punctuation and ends with a letter.
        tokens = list(self.Tokenizer.gentokenize(self._NONALPHA_ALPHA))
        self._check_outer_words(tokens, 1, 22)

    # ------------------------------------------------------------------
    # unittests for genclasstokenize method
    # ------------------------------------------------------------------
    def test_gen_class_type_number(self):
        # An integer argument is rejected (at call time or on first ``next``).
        with self.assertRaises(ValueError):
            next(self.Tokenizer.genclasstokenize(13))

    def test_gen_class_type_notlist(self):
        # A list argument is rejected (at call time or on first ``next``).
        with self.assertRaises(ValueError):
            next(self.Tokenizer.genclasstokenize([15, 'abc', 'stream']))

    def test_gen_class_result_empty(self):
        # An empty string produces no tokens.
        tokens = list(self.Tokenizer.genclasstokenize(''))
        self.assertEqual(len(tokens), 0)

    def test_gen_class_result_one(self):
        # Two words separated by one space: every token is checked.
        tokens = list(self.Tokenizer.genclasstokenize('some string'))
        self.assertEqual(len(tokens), 3)
        expected = (
            ('some', 0, "alpha"),
            (' ', 4, "space"),
            ('string', 5, "alpha"),
        )
        for index, (string, position, category) in enumerate(expected):
            self._check_token(tokens, index, string, position, category)

    def test_gen_class_result_two(self):
        # Mixed punctuation, words, spaces and digits: nine tokens expected,
        # with the leading punctuation run and the digit run checked.
        tokens = list(
            self.Tokenizer.genclasstokenize('!!some bloody string 123**'))
        self.assertEqual(len(tokens), 9)
        self._check_token(tokens, 0, '!!', 0, "punct")
        self._check_token(tokens, 7, '123', 21, "digit")

    def test_gen_class_result_three(self):
        # The trademark sign is expected to get the "other" category.
        tokens = list(self.Tokenizer.genclasstokenize('test™test* *test'))
        self.assertEqual(len(tokens), 7)
        self._check_token(tokens, 1, '™', 4, "other")