def test_graphemes(self):
    """Grapheme tokenization with and without an orthography profile."""
    # Default tokenizer falls back to plain Unicode grapheme clusters.
    tok = Tokenizer()
    obtained = tok.graphemes("Màttís List")
    self.assertEqual(obtained, "M à t t í s # L i s t")
    # Profile-backed tokenizer: multigraphs merge, unknown chars become "?".
    obtained = self.t.graphemes("Màttís List")
    self.assertEqual(obtained, "M à tt í s # ? ? s ?")
def test_graphemes(self):
    """Grapheme tokenization of "aabchonn-ih" with and without a profile."""
    # Without a profile every character stands alone.
    plain = Tokenizer().graphemes("aabchonn-ih")
    self.assertEqual(plain, "a a b c h o n n - i h")
    # With the test profile, multigraphs (aa, ch, on, ih) are grouped.
    profiled = self.t.graphemes("aabchonn-ih")
    self.assertEqual(profiled, "aa b ch on n - ih")
def test_characters(self):
    """Character tokenization splits combining marks; profile has no effect."""
    expected = "M a ̀ t t i ́ s # L i s t"
    # Both the default and the profile-backed tokenizer agree here.
    self.assertEqual(Tokenizer().characters("Màttís List"), expected)
    self.assertEqual(self.t.characters("Màttís List"), expected)
class TokenizerTestCase(unittest.TestCase):
    """Tests for tokenizer.py using the bundled 'test.prf' profile."""

    def setUp(self):
        # Profile-backed tokenizer shared by all tests in this case.
        profile = os.path.join(os.path.dirname(__file__), 'test.prf')
        self.t = Tokenizer(profile)

    def test_printTree(self):
        """Smoke-test the tree printing helpers (no assertions)."""
        self.t.tree.printTree(self.t.tree.root)
        printMultigraphs(self.t.tree.root, '', '')
        printMultigraphs(self.t.tree.root, 'abcd', '')

    def test_characters(self):
        """Character tokenization is profile-independent."""
        expected = "M a ̀ t t i ́ s # L i s t"
        self.assertEqual(Tokenizer().characters("Màttís List"), expected)
        self.assertEqual(self.t.characters("Màttís List"), expected)

    def test_graphemes(self):
        """Grapheme tokenization differs with and without a profile."""
        tok = Tokenizer()
        self.assertEqual(tok.graphemes("Màttís List"),
                         "M à t t í s # L i s t")
        self.assertEqual(self.t.graphemes("Màttís List"),
                         "M à tt í s # ? ? s ?")

    def test_grapheme_clusters(self):
        """Unicode grapheme clusters keep combining marks attached."""
        clusters = self.t.grapheme_clusters("Màttís List")
        self.assertEqual(clusters, "M à t t í s # L i s t")

    def test_transform1(self):
        """Default transform column behaves like graphemes()."""
        out = self.t.transform("Màttís List")
        self.assertEqual(out, "M à tt í s # ? ? s ?")

    def test_transform2(self):
        """Transform into the 'ipa' profile column."""
        out = self.t.transform("Màttís List", 'ipa')
        self.assertEqual(out, "m a tː i s # ? ? s ?")

    def test_transform3(self):
        """Transform into the 'funny' profile column."""
        out = self.t.transform("Màttís List", 'funny')
        self.assertEqual(out, "J e l n a # ? ? a ?")

    def test_rules(self):
        """Rule application rewrites the input string."""
        out = self.t.rules("Màttís List")
        self.assertEqual(out, "Jelena")

    def test_transform_rules(self):
        """Transform followed by rule application."""
        out = self.t.transform_rules("Màttís List")
        self.assertEqual(out, "M à e l ?")

    def test_find_missing_characters(self):
        """Characters absent from the profile are replaced with '?'."""
        out = self.t.find_missing_characters("L i s t")
        self.assertEqual(out, "? ? s ?")

    def test_tokenize_ipa(self):
        """Smoke-test IPA tokenization (no assertion)."""
        Tokenizer().tokenize_ipa("Màttís List")
def test_kabiye(self):
    """IPA tokenization of the Kabiye sample matches the gold output.

    Fix: local variable renamed from `input` to `input_text` to avoid
    shadowing the `input` builtin.
    """
    tok = Tokenizer()
    input_text, gold = jipa("Kabiye_input.txt", "Kabiye_output.txt")
    result = tok.tokenize_ipa(input_text)
    self.assertEqual(result, gold)
def setUp(self):
    """Create a Tokenizer backed by the test orthography profile."""
    profile = _test_path('test.prf')
    self.t = Tokenizer(profile)
class TokenizerTestCase(unittest.TestCase):
    """Tests for tokenizer.py using the bundled 'test.prf' profile.

    Fix: the four jipa-based tests previously bound a local named `input`,
    shadowing the builtin; renamed to `input_text` throughout.
    """

    maxDiff = None  # show full diffs for large tokenization output

    def setUp(self):
        # Profile-backed tokenizer shared by all tests in this case.
        self.t = Tokenizer(_test_path('test.prf'))

    def test_printTree(self):
        """Smoke-test the tree printing helpers (no assertions)."""
        self.t.tree.printTree(self.t.tree.root)
        printMultigraphs(self.t.tree.root, '', '')
        printMultigraphs(self.t.tree.root, 'abcd', '')

    def test_kabiye(self):
        """IPA tokenization of the Kabiye sample matches the gold output."""
        t = Tokenizer()
        input_text, gold = jipa("Kabiye_input.txt", "Kabiye_output.txt")
        result = t.tokenize_ipa(input_text)
        self.assertEqual(result, gold)

    def test_portuguese(self):
        """IPA tokenization of the Brazilian Portuguese sample."""
        t = Tokenizer()
        input_text, gold = jipa("Brazilian_Portuguese_input.txt",
                                "Brazilian_Portuguese_output.txt")
        result = t.tokenize_ipa(input_text)
        self.assertEqual(result, gold)

    def test_vietnamese(self):
        """IPA tokenization of the Vietnamese sample."""
        t = Tokenizer()
        input_text, gold = jipa("Vietnamese_input.txt",
                                "Vietnamese_output.txt")
        result = t.tokenize_ipa(input_text)
        self.assertEqual(result, gold)

    def test_german(self):
        """IPA tokenization of the Zurich German sample."""
        t = Tokenizer()
        input_text, gold = jipa("Zurich_German_input.txt",
                                "Zurich_German_output.txt")
        result = t.tokenize_ipa(input_text)
        self.assertEqual(result, gold)

    def test_characters(self):
        """Character tokenization splits every combining mark apart."""
        t = Tokenizer()
        result = t.characters("ĉháɾã̌ctʼɛ↗ʐː| k͡p")
        self.assertEqual(result,
                         "c ̂ h a ́ ɾ a ̃ ̌ c t ʼ ɛ ↗ ʐ ː | # k ͡ p")

    def test_grapheme_clusters(self):
        """Grapheme clusters keep combining marks with their base char."""
        t = Tokenizer()
        result = t.grapheme_clusters("ĉháɾã̌ctʼɛ↗ʐː| k͡p")
        self.assertEqual(result, "ĉ h á ɾ ã̌ c t ʼ ɛ ↗ ʐ ː | # k͡ p")

    def test_graphemes(self):
        """Grapheme tokenization with and without an orthography profile."""
        t = Tokenizer()
        result = t.graphemes("aabchonn-ih")
        self.assertEqual(result, "a a b c h o n n - i h")
        result = self.t.graphemes("aabchonn-ih")
        self.assertEqual(result, "aa b ch on n - ih")

    def test_transform1(self):
        """Default transform column behaves like graphemes()."""
        result = self.t.transform("aabchonn-ih")
        self.assertEqual(result, "aa b ch on n - ih")

    def test_transform2(self):
        """Transform into the 'ipa' profile column."""
        result = self.t.transform("aabchonn-ih", "ipa")
        self.assertEqual(result, "aː b tʃ õ n í")

    def test_transform3(self):
        """Transform into the 'XSAMPA' profile column."""
        result = self.t.transform("aabchonn-ih", "XSAMPA")
        self.assertEqual(result, "a: b tS o~ n i_H")

    def test_rules(self):
        """Rule application rewrites the input string."""
        result = self.t.rules("aabchonn-ih")
        self.assertEqual(result, "ii-ii")

    def test_transform_rules(self):
        """Transform followed by rule application."""
        result = self.t.transform_rules("aabchonn-ih")
        self.assertEqual(result, "b b ii - ii")

    def test_find_missing_characters(self):
        """Characters absent from the profile are replaced with '?'."""
        result = self.t.find_missing_characters("aa b ch on n - ih x y z")
        self.assertEqual(result, "aa b ch on n - ih ? ? ?")
def test_portuguese(self):
    """IPA tokenization of the Brazilian Portuguese sample matches gold.

    Fix: local variable renamed from `input` to `input_text` to avoid
    shadowing the `input` builtin.
    """
    tok = Tokenizer()
    input_text, gold = jipa("Brazilian_Portuguese_input.txt",
                            "Brazilian_Portuguese_output.txt")
    result = tok.tokenize_ipa(input_text)
    self.assertEqual(result, gold)
def test_grapheme_clusters(self):
    """Grapheme clusters keep combining marks attached to their base."""
    clusters = Tokenizer().grapheme_clusters("ĉháɾã̌ctʼɛ↗ʐː| k͡p")
    self.assertEqual(clusters, "ĉ h á ɾ ã̌ c t ʼ ɛ ↗ ʐ ː | # k͡ p")
def test_german(self):
    """IPA tokenization of the Zurich German sample matches gold.

    Fix: local variable renamed from `input` to `input_text` to avoid
    shadowing the `input` builtin.
    """
    tok = Tokenizer()
    input_text, gold = jipa("Zurich_German_input.txt",
                            "Zurich_German_output.txt")
    result = tok.tokenize_ipa(input_text)
    self.assertEqual(result, gold)
def setUp(self):
    """Create a Tokenizer backed by the test profile next to this file."""
    here = os.path.dirname(__file__)
    self.t = Tokenizer(os.path.join(here, 'test.prf'))
def test_characters(self):
    """Character tokenization splits every combining mark apart."""
    obtained = Tokenizer().characters("ĉháɾã̌ctʼɛ↗ʐː| k͡p")
    self.assertEqual(obtained,
                     "c ̂ h a ́ ɾ a ̃ ̌ c t ʼ ɛ ↗ ʐ ː | # k ͡ p")
def test_vietnamese(self):
    """IPA tokenization of the Vietnamese sample matches gold.

    Fix: local variable renamed from `input` to `input_text` to avoid
    shadowing the `input` builtin.
    """
    tok = Tokenizer()
    input_text, gold = jipa("Vietnamese_input.txt", "Vietnamese_output.txt")
    result = tok.tokenize_ipa(input_text)
    self.assertEqual(result, gold)
def test_tokenize_ipa(self):
    """Smoke-test: tokenize_ipa runs without raising (no assertion)."""
    Tokenizer().tokenize_ipa("Màttís List")