Example #1
 def test_Tokens2(self):
     """Word Tokenize a string with non-word characters"""
     line2 = "This$ is# a 6 line43 of text'ed words"
     line_expect = ["This", "is", "a", "line", "of", u"text\u02bbed", "words"]
     test2 = to()
     Tokens = test2.w_Tokens(line2)
     assert Tokens == line_expect
Example #2
 def test_Tokens1(self):
     """Word Tokenize a string with a glottal"""
     line1 = "This is a line of text'ed words"
     line_expect = [u"This", u"is", u"a", u"line", u"of", u"text\u02bbed", u"words"]
     test1 = to()
     Tokens = test1.w_Tokens(line1)
     assert Tokens == line_expect
Example #3
 def test_Tokens5(self):
     """Word Tokenize a string with apostropne pre-pending words
     beginning with a vowel (different meaning in Tongan Language than English)"""
     line5 = "This 'is 'a 'line of text'ed' words"
     line_expect = ["This",u"\u02bbis",u"\u02bba","line","of",u"text\u02bbed","words"]
     test5 = to()
     Tokens = test5.w_Tokens(line5)
     assert Tokens == line_expect
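The expected lists in the three tests above pin down the tokenizer's behaviour for plain ASCII input: digits and other non-word characters are dropped, an apostrophe inside a word or before a word-initial vowel becomes U+02BB (the Tongan glottal), and apostrophes used as quotes (before a consonant, or trailing a word) are stripped. The sketch below is only an illustration consistent with those lists; word_tokens is a hypothetical helper, not the package's w_Tokens, and it covers only these ASCII cases.

import re

GLOTTAL = u"\u02bb"  # MODIFIER LETTER TURNED COMMA, the Tongan fakau'a

def word_tokens(line):
    """Hypothetical helper (not the package's w_Tokens); covers only the
    plain-ASCII inputs used in the tests above."""
    tokens = []
    for raw in line.split():
        word = re.sub(u"[^A-Za-z']", u"", raw)  # drop digits and punctuation
        word = word.rstrip(u"'")                # trailing quoting apostrophe
        if word.startswith(u"'"):
            if len(word) > 1 and word[1].lower() in u"aeiou":
                word = GLOTTAL + word[1:]       # glottal before a vowel
            else:
                word = word[1:]                 # quoting apostrophe before a consonant
        word = word.replace(u"'", GLOTTAL)      # word-internal glottal
        if word:
            tokens.append(word)
    return tokens

assert word_tokens(u"This 'is 'a 'line of text'ed' words") == \
    [u"This", u"\u02bbis", u"\u02bba", u"line", u"of", u"text\u02bbed", u"words"]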
Example #4
 def test_Tokens4(self):
     """Word Tokenize a string with apostrophe [pre|post]-pending words
     (including the last word in a string)"""
     line4 = "This is a line of 'text'ed' words'"
     line_expect = ["This", "is", "a", "line", "of", u"text\u02bbed", "words"]
     test4 = to()
     Tokens = test4.w_Tokens(line4)
     assert Tokens == line_expect
Example #5
 def test_Tokens3(self):
     """Word Tokenize a string with apostrophe pre-pending words
     and ending words"""
     line3 = "This is a line of 'text'ed' words'"
     line_expect = ["This", "is", "a", "line", "of", u"text\u02bbed", "words"]
     test3 = to()
     Tokens = test3.w_Tokens(line3)
     assert Tokens == line_expect
Example #6
 def test_ctranspose(self):
     """Words contain intermingled msword smart single quotes ` and ' 
     They all become glottals in Tongan Language"""
     line = """\x91Iteita tamasi\x92i \x91i mu\x92a fa\x92iteliha;"""
     line_expect = u"""\u02bbIteita tamasi\u02bbi \u02bbi mu\u02bba fa\u02bbiteliha;"""
     test = to()
     transpose = test.c_transpose([line.decode('cp1252')])[0]
     assert transpose == line_expect
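The ctranspose tests show only the effect of c_transpose, not its implementation: the cp1252 bytes 0x91 and 0x92 decode to U+2018 and U+2019, and the expected string replaces both with U+02BB MODIFIER LETTER TURNED COMMA (the Tongan fakauʻa). A minimal sketch of such a character transposition follows; transpose_glottals is a hypothetical stand-in, and the backtick and plain-apostrophe entries are assumptions taken from the docstring rather than from the test data.

# Hypothetical sketch, not the package's c_transpose: fold the smart-quote
# variants onto U+02BB MODIFIER LETTER TURNED COMMA.
SMART_QUOTES = {
    u"\u2018": u"\u02bb",  # LEFT SINGLE QUOTATION MARK (cp1252 0x91)
    u"\u2019": u"\u02bb",  # RIGHT SINGLE QUOTATION MARK (cp1252 0x92)
    u"`": u"\u02bb",       # backtick, per the docstring
    u"'": u"\u02bb",       # plain apostrophe, per the docstring
}

def transpose_glottals(text):
    """Replace every smart-quote variant in a unicode string with the glottal."""
    for src, dst in SMART_QUOTES.items():
        text = text.replace(src, dst)
    return text

assert (transpose_glottals(b"\x91Iteita tamasi\x92i".decode("cp1252"))
        == u"\u02bbIteita tamasi\u02bbi")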
Example #7
 def test_ctranspose(self):
     """Words contain intermingled msword smart single quotes ` and ' 
     They all become glottals in Tongan Language"""
     line="""\x91Iteita tamasi\x92i \x91i mu\x92a fa\x92iteliha;"""
     line_expect=u"""\u02bbIteita tamasi\u02bbi \u02bbi mu\u02bba fa\u02bbiteliha;"""
     test = to()
     transpose = test.c_transpose([line.decode('cp1252')])[0]
     assert transpose == line_expect
Example #8
 def test_Tokens1(self):
     """Word Tokenize a string with a glottal"""
     line1 = "This is a line of text'ed words"
     line_expect = [
         u"This", u"is", u"a", u"line", u"of", u"text\u02bbed", u"words"
     ]
     test1 = to()
     Tokens = test1.w_Tokens(line1)
     assert Tokens == line_expect
Example #9
 def test_Tokens2(self):
     """Word Tokenize a string with non-word characters"""
     line2 = "This$ is# a 6 line43 of text'ed words"
     line_expect = [
         "This", "is", "a", "line", "of", u"text\u02bbed", "words"
     ]
     test2 = to()
     Tokens = test2.w_Tokens(line2)
     assert Tokens == line_expect
Example #10
 def test_Tokens4(self):
     """Word Tokenize a string with apostrophe [pre|post]-pending words 
     (including the last word in a string)"""
     line4 = "This is a line of 'text'ed' words'"
     line_expect = [
         "This", "is", "a", "line", "of", u"text\u02bbed", "words"
     ]
     test4 = to()
     Tokens = test4.w_Tokens(line4)
     assert Tokens == line_expect
Example #11
 def test_Tokens3(self):
     """Word Tokenize a string with apostrophe pre-pending words 
     and ending words"""
     line3 = "This is a line of 'text'ed' words'"
     line_expect = [
         "This", "is", "a", "line", "of", u"text\u02bbed", "words"
     ]
     test3 = to()
     Tokens = test3.w_Tokens(line3)
     assert Tokens == line_expect
Example #12
 def test_Tokens6(self):
     """Word contains a dierisis and leading glottal"""
     line6 = '\x91Otua Taumai\xe4 \x91al\xe4,'
     #~ line6="""\x91Otua Taumai\xe4 \x91al\xe4"""
     line_expect = [u"\u02bbOtua", u"Taumai\u0101", u"\u02bbal\u0101"]
     test = to()
     line6 = test.c_transpose(line6.decode('cp1252'))
     Tokens = test.w_Tokens(line6)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
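Beyond the smart quotes, the expected tokens here imply a second normalisation: cp1252 0xE4 decodes to U+00E4 (a with diaeresis), yet the test expects U+0101 (a with macron), the form used for long vowels in Tongan. A minimal sketch of that mapping follows; macronize is hypothetical, and the entries for the other vowels are assumptions.

# Hypothetical sketch, not the package's c_transpose: rewrite dieresis vowels
# as the macron vowels used for Tongan long vowels.
DIERESIS_TO_MACRON = {
    u"\u00e4": u"\u0101",  # a with diaeresis -> a with macron
    u"\u00eb": u"\u0113",  # e
    u"\u00ef": u"\u012b",  # i
    u"\u00f6": u"\u014d",  # o
    u"\u00fc": u"\u016b",  # u
}

def macronize(text):
    """Replace dieresis vowels in a unicode string with macron vowels."""
    for src, dst in DIERESIS_TO_MACRON.items():
        text = text.replace(src, dst)
    return text

assert macronize(b"Taumai\xe4".decode("cp1252")) == u"Taumai\u0101"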
Example #13
 def test_Tokens5(self):
     """Word Tokenize a string with apostropne pre-pending words
     beginning with a vowel (different meaning in Tongan Language than English)"""
     line5 = "This 'is 'a 'line of text'ed' words"
     line_expect = [
         "This", u"\u02bbis", u"\u02bba", "line", "of", u"text\u02bbed",
         "words"
     ]
     test5 = to()
     Tokens = test5.w_Tokens(line5)
     assert Tokens == line_expect
Example #14
 def test_Tokens6(self):
     """Word contains a dierisis and leading glottal"""
     line6='\x91Otua Taumai\xe4 \x91al\xe4,'
     #~ line6="""\x91Otua Taumai\xe4 \x91al\xe4"""
     line_expect = [u"\u02bbOtua",u"Taumai\u0101",u"\u02bbal\u0101"]
     test = to()
     line6=test.c_transpose(line6.decode('cp1252'))
     Tokens = test.w_Tokens(line6)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
Example #15
 def test_Tokens7(self):
     """Word contains a dierisis and leading glottal"""
     line7="Breakdance \x91Otua Taumai\xe4 \x91al\xe4,"
     line_expect = [u"Breakdance", u"\u02bbOtua",u"Taumai\u0101",u"\u02bbal\u0101"]
     test = to()
     line7=test.c_transpose(line7.decode('cp1252'))
     Tokens = test.w_Tokens(line7)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
     assert Tokens[3] == line_expect[3]
Example #16
 def test_Tokens8(self):
     """Words contain intermingled msword smart single quotes ` and ' 
     They all become glottals in Tongan Language"""
     line8="\x91Iteita tamasi\x92i \x91i mu\x92a fa\x92iteliha;"
     line_expect = [u"\u02bbIteita", u"tamasi\u02bbi", u"\u02bbi", u"mu\u02bba", u"fa\u02bbiteliha"]
     test = to()
     line8=test.c_transpose(line8.decode('cp1252'))
     Tokens = test.w_Tokens(line8)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
     assert Tokens[3] == line_expect[3]
Example #17
 def test_Tokens7(self):
     """Word contains a dierisis and leading glottal"""
     line7 = "Breakdance \x91Otua Taumai\xe4 \x91al\xe4,"
     line_expect = [
         u"Breakdance", u"\u02bbOtua", u"Taumai\u0101", u"\u02bbal\u0101"
     ]
     test = to()
     line7 = test.c_transpose(line7.decode('cp1252'))
     Tokens = test.w_Tokens(line7)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
     assert Tokens[3] == line_expect[3]
Example #18
 def test_Tokens8(self):
     """Words contain intermingled msword smart single quotes ` and ' 
     They all become glottals in Tongan Language"""
     line8 = "\x91Iteita tamasi\x92i \x91i mu\x92a fa\x92iteliha;"
     line_expect = [
         u"\u02bbIteita", u"tamasi\u02bbi", u"\u02bbi", u"mu\u02bba",
         u"fa\u02bbiteliha"
     ]
     test = to()
     line8 = test.c_transpose(line8.decode('cp1252'))
     Tokens = test.w_Tokens(line8)
     assert Tokens[0] == line_expect[0]
     assert Tokens[1] == line_expect[1]
     assert Tokens[2] == line_expect[2]
     assert Tokens[3] == line_expect[3]
Example #19
 def test_transform1(self):
     test = to()
     answer = test.c_transform(
         "o", "o", unicodedata.lookup('LATIN SMALL LETTER O WITH MACRON'))
     assert answer == unicodedata.lookup('LATIN SMALL LETTER O WITH MACRON')
Example #20
 def test_transform1(self):
     test = to()
     answer = test.c_transform("o","o",unicodedata.lookup('LATIN SMALL LETTER O WITH MACRON'))
     assert answer == unicodedata.lookup('LATIN SMALL LETTER O WITH MACRON')
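test_transform1 pins down a single call. unicodedata.lookup resolves a character from its Unicode name (here U+014D, o with macron), so the test avoids embedding the literal in the source. The transform helper below is a hypothetical stand-in inferred from that one assertion, not the package's c_transform.

import unicodedata

# unicodedata.lookup returns the character for a Unicode name; U+014D here.
O_MACRON = unicodedata.lookup('LATIN SMALL LETTER O WITH MACRON')
assert O_MACRON == u"\u014d"

# Hypothetical stand-in inferred from the single assertion in test_transform1
# (not the package's c_transform): return the replacement when the input
# matches the pattern, otherwise leave the character unchanged.
def transform(char, pattern, replacement):
    return replacement if char == pattern else char

assert transform("o", "o", O_MACRON) == O_MACRON
assert transform("a", "o", O_MACRON) == "a"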