def add_ending(self, ending=u""):
    """
    Add an ending to the word, combining vowel-consonant pairs if necessary.

    If ``ending`` starts with a vowel, the last element of ``self.word``
    must be a consonant: that element is replaced by the consonant-vowel
    combination and the remainder of the ending is appended. Any other
    ending is appended unchanged. An empty ending leaves the word as it is.

    Raises:
        ValueError: if ``ending`` starts with a vowel but the word is empty
            or does not end with a consonant.
    """
    # Nothing to add. This also keeps ending[0] below from failing on the
    # default (empty) ending.
    if not ending:
        return

    # Endings that do not start with a vowel are appended unchanged
    if not TamilLetter.is_vowel(ending[0]):
        self.word += ending
        return

    # Unless word ends with a consonant, ending cannot start with a vowel
    # (an empty word has no final consonant to combine with either)
    if not self.word or not TamilLetter.is_consonant(self.word[-1]):
        raise ValueError(
            """Invalid word-ending pair - ending can only start with a vowel if word ends with consonant"""
        )

    # Replace the last letter of the word with the consonant-vowel
    # combination, then copy over the rest of the ending. The ending is
    # appended exactly once: this branch and the plain append above are
    # mutually exclusive.
    # NOTE(review): item assignment needs self.word to be a mutable
    # sequence of letters (it fails on a plain str) -- confirm the type.
    self.word[-1] = TamilLetter.get_combination(self.word[-1], ending[0])
    self.word += ending[1:]
def validate(word=u""):
    """
    Assert that a given word is valid.

    ``word`` may be a plain string or a TamilWord; for a TamilWord, its
    ``word`` attribute is what gets checked. Every element is handed to
    ``TamilLetter.assert_valid_letter`` (presumably raising on an invalid
    character -- confirm against TamilLetter). Returns True once every
    element has been checked.
    """
    # unwrap TamilWord objects so only the underlying text is inspected
    text = word.word if isinstance(word, TamilWord) else word

    # simple test: each element must be a valid Tamil character
    for character in text:
        TamilLetter.assert_valid_letter(character)

    # TODO: implement this method more thoroughly
    # TODO: check for pulli or combination_ending at beginning of word
    return True
def validate(word=''):
    """
    Assert that a given word is valid.

    Accepts either a plain string or a TamilWord (whose ``word`` attribute
    is then used). Each element is passed to
    ``TamilLetter.assert_valid_letter`` (presumably raising for an invalid
    character -- confirm against TamilLetter). Returns True after all
    elements have been checked.
    """
    # a TamilWord carries its text in the .word attribute
    if isinstance(word, TamilWord):
        candidate = word.word
    else:
        candidate = word

    # simple test: every element must be a valid Tamil character
    for element in candidate:
        TamilLetter.assert_valid_letter(element)

    # TODO: implement this method more thoroughly
    # TODO: check for pulli or combination_ending at beginning of word
    return True
def split_syllables(letters=[]):
    """
    Return the syllables of a word, given as a list of its letters.

    Each vowel and each combination opens a new syllable. Consonants and
    aytham are attached to the end of the previous syllable; when there is
    no previous syllable yet (e.g. a loanword starting with a consonant),
    the letter becomes the first entry on its own.

    Raises:
        Exception: if a letter is neither a vowel, a combination, a
            consonant nor an aytham.
    """
    # make sure the joined letters form a valid word before splitting
    TamilWord.validate("".join(letters))

    result = []
    for current in letters:
        # vowels and combinations always start a syllable of their own
        if TamilLetter.is_combination(current) or TamilLetter.is_vowel(current):
            result.append(current)
            continue

        # anything that is not a consonant or aytham at this point is
        # unexpected
        if not (TamilLetter.is_consonant(current) or TamilLetter.is_aytham(current)):
            raise Exception(
                """Unknown error: \'%s\' in word %s is neither a vowel, consonant, combination or aytham"""
                % (current, "".join(letters))
            )

        if result:
            # attach the consonant/aytham to the previous syllable
            result[-1] += current
        else:
            # word starts with a consonant (probably a loanword): it
            # becomes the first entry
            result.append(current)

    return result
def split_syllables(letters=[]):
    """
    Return the syllables of a word, given as a list of its letters.

    Generic algorithm: each vowel and combination is its own syllable.
    Consonants and aytham get added to the end of the previous syllable;
    if there is no previous syllable (e.g. a loanword starting with a
    consonant), the letter becomes the first entry by itself.

    The default ``letters`` list is only read, never modified.

    Returns:
        list: the syllables, in order.

    Raises:
        Exception: if a letter is neither a vowel, a combination, a
            consonant nor an aytham.
    """
    # ensure that the word is a valid word
    TamilWord.validate(''.join(letters))

    syllables = []

    for letter in letters:

        # if letter is a vowel or combination, it gets its own syllable
        if TamilLetter.is_combination(letter) or \
                TamilLetter.is_vowel(letter):
            syllables.append(letter)

        # if letter is a consonant or aytham, add it to the end of the
        # previously-added syllable
        elif TamilLetter.is_consonant(letter) or \
                TamilLetter.is_aytham(letter):

            if syllables:
                syllables[-1] = syllables[-1] + letter

            # if the first letter is a consonant (probably because it is a
            # loanword), it starts the list on its own
            else:
                syllables.append(letter)

        # any other kind of letter is unexpected here
        else:
            raise Exception("""Unknown error: \'%s\' in word %s is neither a vowel, consonant, combination or aytham""" % (letter, ''.join(letters)))

    # hand the collected syllables back to the caller (previously the
    # function fell off the end and returned None)
    return syllables
def split_letters(word=u""):
    """
    Return the graphemes (i.e. the Tamil characters) of a word as a list.

    The word is validated first. Codepoints that are அ combinations,
    whitespace, vowels or aytham each start a new entry; combination
    endings and the pulli are glued onto the entry before them.

    Raises:
        ValueError: if a combination ending or pulli appears before any
            other character, or if a codepoint fits none of the
            categories above.
    """
    # ensure that the word is a valid word
    TamilWord.validate(word)

    # lookup data: all combination endings and all அ combinations
    endings = TamilLetter.get_combination_endings()
    base_combinations = TamilLetter.get_combination_column(u"அ").values()

    graphemes = []
    for char in word:
        # அ combinations, whitespace, vowels and aytham each begin a new
        # grapheme
        starts_grapheme = (
            char in base_combinations
            or TamilLetter.is_whitespace(char)
            or TamilLetter.is_vowel(char)
            or TamilLetter.is_aytham(char)
        )
        if starts_grapheme:
            graphemes.append(char)
            continue

        # whatever is left must be a combination ending or a pulli
        if not (char in endings or char == TamilLetter.get_pulli()):
            raise ValueError(
                """Unknown error: Codepoint \'%s\' in word %s is neither a vowel, consonant, combination or aytham"""
                % (char, word)
            )

        # a modifier needs something to attach to; validation is expected
        # to catch this earlier
        if not graphemes:
            raise ValueError(
                """Unknown error: Combination ending %s cannot be first character of a word"""
                % char
            )

        # glue the modifier onto the previously-added grapheme
        graphemes[-1] += char

    # TODO: Write extensive test cases for this
    return graphemes
def add_ending(self, ending=''):
    """
    Add an ending to the word, combining vowel-consonant pairs if necessary.

    When ``ending`` begins with a vowel, the last element of ``self.word``
    has to be a consonant: it is replaced by the consonant-vowel
    combination and the rest of the ending is appended. Every other
    ending is appended as it is. An empty ending is a no-op.

    Raises:
        ValueError: if ``ending`` begins with a vowel but the word is empty
            or does not end with a consonant.
    """
    # An empty ending changes nothing. Returning here also protects the
    # ending[0] lookup below, which would fail for the default argument.
    if not ending:
        return

    if TamilLetter.is_vowel(ending[0]):

        # Unless word ends with a consonant, ending cannot start with a
        # vowel (an empty word has no final consonant either)
        if not self.word or not TamilLetter.is_consonant(self.word[-1]):
            raise ValueError("""Invalid word-ending pair - ending can only start with a vowel if word ends with consonant""")

        # replace the last letter of the word with the consonant-vowel
        # combination, then copy over the rest of the ending
        # NOTE(review): item assignment needs self.word to be a mutable
        # sequence of letters (it fails on a plain str) -- confirm the type.
        self.word[-1] = TamilLetter.get_combination(self.word[-1],
                                                    ending[0])
        self.word += ending[1:]

    # In all other cases, simply add the ending to the word. Kept in an
    # explicit else so the ending is never appended a second time after a
    # combination.
    else:
        self.word += ending
def split_letters(word=''):
    """
    Return the graphemes (i.e. the Tamil characters) of a word as a list.

    After validating the word, each codepoint is classified: அ
    combinations, whitespace, vowels and aytham open a new list entry,
    while combination endings and the pulli are appended to the entry
    added just before them.

    Raises:
        ValueError: if a combination ending or pulli comes before any
            other character, or if a codepoint matches none of the
            categories above.
    """
    # ensure that the word is a valid word
    TamilWord.validate(word)

    # all combination endings, and all அ combinations as a list
    ending_marks = TamilLetter.get_combination_endings()
    a_column = list(TamilLetter.get_combination_column('அ').values())

    pieces = []
    for symbol in word:

        # அ combinations, whitespace, vowels and aytham stand on their own
        if symbol in a_column or \
                TamilLetter.is_whitespace(symbol) or \
                TamilLetter.is_vowel(symbol) or \
                TamilLetter.is_aytham(symbol):
            pieces.append(symbol)
            continue

        # anything else has to be a combination ending or a pulli
        if not (symbol in ending_marks or
                symbol == TamilLetter.get_pulli()):
            raise ValueError("""Unknown error: Codepoint \'%s\' in word %s is neither a vowel, consonant, combination or aytham""" % (symbol, word))

        # a combination ending or pulli cannot open the word; validation
        # is expected to catch this earlier
        if len(pieces) == 0:
            raise ValueError("""Unknown error: Combination ending %s cannot be first character of a word""" % (symbol))

        # attach it to the previously-added grapheme
        pieces[-1] = pieces[-1] + symbol

    # TODO: Write extensive test cases for this
    return pieces