def french_count():
    """Build a single-state FST that rewrites digits one symbol at a time.

    The only state, 'start', is both initial and final.  For each digit
    0-9 it carries a self-loop that reads the digit character and emits
    the list ``[kFRENCH_TRANS[digit]]``.

    Returns:
        The constructed FST.
    """
    f = FST('french')
    f.add_state('start')
    f.initial_state = 'start'

    # range() iterates the same values on Python 2 and 3; xrange() is
    # Python 2 only and raises NameError on Python 3.
    for ii in range(10):
        f.add_arc('start', 'start', str(ii), [kFRENCH_TRANS[ii]])

    f.set_final('start')
    return f
def french_count():
    """Build a single-state FST that rewrites digits one symbol at a time.

    The only state, "start", is both initial and final.  For each digit
    0-9 it carries a self-loop that reads the digit character and emits
    the list ``[kFRENCH_TRANS[digit]]``.

    Returns:
        The constructed FST.
    """
    f = FST("french")
    f.add_state("start")
    f.initial_state = "start"

    # range() iterates the same values on Python 2 and 3; xrange() is
    # Python 2 only and raises NameError on Python 3.
    for ii in range(10):
        f.add_arc("start", "start", str(ii), [kFRENCH_TRANS[ii]])

    f.set_final("start")
    return f
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    States '1'..'4' count the digits copied so far: '1' has copied none,
    '4' has copied three.  Letters are copied while in state '1'; the
    first three digits are copied and advance the state; any further
    digit read in state '4' is consumed without output.  Every state is
    final, so strings with fewer than three digits are accepted unchanged.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    states = ('1', '2', '3', '4')
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in states:
        f2.set_final(state)

    # Leading letters pass through untouched.  string.ascii_letters is
    # used because string.letters is locale-dependent on Python 2 and
    # does not exist on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', (letter), (letter))

    for n in range(10):
        digit = str(n)
        # First, second and third digit are kept ...
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        # ... everything after the third is dropped.
        f2.add_arc('4', '4', digit, ())

    return f2
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    State '1' copies the leading letters; 'd1', 'd2' and 'd3' are reached
    after copying one, two and three digits.  Digits read in 'd3' are
    consumed without output.  All four states are final so that a string
    with zero, one, two or three digits is accepted unchanged.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    states = ['1', 'd1', 'd2', 'd3']
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'

    # '1' must be final too: a name made only of dropped letters yields a
    # code with no digits, and that code still has to pass through.
    for state in states:
        f2.set_final(state)

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and missing on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', (letter), (letter))

    # Each digit copied moves one step along the chain 1 -> d1 -> d2 -> d3.
    for src, dst in zip(states, states[1:]):
        for n in range(10):
            f2.add_arc(src, dst, str(n), str(n))

    # Once three digits are out, swallow the rest.
    for n in range(10):
        f2.add_arc('d3', 'd3', str(n), ())

    return f2
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    As written this is a placeholder mapping: the first lowercase letter
    is copied to the output and every following lowercase letter is
    rewritten to '0'.
    """
    f1 = FST('soundex-generate')

    # 'start' is the initial state; 'next' is entered after the first
    # letter and is the only final state.
    for name in ('start', 'next'):
        f1.add_state(name)
    f1.initial_state = 'start'
    f1.set_final('next')

    for ch in string.ascii_lowercase:
        # First letter: keep it as-is.
        f1.add_arc('start', 'next', ch, ch)
        # Any later letter: emit '0'.
        f1.add_arc('next', 'next', ch, '0')

    return f1
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    NOTE(review): as written this machine is a pass-through.  Its single
    state copies every ASCII letter and every digit to the output, so no
    digit is ever dropped; the truncation itself is not implemented here.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    # One state that is both initial and final.
    f2.add_state('1')
    f2.initial_state = '1'
    f2.set_final('1')

    # Letters first, then '0'-'9', each copied unchanged.
    # string.ascii_letters replaces string.letters, which is
    # locale-dependent on Python 2 and absent on Python 3.
    for symbol in string.ascii_letters + string.digits:
        f2.add_arc('1', '1', symbol, symbol)

    return f2
def add_zero_padding():
    """Build the FST that right-pads a soundex code with zeros.

    State '1' copies the leading letters; '2', '3' and '4' are reached
    after copying one, two and three digits (only '1'-'9' are accepted as
    digits).  '4' is the sole final state.  Epsilon arcs jump to '4' from
    the earlier states, emitting as many '0' symbols as digits are
    missing.

    Returns:
        The constructed FST.
    """
    # Now, the third fst - the zero-padding fst
    f3 = FST('soundex-padzero')

    states = ['1', '2', '3', '4']
    for state in states:
        f3.add_state(state)
    f3.initial_state = '1'
    f3.set_final('4')

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and missing on Python 3.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    for number in range(1, 10):
        digit = str(number)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)
        f3.add_arc('3', '4', digit, digit)

    # Padding: no input consumed, missing digits filled with zeros.
    # The zero-digit case is needed for names whose code has no digits
    # at all; without it such input could never reach the final state.
    f3.add_arc('1', '4', (), '000')
    f3.add_arc('2', '4', (), '00')
    f3.add_arc('3', '4', (), '0')

    return f3
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'        -- initial; the first letter is copied and leads
                          to 'next'.
        'next'         -- first letter seen, no digit emitted since.
        'one'..'six'   -- the most recently emitted digit was 1..6.

    From any state after 'start', a consonant whose group differs from the
    current state emits its digit and moves to that group's state.  Every
    other letter (a, e, h, i, o, u, w, y, or a consonant of the current
    group) is consumed without output and leaves the state unchanged, so
    two same-group consonants separated only by such letters produce one
    digit.  Classification is case-insensitive.

    Returns:
        The constructed FST.
    """
    f1 = FST('soundex-generate')

    # (state name, digit emitted on entry, letters of the group)
    groups = (
        ('one', '1', 'bfpv'),
        ('two', '2', 'cgjkqsxz'),
        ('three', '3', 'dt'),
        ('four', '4', 'l'),
        ('five', '5', 'mn'),
        ('six', '6', 'r'),
    )
    group_states = [state for state, _, _ in groups]

    f1.add_state('start')
    f1.add_state('next')
    for state in group_states:
        f1.add_state(state)
    f1.initial_state = 'start'

    # Every state reachable after the first letter is accepting.
    f1.set_final('next')
    for state in group_states:
        f1.set_final(state)

    # lowercase consonant -> (destination state, digit)
    coded = {}
    for state, digit, letters in groups:
        for ch in letters:
            coded[ch] = (state, digit)

    # string.ascii_letters instead of string.letters (locale-dependent on
    # Python 2, missing on Python 3).
    for letter in string.ascii_letters:
        f1.add_arc('start', 'next', (letter), (letter))

    for src in ['next'] + group_states:
        for letter in string.ascii_letters:
            # Look up by lowercase so that uppercase consonants are coded
            # like their lowercase forms instead of being dropped.
            dst, digit = coded.get(letter.lower(), (src, None))
            if dst == src:
                # Uncoded letter, or repeat of the current group: drop it.
                f1.add_arc(src, src, (letter), ())
            else:
                f1.add_arc(src, dst, (letter), digit)

    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'         -- initial; the first letter is copied to the
                           output and leads to the state of its group.
        'grp0'          -- last letter read was one of a, e, i, o, u, h,
                           w, y.
        'grp1'..'grp6'  -- last letter read belonged to digit group 1..6.

    After the first letter, a letter of group N (N >= 1) emits str(N)
    unless the machine is already in 'grpN', in which case it is dropped.
    Letters of group 0 are always dropped and move to 'grp0', so the same
    digit can be emitted again after one of them.  Only lowercase ASCII
    letters have arcs.

    Returns:
        The constructed FST.
    """
    f1 = FST('soundex-generate')

    # Index in this tuple == group number == digit emitted (0 emits nothing).
    group_letters = ('aeiouhwy', 'bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r')
    group_states = ['grp%d' % number for number in range(len(group_letters))]

    f1.add_state('start')
    for state in group_states:
        f1.add_state(state)
    f1.initial_state = 'start'

    # Every group state is accepting.
    for state in group_states:
        f1.set_final(state)

    group_of = {}
    for number, letters in enumerate(group_letters):
        for ch in letters:
            group_of[ch] = number

    for letter in string.ascii_lowercase:
        number = group_of[letter]
        dst = group_states[number]

        # First letter of the word: copied verbatim.
        f1.add_arc('start', dst, (letter), (letter))

        for src in group_states:
            if number == 0 or src == dst:
                # Dropped letter, or repeat of the current group.
                f1.add_arc(src, dst, (letter), ())
            else:
                f1.add_arc(src, dst, (letter), str(number))

    return f1
def parse(self, word):
    """Parse a word morphologically

    e.g.
    p = Parser()
    word = ['p','a','n','i','c','k','i','n','g']
    p.parse(word) ---> 'panic+present participle form'

    Two FSTs are built and handed, together with ``word``, to
    ``compose``: ``f2`` rewrites a trailing 'ing' / 'ed' into a tag, and
    ``f3`` removes a 'k' that follows vowel + 'c' directly before the
    '+' of that tag.  The first element of the ``compose`` result is
    joined into a string and returned.

    NOTE(review): the source was collapsed onto a single line, so the
    nesting of the ``if`` statements inside the two letter loops is
    reconstructed as sequential (not nested) checks -- confirm.
    NOTE(review): ``compose`` is defined elsewhere; it is assumed to
    return an indexable collection of output sequences -- confirm, and
    confirm what happens when there is no parse.
    """
    # First machine: affix detection.
    f2 = FST('morphology-parse')
    # Detecting affixes ('ing' and 'ed')
    # States 'i', 'n', 'e' hold back the letters of a possible suffix
    # (the arcs into them emit nothing); 'g' and 'd' are reached only when
    # the suffix completes and are the final states.
    states = ['start', 'i', 'n', 'g', 'e', 'd']
    for state in states:
        f2.add_state(state)
    f2.initial_state = 'start'
    f2.set_final('d')
    f2.set_final('g')
    for letter in string.ascii_lowercase:
        # Ordinary letters are copied; 'i' and 'e' are handled below.
        if not letter in 'ie':
            f2.add_arc('start', 'start', letter, letter)
        # Suffix did not continue: flush the held-back letters plus the
        # current one and return to 'start'.
        if not letter == 'n':
            f2.add_arc('i', 'start', letter, ('i', letter))
        if not letter == 'd':
            f2.add_arc('e', 'start', letter, ['e', letter])
        if not letter == 'g':
            f2.add_arc('n', 'start', letter, ['i', 'n', letter])
    f2.add_arc('start', 'i', 'i', '')
    f2.add_arc('i', 'n', 'n', '')
    f2.add_arc('n', 'g', 'g', '+present participle form')
    f2.add_arc('start', 'e', 'e', '')
    f2.add_arc('e', 'd', 'd', '+past form')
    # Second machine: K-deletion.
    f3 = FST('morphology-parse')
    # K-deletion
    # 'vowel' / 'consonant' track the class of the last stem letter, 'c'
    # is a 'c' right after a vowel, 'k' is a 'k' right after that 'c'
    # (emitted as nothing).  The 'lick_*' states spell out a word-initial
    # "lick", whose 'k' is kept.  'parse' is entered on '+' and copies the
    # tag text.
    f3.add_state('start')
    f3.add_state('vowel')
    f3.add_state('consonant')
    f3.add_state('c')
    f3.add_state('k')
    f3.add_state('lick_l')
    f3.add_state('lick_i')
    f3.add_state('lick_c')
    f3.add_state('lick_k')
    f3.add_state('parse')
    f3.initial_state = 'start'
    f3.set_final('parse')
    vowels = 'aeiou'
    for vowel in vowels:
        f3.add_arc('start', 'vowel', vowel, vowel)
        f3.add_arc('vowel', 'vowel', vowel, vowel)
        f3.add_arc('consonant', 'vowel', vowel, vowel)
        f3.add_arc('c', 'vowel', vowel, vowel)
    for letter in string.ascii_lowercase:
        # Inside the tag every letter is copied.
        f3.add_arc('parse', 'parse', letter, letter)
        # The remaining arcs are for consonants only.
        if letter in vowels:
            continue
        # 'c' after a vowel goes to state 'c' instead (added below).
        if not letter == 'c':
            f3.add_arc('vowel', 'consonant', letter, letter)
        # Word-initial 'l' goes to 'lick_l' instead (added below).
        if not letter == 'l':
            f3.add_arc('start', 'consonant', letter, letter)
        # 'k' after vowel + 'c' goes to state 'k' instead (added below).
        if not letter == 'k':
            f3.add_arc('c', 'consonant', letter, letter)
        f3.add_arc('consonant', 'consonant', letter, letter)
    f3.add_arc('vowel', 'c', 'c', 'c')
    # The deletion itself: the 'k' is read but not written.
    f3.add_arc('c', 'k', 'k', '')
    f3.add_arc('start', 'lick_l', 'l', 'l')
    f3.add_arc('lick_l', 'lick_i', 'i', 'i')
    f3.add_arc('lick_i', 'lick_c', 'c', 'c')
    f3.add_arc('lick_c', 'lick_k', 'k', 'k')
    # '+' starts the tag; only these three states may precede it.
    f3.add_arc('lick_k', 'parse', '+', '+')
    f3.add_arc('k', 'parse', '+', '+')
    f3.add_arc('consonant', 'parse', '+', '+')
    f3.add_arc('parse', 'parse', ' ', ' ')
    output = compose(word, f2, f3)[0]
    return "".join(output)
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    'q1' is the initial state and 'q2' is entered after the first letter,
    which is copied to the output.  'n1'..'n6' record that the last digit
    emitted was 1..6.  From 'q2' or any 'nK', a consonant of group N emits
    str(N) and moves to 'nN' unless the machine is already there, in which
    case it is dropped.  The letters a, e, h, i, o, u, w, y are dropped
    and leave the state unchanged.  Only lowercase ASCII letters have arcs.
    """
    f1 = FST('soundex-generate')

    digit_states = ['n1', 'n2', 'n3', 'n4', 'n5', 'n6']
    after_first = ['q2'] + digit_states

    for name in ['q1'] + after_first:
        f1.add_state(name)
    f1.initial_state = 'q1'

    # Everything except the initial state is accepting.
    for name in after_first:
        f1.set_final(name)

    # consonant -> (state for its group, digit emitted on entering it)
    home = {}
    for letters, name, digit in (
            ('bfpv', 'n1', '1'),
            ('cgjkqsxz', 'n2', '2'),
            ('dt', 'n3', '3'),
            ('l', 'n4', '4'),
            ('mn', 'n5', '5'),
            ('r', 'n6', '6')):
        for ch in letters:
            home[ch] = (name, digit)

    for ch in string.ascii_lowercase:
        # First letter of the word is kept verbatim.
        f1.add_arc('q1', 'q2', (ch), (ch))

        if ch not in home:
            # One of 'aehiouwy': dropped, state unchanged.
            for name in after_first:
                f1.add_arc(name, name, (ch), ())
            continue

        target, digit = home[ch]
        # Coming from any other state the digit is written ...
        for name in after_first:
            if name != target:
                f1.add_arc(name, target, (ch), (digit))
        # ... but a repeat within the same group is swallowed.
        f1.add_arc(target, target, (ch), ())

    return f1
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    State '1' copies the leading letters.  Each of the first three digits
    is copied and advances '1' -> '2' -> '3' -> '4'.  '4' is the only
    final state; it swallows any further digits, and an epsilon arc leads
    to it from each earlier state so that shorter strings are accepted
    too.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    for state in ('1', '2', '3', '4'):
        f2.add_state(state)
    f2.initial_state = '1'
    f2.set_final('4')

    digits = [str(n) for n in range(10)]

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and missing on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', (letter), (letter))

    for src, dst in (('1', '2'), ('2', '3'), ('3', '4')):
        # Input may end here: jump to the final state for free.
        f2.add_arc(src, '4', (), ())
        # Otherwise copy one more digit.
        for digit in digits:
            f2.add_arc(src, dst, digit, digit)

    # Three digits already written: drop the rest.
    for digit in digits:
        f2.add_arc('4', '4', digit, ())

    return f2
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'     -- initial; the first letter is copied to the output
                       and leads to the state of its group.
        'next'      -- last letter read was one of a, e, i, o, u, h, w, y
                       (either case).
        '1'..'6'    -- last letter read belonged to digit group 1..6.
        '7'         -- the only final state, reached by an epsilon arc
                       from every state except 'start'.

    After the first letter, a consonant of group N emits the digit N
    unless the machine is already in state N, in which case it is
    dropped.  Letters of the 'next' group are always dropped.  Upper- and
    lowercase letters are treated alike.

    Returns:
        The constructed FST.
    """
    f1 = FST('soundex-generate')

    # (state entered, lowercase letters of the group).  For the digit
    # groups the state name is also the digit that is emitted.
    groups = (
        ('next', 'aeiouhwy'),
        ('1', 'bfpv'),
        ('2', 'cgjkqsxz'),
        ('3', 'dt'),
        ('4', 'l'),
        ('5', 'mn'),
        ('6', 'r'),
    )

    f1.add_state('start')
    for name in ('1', '2', '3', '4', '5', '6', '7'):
        f1.add_state(name)
    f1.add_state('next')
    f1.initial_state = 'start'
    f1.set_final('7')

    target_of = {}
    for state, letters in groups:
        for ch in letters:
            target_of[ch] = state
            target_of[ch.upper()] = state

    # First letter: copied verbatim, remembers its group.
    for letter in string.ascii_letters:
        f1.add_arc('start', target_of[letter], (letter), (letter))

    for src in ('next', '1', '2', '3', '4', '5', '6'):
        for letter in string.ascii_letters:
            dst = target_of[letter]
            if dst == 'next' or dst == src:
                # Dropped letter, or repeat of the current group.
                f1.add_arc(src, dst, (letter), ())
            else:
                # Generating the output from the destination guarantees
                # the right digit for every source state; a hand-written
                # table is prone to emitting '' for 'd'/'t' from some of
                # them.
                f1.add_arc(src, dst, (letter), (dst))
        # The word may end in any of these states.
        f1.add_arc(src, '7', (), ())

    return f1
def french_count():
    """Build the FST that rewrites a digit string into French number words.

    The machine has three entry branches, chosen by epsilon arcs from
    'start': 'units' (reads '0', '0', then one digit), 'tens' (reads '0',
    then two digits) and 'hundreds' (reads three digits).  A separate
    path reads '0', '0', '0' and emits kFRENCH_TRANS[0].  Output symbols
    come from kFRENCH_TRANS and kFRENCH_AND, both defined elsewhere.

    NOTE(review): every accepting path consumes exactly three digits, so
    the input presumably has to be zero-padded to length three -- confirm
    against the caller.
    NOTE(review): the source was collapsed onto one line; only the
    statement that uses the loop variable is placed inside each ``for``
    loop here -- confirm.
    NOTE(review): 'tnod1' has two arcs on '1' (to '10-60s', emitting
    kFRENCH_TRANS[10], and to '11-16s'), and '10-60s' continues by epsilon
    to both 'unod2' and '1s'.  Inputs whose last two digits are 11-16
    therefore have more than one accepting path with different output --
    confirm which one the transducer is expected to return.

    Returns:
        The constructed FST.
    """
    f = FST('french')
    # Indicate initial and final states
    f.add_state('start')
    f.initial_state = 'start'
    f.add_state('znod1')
    f.add_state('znod2')
    f.add_state('0')
    f.add_state('units')
    f.add_state('tens')
    f.add_state('hundreds')
    f.add_state('unod1')
    f.add_state('unod2')
    f.add_state('1s')
    f.add_state('1e')
    f.add_state('2-9s')
    f.add_state('tnod1')
    f.add_state('11s')
    f.add_state('11e')
    f.add_state('10-60s')
    f.add_state('10-60e')
    f.add_state('11-16s')
    f.add_state('12-16e')
    f.add_state('7090s')
    f.add_state('80s')
    f.add_state('100-900s')
    f.add_state('100-900t')
    f.add_state('100-900e')
    # add arcs to first level
    # "000": two silent zeros, the third emits the word for zero.
    f.add_arc('start', 'znod1', ('0'), ())
    f.add_arc('znod1', 'znod2', ('0'), ())
    f.add_arc('znod2', '0', ('0'), [kFRENCH_TRANS[0]])
    f.set_final('0')
    # Epsilon fan-out into the three magnitude branches.
    f.add_arc('start', 'units', (), ())
    f.add_arc('start', 'tens', (), ())
    f.add_arc('start', 'hundreds', (), ())
    # covers 1
    # 'unod2' is the shared "read the units digit" state; other branches
    # jump into it as well.
    f.add_arc('units', 'unod1', ('0'), ())
    f.add_arc('unod1', 'unod2', ('0'), ())
    f.add_arc('unod2', '1s', (), ())
    f.add_arc('1s', '1e', ('1'), [kFRENCH_TRANS[1]])
    f.set_final('1e')
    # Covers 2-9
    for i in range(2, 10):
        f.add_arc('unod2', '2-9s', (str(i)), [kFRENCH_TRANS[i]])
    f.set_final('2-9s')
    # covers 10-60
    f.add_arc('tens', 'tnod1', ('0'), ())
    for i in range(1, 7):
        f.add_arc('tnod1', '10-60s', (str(i)), [kFRENCH_TRANS[i * 10]])
    # After the tens word: a '0' ends the number, a units digit 2-9
    # follows via 'unod2', and a units '1' is preceded by kFRENCH_AND.
    f.add_arc('10-60s', '10-60e', ('0'), ())
    f.add_arc('10-60s', 'unod2', (), ())
    f.add_arc('10-60s', '1s', (), [kFRENCH_AND])
    f.set_final('10-60e')
    # covers 11-16
    f.add_arc('tnod1', '11-16s', ('1'), ())
    # covers 11
    f.add_arc('11-16s', '11s', (), ())
    f.add_arc('11s', '11e', ('1'), [kFRENCH_TRANS[11]])
    f.set_final('11e')
    # covers 12-16
    for i in range(2, 7):
        f.add_arc('11-16s', '12-16e', (str(i)), [kFRENCH_TRANS[i + 10]])
    f.set_final('12-16e')
    # covers 70-90
    # 7x and 9x emit the base word (kFRENCH_TRANS[60], or
    # kFRENCH_TRANS[4] + " " + kFRENCH_TRANS[20]) and then continue either
    # with kFRENCH_TRANS[10] plus a units digit or with the 11-16 words.
    f.add_arc('tnod1', '7090s', ('7'), [kFRENCH_TRANS[60]])
    f.add_arc('tnod1', '7090s', ('9'), [kFRENCH_TRANS[4] + " " + kFRENCH_TRANS[20]])
    f.add_arc('7090s', 'unod2', (), [kFRENCH_TRANS[10]])
    f.add_arc('7090s', '11-16s', (), ())
    f.add_arc('7090s', '10-60e', ('0'), [kFRENCH_TRANS[10]])
    # 8x: base word followed directly by the units digit (or nothing for 80).
    f.add_arc('tnod1', '80s', ('8'), [kFRENCH_TRANS[4] + " " + kFRENCH_TRANS[20]])
    f.add_arc('80s', 'unod2', (), ())
    f.add_arc('80s', '10-60e', ('0'), ())
    # Covers 100 - 900
    # '1' emits only kFRENCH_TRANS[100]; 2-9 emit the digit word first.
    f.add_arc('hundreds', '100-900s', ('1'), [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('hundreds', '100-900s', (str(i)),
                  [kFRENCH_TRANS[i] + " " + kFRENCH_TRANS[100]])
    # After the hundreds word: reuse the tens machinery, or read a '0'
    # tens digit and then either a units digit or a final '0'.
    f.add_arc('100-900s', 'tnod1', (), ())
    f.add_arc('100-900s', '100-900t', ('0'), ())
    f.add_arc('100-900t', 'unod2', (), ())
    f.add_arc('100-900t', '100-900e', ('0'), ())
    f.set_final('100-900e')
    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The state encodes how many digits have been emitted and which group
    the last letter belonged to:

        'start'      -- initial; the first letter (either case) is copied
                        and leads to '1'.
        '1'          -- no digit emitted yet.
        '2'..'7'     -- one digit emitted, last letter in group 1..6.
        '8'          -- one digit emitted, then a dropped letter.
        '9'..'14'    -- two digits emitted, last letter in group 1..6.
        '15'         -- two digits emitted, then a dropped letter.
        '16'         -- three digits emitted.
        '17'..'20'   -- final states, reached by epsilon arcs from the
                        zero-, one-, two- and three-digit states.

    A consonant emits its digit and advances the count unless it repeats
    the group of the current state, in which case it is dropped.  The
    letters a, e, h, i, o, u, w, y are always dropped.  In '16' dropped
    letters stay silent but consonants still emit their digit.  Apart from
    the first letter, only lowercase letters have arcs.

    Returns:
        The constructed FST.
    """
    remove = 'aehiouwy'
    # lowercase consonant -> soundex digit
    digit_of = {}
    for digit, letters in (('1', 'bfpv'), ('2', 'cgjkqsxz'), ('3', 'dt'),
                           ('4', 'l'), ('5', 'mn'), ('6', 'r')):
        for ch in letters:
            digit_of[ch] = digit

    f1 = FST('soundex-generate')

    f1.add_state('start')
    for number in range(1, 21):
        f1.add_state(str(number))
    f1.initial_state = 'start'

    for name in ('16', '17', '18', '19', '20'):
        f1.set_final(name)

    one_digit = [str(number) for number in range(2, 9)]     # '2'..'8'
    two_digits = [str(number) for number in range(9, 16)]   # '9'..'15'

    for letter in string.ascii_letters:
        f1.add_arc('start', '1', (letter), (letter))

        if letter in remove:
            # Dropped letter: forget the last group, keep the digit count.
            f1.add_arc('1', '1', (letter), ())
            for src in one_digit:
                f1.add_arc(src, '8', (letter), ())
            for src in two_digits:
                f1.add_arc(src, '15', (letter), ())
            f1.add_arc('16', '16', (letter), ())
            continue

        digit = digit_of.get(letter)
        if digit is None:
            # Uppercase letters only have the 'start' arc above.
            continue

        group = int(digit)
        after_one = str(1 + group)    # one digit emitted, this group last
        after_two = str(8 + group)    # two digits emitted, this group last

        f1.add_arc('1', after_one, (letter), (digit))
        for src in one_digit:
            if src == after_one:
                f1.add_arc(src, src, (letter), ())
            else:
                f1.add_arc(src, after_two, (letter), (digit))
        for src in two_digits:
            if src == after_two:
                f1.add_arc(src, src, (letter), ())
            else:
                f1.add_arc(src, '16', (letter), (digit))
        f1.add_arc('16', '16', (letter), (digit))

    # End-of-word arcs.  They are added exactly once, after the loop;
    # tying them to a per-letter branch would create one parallel copy of
    # each arc for every letter that reaches that branch.
    f1.add_arc('1', '17', (), ())
    for src in one_digit:
        f1.add_arc(src, '18', (), ())
    for src in two_digits:
        f1.add_arc(src, '19', (), ())
    f1.add_arc('16', '20', (), ())

    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'     -- initial; the first letter (either case) is copied
                       to the output and leads to '0'.
        '0'         -- no digit group is active.
        '1'..'6'    -- last letter read belonged to that digit group.

    From '0' a consonant emits its digit and enters the state of the same
    name.  Inside a digit state, a consonant of the same group is dropped,
    a consonant of another group emits its digit and switches state, and
    one of a, e, i, o, u, h, w, y is dropped and returns to '0'.  Apart
    from the first letter, only lowercase letters have arcs.  '0' and all
    digit states are final.

    Returns:
        The constructed FST.
    """
    f1 = FST('soundex-generate')

    f1.add_state('start')
    f1.initial_state = 'start'
    f1.add_state('0')
    f1.set_final('0')

    for letter in string.ascii_letters:
        f1.add_arc('start', '0', (letter), (letter))  # first letter of input

    # Ordered sequences rather than sets, so arcs are always added in the
    # same order regardless of string-hash randomisation.
    dropped = 'aeiouhwy'
    groups = (
        ('bfpv', '1'),
        ('cgjkqsxz', '2'),
        ('dt', '3'),
        ('l', '4'),
        ('mn', '5'),
        ('r', '6'),
    )

    for ch in dropped:
        f1.add_arc('0', '0', (ch), ())

    for _, state in groups:
        f1.add_state(state)
        f1.set_final(state)

    for letters, state in groups:
        for ch in letters:
            f1.add_arc('0', state, (ch), (state))   # enter the group
            f1.add_arc(state, state, (ch), ())      # repeat: dropped

        # A dropped letter ends the run and returns to the neutral state.
        for ch in dropped:
            f1.add_arc(state, '0', (ch), ())

        # A consonant from any other group emits that group's digit.
        for other_letters, other_state in groups:
            if other_state == state:
                continue
            for ch in other_letters:
                f1.add_arc(state, other_state, (ch), (other_state))

    return f1
def french_count():
    """
    Build the FST named 'french', which reads digit characters and emits
    entries of kFRENCH_TRANS (and kFRENCH_AND).

    States are '1' .. '21', with '1' as the initial state.  Several states
    have more than one arc for the same digit, so the machine is
    non-deterministic.
    NOTE(review): arcs are added in exactly the original order in case the
    transducer's search is order-sensitive -- confirm before reordering.
    """
    f = FST('french')

    for number in range(1, 22):
        f.add_state(str(number))
    f.initial_state = '1'
    for number in (2, 4, 7, 8, 9, 10, 12, 14, 15, 19, 21):
        f.set_final(str(number))

    ten = kFRENCH_TRANS[10]
    twenty = kFRENCH_TRANS[20]
    hundred = kFRENCH_TRANS[100]

    # Arcs taken on the digit '1'.
    f.add_arc('1', '6', '1', [ten])
    f.add_arc('18', '6', '1', [ten])
    f.add_arc('1', '3', '1', ())
    f.add_arc('18', '3', '1', ())
    f.add_arc('11', '12', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])
    f.add_arc('13', '15', '1', [twenty, kFRENCH_TRANS[1]])
    f.add_arc('17', '21', '1', [twenty, kFRENCH_TRANS[11]])
    f.add_arc('1', '18', '1', [hundred])
    f.add_arc('5', '8', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])

    # Arcs taken on the digit '0'.
    f.add_arc('1', '10', '0', [kFRENCH_TRANS[0]])
    f.add_arc('11', '12', '0', [ten])
    f.add_arc('18', '20', '0', ())
    f.add_arc('6', '9', '0', ())
    f.add_arc('20', '10', '0', ())
    f.add_arc('13', '14', '0', [twenty])
    f.add_arc('17', '19', '0', [twenty, ten])
    f.add_arc('1', '1', '0', ())
    f.add_arc('5', '7', '0', ())

    # Digits 1-6 out of state '3' emit the entries keyed 11-16.
    for value in range(1, 7):
        f.add_arc('3', '4', str(value), [kFRENCH_TRANS[value + 10]])

    # Digits 2-9.
    for value in range(2, 10):
        symbol = str(value)
        word = kFRENCH_TRANS[value]
        f.add_arc('13', '15', symbol, [twenty, word])
        f.add_arc('5', '8', symbol, [word])
        f.add_arc('1', '18', symbol, [word, hundred])

    # Digits 2-6.
    for value in range(2, 7):
        symbol = str(value)
        f.add_arc('11', '12', symbol, [kFRENCH_TRANS[value + 10]])
        f.add_arc('17', '21', symbol, [twenty, kFRENCH_TRANS[value + 10]])
        f.add_arc('18', '5', symbol, [kFRENCH_TRANS[value * 10]])
        f.add_arc('1', '5', symbol, [kFRENCH_TRANS[value * 10]])

    # Digits 1-9 emitted on their own.
    for value in range(1, 10):
        symbol = str(value)
        f.add_arc('20', '2', symbol, [kFRENCH_TRANS[value]])
        f.add_arc('1', '2', symbol, [kFRENCH_TRANS[value]])

    # Digits 7-9 are spelled as the entry for 10 followed by the digit.
    for value in range(7, 10):
        symbol = str(value)
        word = kFRENCH_TRANS[value]
        f.add_arc('11', '12', symbol, [ten, word])
        f.add_arc('6', '9', symbol, [word])
        f.add_arc('17', '21', symbol, [twenty, ten, word])

    # Tens digits 7, 8 and 9 emit the entry for 60 or for 4 and continue in
    # their own follow-up states ('11', '13', '17').
    sixty = kFRENCH_TRANS[60]
    four = kFRENCH_TRANS[4]
    f.add_arc('18', '11', '7', [sixty])
    f.add_arc('1', '11', '7', [sixty])
    f.add_arc('1', '13', '8', [four])
    f.add_arc('18', '13', '8', [four])
    f.add_arc('1', '17', '9', [four])
    f.add_arc('18', '17', '9', [four])
    return f
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST.

    State '1' copies every letter and digit through unchanged.  The only
    way to reach the final state '2' is the chain '1' -> '1a' -> '1b' -> '2'
    of epsilon arcs, each of which emits one '0', so every accepted input
    has exactly three '0' symbols appended.
    """
    f3 = FST('soundex-padzero')

    for label in ('1', '1a', '1b', '2'):
        f3.add_state(label)
    f3.initial_state = '1'
    f3.set_final('2')

    # Pass-through loops on the initial state.
    for letter in string.letters:
        f3.add_arc('1', '1', letter, letter)
    for number in xrange(10):
        digit = str(number)
        f3.add_arc('1', '1', digit, digit)

    # Three epsilon steps, one '0' each.
    for src, dst in (('1', '1a'), ('1a', '1b'), ('1b', '2')):
        f3.add_arc(src, dst, (), '0')
    return f3
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm
    """
    f1 = FST('soundex-generate')

    # Two states only: 'start' (initial) copies the first letter, 'next'
    # (final) handles every later letter.  With a single follow-up state
    # the machine keeps no memory of the previous letter's group.
    f1.add_state('start')
    f1.add_state('next')
    f1.initial_state = 'start'
    f1.set_final('next')

    # (letter collection, output) pairs, tried in order; first match wins.
    rules = (
        (vowels, ()),
        (grp1, '1'),
        (grp2, '2'),
        (grp3, '3'),
        (grp4, '4'),
        (grp5, '5'),
        (grp6, '6'),
    )

    for letter in string.ascii_lowercase:
        f1.add_arc('start', 'next', letter, letter)
        for members, emitted in rules:
            if letter in members:
                f1.add_arc('next', 'next', letter, emitted)
                break
        # A letter found in none of the collections gets no 'next' arc.
    return f1
def french_count():
    """
    Build the FST named 'french', which reads three digit characters
    (hundreds, tens, units) and emits entries of kFRENCH_TRANS, joined with
    kFRENCH_AND where the arcs say so.

    State families: 'H<d>' after the hundreds digit, 'T<d>' after the tens
    digit, 'U<d>' / 'U1<d>' (all final) after the units digit.  Three hub
    states reached by epsilon arcs share the arcs common to several
    digits.
    NOTE(review): the machine uses epsilon arcs and is non-deterministic;
    arcs are added in exactly the original order in case the transducer's
    search is order-sensitive.
    """
    hundreds_hub = 'H:(T0-T9)_COMMON'
    units_hub = 'T:(U2-U9)_COMMON'
    teens_hub = 'T:(U11-U19)_COMMON'

    f = FST('french')
    f.add_state('start')
    f.initial_state = 'start'

    for d in range(10):
        tag = str(d)
        for prefix in ('H', 'T', 'U', 'U1'):
            f.add_state(prefix + tag)
        for prefix in ('U', 'U1'):
            f.set_final(prefix + tag)
    for hub in (hundreds_hub, units_hub, teens_hub):
        f.add_state(hub)

    # Hundreds digit: nothing for 0, the entry for 100 for 1, otherwise
    # "<digit> <100>" as a single output token.
    for d in range(10):
        if d >= 2:
            words = [kFRENCH_TRANS[d] + ' ' + kFRENCH_TRANS[100]]
        elif d == 1:
            words = [kFRENCH_TRANS[100]]
        else:
            words = []
        f.add_arc('start', 'H' + str(d), [str(d)], words)
        if d != 0:
            # 'H0' is wired separately below.
            f.add_arc('H' + str(d), hundreds_hub, [], [])

    # Leading "00": the units digit alone decides the output.
    f.add_state('0_case')
    f.add_arc('H0', '0_case', ['0'], [])
    f.add_arc('0_case', 'U0', ['0'], [kFRENCH_TRANS[0]])
    f.add_arc('0_case', 'U0', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('0_case', units_hub, [], [])

    # Tens digit: arcs into 'T<d>'.
    for d in range(10):
        if d < 2:
            words = []
        elif d <= 6:
            words = [kFRENCH_TRANS[d * 10]]
        elif d == 7:
            words = [kFRENCH_TRANS[60]]
        else:
            words = [kFRENCH_TRANS[4] + ' ' + kFRENCH_TRANS[20]]
        f.add_arc(hundreds_hub, 'T' + str(d), [str(d)], words)
        if d != 0:
            f.add_arc('H0', 'T' + str(d), [str(d)], words)

    # Units digit: arcs out of each 'T<d>'.
    f.add_arc('T0', 'U0', ['0'], [])
    f.add_arc('T0', 'U1', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('T0', units_hub, [], [])

    f.add_arc('T1', teens_hub, [], [])
    f.add_arc('T1', 'U10', ['0'], [kFRENCH_TRANS[10]])

    # Tens 2-6 share one shape: silent 0, "<and> <1>" for 1, hub otherwise.
    for tens in range(2, 7):
        src = 'T' + str(tens)
        f.add_arc(src, 'U0', ['0'], [])
        f.add_arc(src, 'U1', ['1'], [kFRENCH_AND + ' ' + kFRENCH_TRANS[1]])
        f.add_arc(src, units_hub, [], [])

    # Tens 7 is spelled out arc by arc instead of going through the teens
    # hub (the original author noted odd behaviour with the epsilon here).
    for unit in range(10):
        if unit == 1:
            words = [kFRENCH_AND + ' ' + kFRENCH_TRANS[unit + 10]]
        elif unit in [7, 8, 9]:
            words = [kFRENCH_TRANS[10] + ' ' + kFRENCH_TRANS[unit]]
        else:
            words = [kFRENCH_TRANS[unit + 10]]
        f.add_arc('T7', 'U' + str(10 + unit), [str(unit)], words)

    f.add_arc('T8', 'U0', ['0'], [])
    f.add_arc('T8', 'U1', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('T8', units_hub, [], [])

    f.add_arc('T9', 'U10', ['0'], [kFRENCH_TRANS[10]])
    f.add_arc('T9', teens_hub, [], [])

    # Shared unit arcs hanging off the two hubs.
    for unit in range(1, 10):
        symbol = [str(unit)]
        if unit != 1:
            f.add_arc(units_hub, 'U' + str(unit), symbol,
                      [kFRENCH_TRANS[unit]])
        if unit + 10 in [17, 18, 19]:
            words = [kFRENCH_TRANS[10] + ' ' + kFRENCH_TRANS[unit]]
        else:
            words = [kFRENCH_TRANS[unit + 10]]
        f.add_arc(teens_hub, 'U' + str(10 + unit), symbol, words)
    return f
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST.

    '0' is the initial state and '4' the only final state.  One optional
    letter leads to state '1'.  From there, three digit paths of length
    3, 1 and 2 exist side by side; the shorter ones end with an epsilon arc
    that emits the missing zeros ('000' for no digit, '00' after one digit,
    '0' after two).
    NOTE(review): the digit paths out of state '1' overlap, so the machine
    is non-deterministic; arcs are added in the original order.
    """
    f3 = FST('soundex-padzero')

    # '1c' is declared but no arc touches it.
    for label in ('0', '1', '1a', '1b', '1c', '2', '3a', '3b', '4'):
        f3.add_state(label)
    f3.initial_state = '0'
    f3.set_final('4')

    for letter in string.letters:
        f3.add_arc('0', '1', letter, letter)

    # Every digit is copied along each of these hops.
    digit_hops = (
        ('1', '1a'), ('1a', '1b'), ('1b', '4'),  # three digits, no padding
        ('1', '2'),                              # exactly one digit
        ('1', '3a'), ('3a', '3b'),               # exactly two digits
    )
    for n in range(10):
        digit = str(n)
        for src, dst in digit_hops:
            f3.add_arc(src, dst, digit, digit)

    # The leading letter may be absent.
    f3.add_arc('0', '1', (), ())

    # Epsilon arcs that supply the missing zeros.
    for src, zeros in (('1', '000'), ('2', '00'), ('3b', '0')):
        f3.add_arc(src, '4', (), zeros)
    return f3
def french_count():
    """
    Build the FST named 'french', which reads three digit characters and
    emits entries of kFRENCH_TRANS (with kFRENCH_AND and the literal
    'cent' where the arcs say so).

    'start' is the initial state; the remaining states are '2' .. '16'.
    A leading '0' goes to state '2' (two digits left); any other leading
    digit goes to state '14' and emits the hundreds part.  States '2' and
    '14' then fan out on the tens digit to the same follow-up states.
    """
    f = FST('french')

    f.add_state('start')
    for number in range(2, 17):
        f.add_state(str(number))
    f.initial_state = 'start'
    for label in ('4', '6', '7', '9', '12', '16'):
        f.set_final(label)

    def add_ten_to_nineteen(src, and_before_eleven=False):
        # Units 0-6 emit the entries keyed 10-16 (optionally preceded by
        # kFRENCH_AND for 11); units 7-9 emit the entry for 10 plus the digit.
        for unit in range(0, 7):
            words = [kFRENCH_TRANS[unit + 10]]
            if and_before_eleven and unit == 1:
                words = [kFRENCH_AND] + words
            f.add_arc(src, '6', [str(unit)], words)
        for unit in range(7, 10):
            f.add_arc(src, '7', [str(unit)],
                      [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    eighty = [kFRENCH_TRANS[4], kFRENCH_TRANS[20]]

    # 00x: a single digit.
    f.add_arc('start', '2', '0', ())
    f.add_arc('2', '3', '0', ())
    for unit in range(0, 10):
        f.add_arc('3', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # 010-019.
    f.add_arc('2', '5', '1', ())
    add_ten_to_nineteen('5')

    # 020-069: tens word, then either a silent 0 or the unit.
    for tens in range(2, 7):
        f.add_arc('2', '8', [str(tens)], [kFRENCH_TRANS[tens * 10]])
    f.add_arc('8', '9', '0', ())
    for unit in range(1, 10):
        words = [kFRENCH_TRANS[unit]]
        if unit == 1:
            words = [kFRENCH_AND] + words
        f.add_arc('8', '4', [str(unit)], words)

    # 070-079: the entry for 60 followed by 10-19.
    f.add_arc('2', '10', '7', [kFRENCH_TRANS[60]])
    add_ten_to_nineteen('10', and_before_eleven=True)

    # 080-089.
    f.add_arc('2', '11', '8', list(eighty))
    f.add_arc('11', '12', '0', ())
    for unit in range(1, 10):
        f.add_arc('11', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # 090-099: same prefix as the eighties, followed by 10-19.
    f.add_arc('2', '13', '9', list(eighty))
    add_ten_to_nineteen('13')

    # 100.
    f.add_arc('start', '14', '1', ['cent'])
    f.add_arc('14', '15', '0', ())
    f.add_arc('15', '16', '0', ())

    # x01-x09.
    for unit in range(1, 10):
        f.add_arc('15', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # After a hundreds digit the tens digit reuses the states above.
    f.add_arc('14', '5', '1', ())
    for tens in range(2, 7):
        f.add_arc('14', '8', [str(tens)], [kFRENCH_TRANS[tens * 10]])
    f.add_arc('14', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('14', '11', '8', list(eighty))
    f.add_arc('14', '13', '9', list(eighty))

    # 200-999: the digit followed by 'cent'.
    for hundreds in range(2, 10):
        f.add_arc('start', '14', [str(hundreds)],
                  [kFRENCH_TRANS[hundreds], 'cent'])
    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm
    """
    f1 = FST('soundex-generate')

    # 'start' is the initial state.  'next<k>' means "the last letter read
    # belongs to group k"; group 0 holds the letters that emit nothing.
    # 'next' is declared and has outgoing arcs, but no arc leads into it.
    f1.add_state('start')
    f1.add_state('next')
    for code in range(7):
        f1.add_state('next%d' % code)
    f1.initial_state = 'start'
    for code in range(7):
        f1.set_final('next%d' % code)

    groups = (
        'aehiouwyAEHIOUWY',
        'bfpvBFPV',
        'cgjkqsxzCGJKQSXZ',
        'dtDT',
        'lL',
        'mnMN',
        'rR',
    )
    code_of = {}
    for code, members in enumerate(groups):
        for member in members:
            code_of[member] = code

    for letter in string.letters:
        code = code_of.get(letter)
        if code is None:
            # Not in any group: no arcs for this symbol.
            continue
        target = 'next%d' % code
        # Group 0 never emits; the other groups emit their digit.
        emitted = str(code) if code else ()

        # The first letter is copied through unchanged.
        f1.add_arc('start', target, letter, letter)
        f1.add_arc('next', target, letter, emitted)
        # Repeating the active group emits nothing.
        f1.add_arc(target, target, letter, ())
        for other in range(7):
            if other != code:
                f1.add_arc('next%d' % other, target, letter, emitted)
    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State '1' is the initial state and copies the first letter through
    unchanged.  States '2' .. '8' are final and record the group of the
    last letter read: '2' for the letters that emit nothing, '3' .. '8'
    for the groups coded 1 .. 6.  Entering a coded group from a different
    state emits its digit; a letter of the group that is already active
    emits nothing.
    """
    f1 = FST('soundex-generate')

    for number in range(1, 9):
        f1.add_state(str(number))
    f1.initial_state = '1'
    for number in range(2, 9):
        f1.set_final(str(number))

    # Index k is handled by state str(k + 2); k itself is the digit
    # emitted (nothing for k == 0).
    groups = (
        'aeiouhwyAEIOUHWY',
        'bfpvBFPV',
        'cgjkqsxzCGJKQSXZ',
        'dtDT',
        'lL',
        'mnMN',
        'rR',
    )
    group_states = [str(index + 2) for index in range(len(groups))]

    for letter in string.ascii_letters:
        for index, members in enumerate(groups):
            if letter not in members:
                continue
            target = group_states[index]
            emitted = str(index) if index else ()

            f1.add_arc('1', target, letter, letter)
            for src in group_states:
                # The self-loop is silent.  It is generated here for every
                # group, including m/n (state '7'), whose self-loop was
                # previously missing, so a doubled m/n had no arc to take.
                if src == target:
                    f1.add_arc(src, target, letter, ())
                else:
                    f1.add_arc(src, target, letter, emitted)
            break
    return f1
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST, which pads a soundex string with
    zeros until it carries three digits.

    State '1' copies letters; '1a' and '1b' are reached after one and two
    digits; '2' is the only final state.  It is reached either by a third
    digit or by an epsilon arc that emits the missing zeros ('000', '00'
    or '0').
    """
    f3 = FST('soundex-padzero')

    for label in ('1', '1a', '1b', '2'):
        f3.add_state(label)
    f3.initial_state = '1'

    # Only '2' is final.  State '1' used to be final as well, which let a
    # string with no digits be accepted as-is, without its '000' padding.
    f3.set_final('2')

    # Padding arcs first, then the copying arcs (original order kept).
    f3.add_arc('1', '2', (), ('000'))
    f3.add_arc('1a', '2', (), ('00'))
    f3.add_arc('1b', '2', (), ('0'))

    for letter in string.letters:
        f3.add_arc('1', '1', (letter), (letter))

    for src, dst in (('1', '1a'), ('1a', '1b'), ('1b', '2')):
        for number in xrange(10):
            f3.add_arc(src, dst, (str(number)), (str(number)))
    return f3
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm
    """
    f1 = FST('soundex-generate')

    # Consonant group i is emitted as the digit i + 1 and is tracked by the
    # state labelled with the integer i.
    consonant_groups = [
        ['b', 'f', 'p', 'v', 'B', 'F', 'P', 'V'],
        ['c', 'C', 'g', 'G', 'J', 'j', 'K', 'k', 'Q', 'q', 'S', 's', 'X',
         'x', 'Z', 'z'],
        ['d', 'D', 'T', 't'],
        ['L', 'l'],
        ['M', 'N', 'm', 'n'],
        ['R', 'r'],
    ]
    silent = ['a', 'e', 'i', 'o', 'u', 'w', 'y', 'h',
              'A', 'E', 'I', 'O', 'U', 'W', 'Y', 'H']
    group_ids = list(range(len(consonant_groups)))

    f1.add_state('start')
    f1.add_state('vowels')
    f1.set_final('vowels')
    for gid in group_ids:
        f1.add_state(gid)
        f1.set_final(gid)
    f1.initial_state = 'start'

    for letter in string.ascii_letters:
        if letter in silent:
            # First letter is copied; later silent letters emit nothing
            # and always lead to 'vowels'.
            f1.add_arc('start', 'vowels', letter, letter)
            f1.add_arc('vowels', 'vowels', letter, ())
            for gid in group_ids:
                f1.add_arc(gid, 'vowels', letter, ())
            continue

        for gid, members in enumerate(consonant_groups):
            if letter not in members:
                continue
            digit = str(gid + 1)[0]
            f1.add_arc('start', gid, letter, letter)
            f1.add_arc('vowels', gid, letter, digit)
            # Same group as the previous letter: no output.
            f1.add_arc(gid, gid, letter, ())
            for other in group_ids:
                if other != gid:
                    f1.add_arc(other, gid, letter, digit)
    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    'start' is the initial state and copies the first letter through
    unchanged.  'next' is entered when the first letter is one of the
    removed letters; 'rp1' .. 'rp6' record which consonant group was coded
    last.  All states except 'start' are final.
    """
    f1 = FST('soundex-generate')

    # Letters that never produce a digit.  'H' was missing from this list
    # (every other letter appears in both cases), which left uppercase 'H'
    # without an arc from any state.
    char_removal = [
        'a', 'e', 'h', 'i', 'o', 'u', 'w', 'y',
        'A', 'E', 'H', 'I', 'O', 'U', 'W', 'Y',
    ]
    # (state, emitted digit, member letters) for each consonant group.
    groups = [
        ('rp1', '1', ['b', 'f', 'p', 'v', 'B', 'F', 'P', 'V']),
        ('rp2', '2', ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z',
                      'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z']),
        ('rp3', '3', ['d', 't', 'D', 'T']),
        ('rp4', '4', ['l', 'L']),
        ('rp5', '5', ['m', 'n', 'M', 'N']),
        ('rp6', '6', ['r', 'R']),
    ]

    f1.add_state('start')
    f1.add_state('next')
    for state, _, _ in groups:
        f1.add_state(state)

    f1.initial_state = 'start'
    f1.set_final('next')
    for state, _, _ in groups:
        f1.set_final(state)

    for letter in char_removal:
        f1.add_arc('start', 'next', letter, letter)
        f1.add_arc('next', 'next', letter, ())

    for state, digit, members in groups:
        for letter in members:
            f1.add_arc('start', state, letter, letter)
            f1.add_arc('next', state, letter, digit)
            # Same group again: no output.
            f1.add_arc(state, state, letter, ())
        # NOTE(review): removed letters keep the machine in the current
        # group state, so two same-group consonants separated by one of
        # them emit only one digit.  Confirm this is the intended rule.
        for letter in char_removal:
            f1.add_arc(state, state, letter, ())
        for other_state, other_digit, other_members in groups:
            if other_state == state:
                continue
            for letter in other_members:
                f1.add_arc(state, other_state, letter, other_digit)
    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm
    """
    f1 = FST('soundex-generate')

    # 'start' is the initial state.  'next' is declared but never
    # connected to anything.
    f1.add_state('start')
    f1.add_state('next')
    f1.initial_state = 'start'

    # One final state per class, named after the class's digit; class 0
    # holds the letters that emit nothing.
    for code in range(0, 7):
        f1.add_state(str(code))
        f1.set_final(str(code))

    classes = ('aehiouwy', 'bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r')

    # The first letter is copied through in whichever case it came.
    for code, members in enumerate(classes):
        for letter in members:
            for symbol in (letter, letter.upper()):
                f1.add_arc('start', str(code), symbol, symbol)

    for src in range(0, 7):
        for code, members in enumerate(classes):
            # Nothing is emitted when the class does not change or when
            # the letter belongs to class 0; otherwise the class digit is.
            if code == src or code == 0:
                emitted = ()
            else:
                emitted = str(code)
            for letter in members:
                for symbol in (letter, letter.upper()):
                    f1.add_arc(str(src), str(code), symbol, emitted)
    return f1
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST.

    State '1' copies letters.  Each of the three hops '1' -> '1a' -> '1b'
    -> '2' can be taken either by copying a digit or by an epsilon arc that
    emits '0'; '2' is the only final state.
    NOTE(review): on every hop the epsilon arc is added before the digit
    arcs.  If the transducer's search depends on arc order, this order
    decides whether digits are consumed before zeros are inserted, so it
    is kept as-is.
    """
    f3 = FST('soundex-padzero')

    for label in ('1', '1a', '1b', '2'):
        f3.add_state(label)
    f3.initial_state = '1'
    f3.set_final('2')

    for letter in string.letters:
        f3.add_arc('1', '1', letter, letter)

    for src, dst in (('1', '1a'), ('1a', '1b'), ('1b', '2')):
        f3.add_arc(src, dst, (), '0')
        for number in xrange(10):
            digit = str(number)
            f3.add_arc(src, dst, digit, digit)
    return f3
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST.

    'start' is the initial state; a letter leads to '0', and each further
    hop '0' -> '1' -> '2' -> '3' is taken either by copying a digit or by
    an empty-input arc that emits '0'.  '3' is the only final state, and
    any digit read there is consumed without output.
    NOTE(review): the empty-input arcs are added before the digit arcs;
    that order is kept in case the transducer's search is order-sensitive.
    """
    f3 = FST('soundex-padzero')

    f3.add_state('start')
    f3.initial_state = 'start'
    for index in range(4):
        f3.add_state(str(index))
    f3.set_final('3')

    # Zero-emitting arcs on empty input.
    for src, dst in (('0', '1'), ('start', '1'), ('1', '2'), ('2', '3')):
        f3.add_arc(src, dst, '', '0')

    for letter in string.letters:
        f3.add_arc('start', '0', letter, letter)

    digits = [str(n) for n in range(10)]

    # A digit may also come first, without a leading letter.
    for digit in digits:
        f3.add_arc('start', '1', digit, digit)

    for index in range(3):
        for digit in digits:
            f3.add_arc(str(index), str(index + 1), digit, digit)

    # Digits beyond the third are dropped.
    for digit in digits:
        f3.add_arc('3', '3', digit, ())
    return f3
def generate(self, analysis):
    """Generate the morphologically correct word

    e.g.
    p = Parser()
    analysis = ['p','a','n','i','c','+past form']
    p.generate(analysis) ---> 'panicked'
    """
    f1 = FST('morphology-generate')

    # 'vowel' / 'consonant' / 'c' record the kind of the last letter read;
    # 'c' is entered only by a 'c' that directly follows a vowel.
    for label in ('start', 'vowel', 'consonant', 'c', 'form_1', 'form_2'):
        f1.add_state(label)
    f1.initial_state = 'start'
    f1.set_final('form_1')
    f1.set_final('form_2')

    vowels = 'aeiou'
    for vowel in vowels:
        for src in ('start', 'vowel', 'consonant', 'c'):
            f1.add_arc(src, 'vowel', vowel, vowel)

    for letter in string.ascii_lowercase:
        if letter in vowels:
            continue
        # A 'c' after a vowel is routed to state 'c' below instead.
        if letter != 'c':
            f1.add_arc('vowel', 'consonant', letter, letter)
        for src in ('start', 'consonant', 'c'):
            f1.add_arc(src, 'consonant', letter, letter)

    f1.add_arc('vowel', 'c', 'c', 'c')

    # Suffixes: stems ending in vowel + 'c' get an extra 'k'.
    endings = (
        ('c', 'form_1', 'ked', 'king'),
        ('consonant', 'form_2', 'ed', 'ing'),
    )
    for src, dst, past, participle in endings:
        f1.add_arc(src, dst, '+past form', past)
        f1.add_arc(src, dst, '+present participle form', participle)

    output = f1.transduce(analysis)[0]
    return "".join(output)
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State '0' is initial.  The first letter is copied to the output and
    leads to state '1'.  After that the state records the class of the
    previous letter: 'v' for the dropped letters, 'q1'..'q6' for the six
    digit classes.  A digit is emitted only when the class changes.
    """
    # (state reached, digit emitted or None for dropped letters, members)
    classes = (
        ('v', None, 'aeiouwhyAEIOUWHY'),
        ('q1', '1', 'BFPVbfpv'),
        ('q2', '2', 'CGJKQSXZcgjkqsxz'),
        ('q3', '3', 'dtDT'),
        ('q4', '4', 'lL'),
        ('q5', '5', 'mnMN'),
        ('q6', '6', 'rR'),
    )
    # Every state a non-initial letter can be read from, in arc order.
    sources = ('1', 'v', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6')

    fst = FST('soundex-generate')

    fst.add_state('0')
    for name in sources:
        fst.add_state(name)
    fst.initial_state = '0'
    for name in sources:
        fst.set_final(name)

    for letter in string.ascii_letters:
        # Retain the first letter as-is.
        fst.add_arc('0', '1', letter, letter)

        for target, digit, members in classes:
            if letter not in members:
                continue
            for source in sources:
                if digit is None or source == target:
                    # Dropped letter, or same class as the previous letter.
                    fst.add_arc(source, target, letter, ())
                else:
                    fst.add_arc(source, target, letter, digit)

    return fst
if __name__ == '__main__': f1 = FST('test-generate') # Indicate that '1' is the initial state f1.add_state('start') f1.add_state('next') f1.initial_state = 'start' # Set all the final states f1.set_final('next') # Add the rest of the arcs for letter in ['A', 'B', 'C', 'D']: f1.add_arc('start', 'next', letter, '1') f1.add_arc('next', 'next', letter, '0') f2 = FST('test-generate') f2.add_state('start') f2.add_state('next') f2.initial_state = 'start' f2.set_final('next') f2.add_arc('start', 'next', '1', 'a') f2.add_arc('start', 'next', '1', 'an') f2.add_arc('next', 'next', '0', 'b') output = compose(tuple('BAD'), f1, f2) print output for o in output:
def french_count():
    """
    Create an FST that spells out a three-digit string in French words.

    Every path from 'start' to 'final' consumes exactly three digit
    symbols (hundreds, tens, units), so smaller numbers have to be written
    with leading zeros.  The words come from kFRENCH_TRANS and kFRENCH_AND,
    which are defined elsewhere in this file.

    States:
        start   nothing read yet
        0XX     hundreds digit was 0
        NXX     hundreds digit was 1-9 (the hundreds words are emitted)
        00X     hundreds and tens digits were both 0
        N0X     non-zero hundreds digit followed by a 0 tens digit
        01X     tens digit was 1
        XX      tens digit was 2-6 (the tens word is emitted)
        7X      tens digit was 7 (the word for 60 is emitted)
        8X, 9X  tens digit was 8 or 9 (the words for 4 and 20 are emitted)
        final   all three digits consumed
    """
    f = FST('french')

    for state in ('start', 'final', '0XX', '00X', '01X', 'XX',
                  '7X', '8X', '9X', 'NXX', 'N0X'):
        f.add_state(state)
    f.initial_state = 'start'
    f.set_final('final')

    def teen(unit):
        # Words for 10 + unit: 10-16 have their own table entry, 17-19 are
        # spelled as the word for 10 followed by the unit.
        if unit < 7:
            return [kFRENCH_TRANS[unit + 10]]
        return [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]]

    # range() replaces xrange() throughout: same iteration, and it also
    # exists on Python 3.

    # single digit numbers: both leading zeros are silent, the last digit
    # is always spoken (so "000" yields the word for zero)
    f.add_arc('start', '0XX', ['0'], ())
    f.add_arc('0XX', '00X', ['0'], ())
    for ii in range(10):
        f.add_arc('00X', 'final', [str(ii)], [kFRENCH_TRANS[ii]])

    # 10-19
    f.add_arc('0XX', '01X', ['1'], ())
    for ii in range(10):
        f.add_arc('01X', 'final', [str(ii)], teen(ii))

    # 20-69
    for ii in range(2, 7):
        f.add_arc('0XX', 'XX', [str(ii)], [kFRENCH_TRANS[ii * 10]])
    # nothing is added for 20, 30, 40, 50, 60
    f.add_arc('XX', 'final', ['0'], ())
    # "and" is inserted for 21, 31, 41, 51, 61
    f.add_arc('XX', 'final', ['1'], [kFRENCH_AND, kFRENCH_TRANS[1]])
    for ii in range(2, 10):
        f.add_arc('XX', 'final', [str(ii)], [kFRENCH_TRANS[ii]])

    # 70s: sixty + 10..19, with "and" inserted for 71
    f.add_arc('0XX', '7X', ['7'], [kFRENCH_TRANS[60]])
    for ii in range(10):
        words = teen(ii)
        if ii == 1:
            words = [kFRENCH_AND] + words
        f.add_arc('7X', 'final', [str(ii)], words)

    # 80s: four twenty + unit
    f.add_arc('0XX', '8X', ['8'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('8X', 'final', ['0'], ())
    for ii in range(1, 10):
        f.add_arc('8X', 'final', [str(ii)], [kFRENCH_TRANS[ii]])

    # 90s: four twenty + 10..19
    f.add_arc('0XX', '9X', ['9'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    for ii in range(10):
        f.add_arc('9X', 'final', [str(ii)], teen(ii))

    # hundreds digit: 1 gives just the word for 100, 2-9 give digit + 100
    f.add_arc('start', 'NXX', ['1'], [kFRENCH_TRANS[100]])
    for ii in range(2, 10):
        f.add_arc('start', 'NXX', [str(ii)],
                  [kFRENCH_TRANS[ii], kFRENCH_TRANS[100]])

    # X00-X09: here a final 0 is silent
    f.add_arc('NXX', 'N0X', ['0'], ())
    f.add_arc('N0X', 'final', ['0'], ())
    for ii in range(1, 10):
        f.add_arc('N0X', 'final', [str(ii)], [kFRENCH_TRANS[ii]])

    # X10-X19
    f.add_arc('NXX', '01X', ['1'], ())

    # X20-X69
    for ii in range(2, 7):
        f.add_arc('NXX', 'XX', [str(ii)], [kFRENCH_TRANS[ii * 10]])

    # X70s, X80s, X90s
    f.add_arc('NXX', '7X', ['7'], [kFRENCH_TRANS[60]])
    f.add_arc('NXX', '8X', ['8'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('NXX', '9X', ['9'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])

    return f
def french_count():
    """
    Build the FST that rewrites a digit string as French number words.

    The machine has 26 states named '0' .. '25' and '0' is the initial
    state.  States '15' and '21' .. '25' are created but no arc touches
    them.  Several digits have more than one arc leaving the same state,
    so the machine is non-deterministic.  Words come from kFRENCH_TRANS
    and kFRENCH_AND, which are defined elsewhere in this file.
    """
    f = FST('french')

    for number in range(26):
        f.add_state(str(number))
    f.initial_state = '0'
    for accepting in ('1', '3', '6', '7', '8', '9',
                      '11', '13', '14', '18', '20'):
        f.set_final(accepting)

    # Arcs reading '0'.  A zero read in state '0' is either skipped
    # silently (self-loop) or spoken as the word for zero.
    f.add_arc('0', '0', '0', ())
    f.add_arc('4', '6', '0', ())
    f.add_arc('5', '8', '0', ())
    f.add_arc('0', '9', '0', [kFRENCH_TRANS[0]])
    f.add_arc('10', '11', '0', [kFRENCH_TRANS[10]])
    f.add_arc('12', '13', '0', [kFRENCH_TRANS[20]])
    f.add_arc('16', '18', '0', [kFRENCH_TRANS[20], kFRENCH_TRANS[10]])
    f.add_arc('17', '19', '0', ())
    f.add_arc('19', '9', '0', ())

    # Arcs reading '1'.
    f.add_arc('0', '2', '1', ())
    f.add_arc('17', '2', '1', ())
    f.add_arc('0', '17', '1', [kFRENCH_TRANS[100]])
    f.add_arc('0', '5', '1', [kFRENCH_TRANS[10]])
    f.add_arc('17', '5', '1', [kFRENCH_TRANS[10]])
    f.add_arc('4', '7', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])
    f.add_arc('10', '11', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])
    f.add_arc('12', '14', '1',
              [kFRENCH_TRANS[20], kFRENCH_AND, kFRENCH_TRANS[1]])
    f.add_arc('16', '20', '1',
              [kFRENCH_TRANS[20], kFRENCH_AND, kFRENCH_TRANS[11]])

    # 11-16 after a silent leading '1'.
    for digit in range(1, 7):
        f.add_arc('2', '3', str(digit), [kFRENCH_TRANS[digit + 10]])

    # Tens digits 2-6, and the 72-76 / 92-96 endings.
    for digit in range(2, 7):
        symbol = str(digit)
        f.add_arc('0', '4', symbol, [kFRENCH_TRANS[digit * 10]])
        f.add_arc('17', '4', symbol, [kFRENCH_TRANS[digit * 10]])
        f.add_arc('10', '11', symbol, [kFRENCH_TRANS[digit + 10]])
        f.add_arc('16', '20', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[digit + 10]])

    # Digits 2-9: units after a tens word, hundreds digit, 82-89 endings.
    for digit in range(2, 10):
        symbol = str(digit)
        f.add_arc('4', '7', symbol, [kFRENCH_TRANS[digit]])
        f.add_arc('0', '17', symbol,
                  [kFRENCH_TRANS[digit], kFRENCH_TRANS[100]])
        f.add_arc('12', '14', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[digit]])

    # A bare unit 1-9.
    for digit in range(1, 10):
        symbol = str(digit)
        f.add_arc('0', '1', symbol, [kFRENCH_TRANS[digit]])
        f.add_arc('19', '1', symbol, [kFRENCH_TRANS[digit]])

    # 17-19 and their 77-79 / 97-99 counterparts.
    for digit in range(7, 10):
        symbol = str(digit)
        f.add_arc('5', '8', symbol, [kFRENCH_TRANS[digit]])
        f.add_arc('10', '11', symbol,
                  [kFRENCH_TRANS[10], kFRENCH_TRANS[digit]])
        f.add_arc('16', '20', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[10],
                   kFRENCH_TRANS[digit]])

    # Tens digits 7, 8 and 9, with or without a preceding hundreds digit.
    for source in ('0', '17'):
        f.add_arc(source, '10', '7', [kFRENCH_TRANS[60]])
    for source in ('0', '17'):
        f.add_arc(source, '12', '8', [kFRENCH_TRANS[4]])
    for source in ('0', '17'):
        f.add_arc(source, '16', '9', [kFRENCH_TRANS[4]])

    return f
# NOTE(review): this return is the tail of a definition whose beginning is
# not part of the visible text.
return output_list

if __name__ == '__main__':
    # Ad-hoc check: build two small FSTs and hand them, together with the
    # symbols 'B', 'A', 'D', to compose().

    # First machine: the first letter is rewritten to '1', every later
    # letter to '0'.
    f1 = FST('test-generate')

    # 'start' is the initial state
    f1.add_state('start')
    f1.add_state('next')
    f1.initial_state = 'start'

    # Set all the final states
    f1.set_final('next')

    # Add the rest of the arcs
    for letter in ['A','B','C','D']:
        f1.add_arc('start', 'next', letter, '1')
        f1.add_arc('next', 'next', letter, '0')

    # Second machine: a leading '1' becomes either 'a' or 'an' (two arcs on
    # the same input symbol), every following '0' becomes 'b'.
    f2 = FST('test-generate')
    f2.add_state('start')
    f2.add_state('next')
    f2.initial_state = 'start'
    f2.set_final('next')
    f2.add_arc('start', 'next', '1', 'a')
    f2.add_arc('start', 'next', '1', 'an')
    f2.add_arc('next', 'next', '0', 'b')

    output = compose(tuple('BAD'), f1, f2)
    print output
    # NOTE(review): the body of this loop is not part of the visible text.
    for o in output:
def french_count():
    """
    Create an FST that rewrites a digit string as French number words.

    The first digit (hundreds) always leads to '1stzero', the second digit
    (tens) selects one of the tens states, and the remaining digit is read
    by a self-loop or by a short chain ending in an 'et-...' or
    'final_...' state.  Words come from kFRENCH_TRANS and kFRENCH_AND,
    which are defined elsewhere in this file.

    Each arc is registered exactly once.  Earlier revisions added the
    '20-69' zero self-loop four times and the epsilon arcs that finish
    17/18/19 twice, which only created redundant parallel arcs.

    NOTE(review): the 'zero' state is final but no arc leads to it, and
    every arc reading '0' from 'ones' emits nothing, so the input '000'
    appears to produce no words - confirm whether kFRENCH_TRANS[0] should
    be emitted there.
    """
    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'
    for state in ('1stzero', 'tens',
                  'seventeen', 'final_seventeen',
                  'eighteen', 'final_eighteen',
                  'nineteen', 'final_nineteen',
                  'zero', 'ones', '20-69', '70-ten', '80s', '90s', '100s',
                  'et', '10-et', 'et-un', 'et-onze'):
        f.add_state(state)

    for state in ('zero', 'ones', 'tens',
                  'final_seventeen', 'final_eighteen', 'final_nineteen',
                  '20-69', '70-ten', '80s', '90s', 'et-un', 'et-onze'):
        f.set_final(state)

    # 100 - 999: "cent" alone for 1, digit word + "cent" for 2-9
    f.add_arc('start', '1stzero', '1', [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('start', '100s', str(i), [kFRENCH_TRANS[i]])
    f.add_arc('100s', '1stzero', (), [kFRENCH_TRANS[100]])

    # 0 - 9
    f.add_arc('start', '1stzero', '0', [])
    f.add_arc('1stzero', 'ones', '0', [])
    for i in range(1, 10):
        f.add_arc('ones', 'ones', str(i), [kFRENCH_TRANS[i]])
    f.add_arc('ones', 'ones', '0', [])

    # 10 - 16
    f.add_arc('1stzero', 'tens', '1', [])
    for i in range(7):
        f.add_arc('tens', 'tens', str(i), [kFRENCH_TRANS[10 + i]])

    # 17 - 19: the digit emits the word for 10, then an epsilon arc emits
    # the unit.  These chains are shared with 77 - 79 below.
    split_teens = (
        ('7', 'seventeen', 'final_seventeen'),
        ('8', 'eighteen', 'final_eighteen'),
        ('9', 'nineteen', 'final_nineteen'),
    )
    for digit, middle, last in split_teens:
        f.add_arc('tens', middle, digit, [kFRENCH_TRANS[10]])
        f.add_arc(middle, last, (), [kFRENCH_TRANS[int(digit)]])

    # 20 - 69
    for i in range(2, 7):
        f.add_arc('1stzero', '20-69', str(i), [kFRENCH_TRANS[i * 10]])
    for i in range(2, 10):
        f.add_arc('20-69', '20-69', str(i), [kFRENCH_TRANS[i]])
    # handles 20, 30 ... 60 (one arc is enough: nothing depends on the
    # tens value here)
    f.add_arc('20-69', '20-69', '0', [])
    # handles 21, 31, ... 61
    f.add_arc('20-69', 'et', '1', [kFRENCH_AND])
    f.add_arc('et', 'et-un', (), [kFRENCH_TRANS[1]])

    # 70 - 79
    f.add_arc('1stzero', '70-ten', '7', [kFRENCH_TRANS[60]])
    f.add_arc('70-ten', '70-ten', '0', [kFRENCH_TRANS[10]])
    # handle 71 here
    f.add_arc('70-ten', '10-et', '1', [kFRENCH_AND])
    f.add_arc('10-et', 'et-onze', (), [kFRENCH_TRANS[11]])
    for i in range(2, 7):
        f.add_arc('70-ten', '70-ten', str(i), [kFRENCH_TRANS[10 + i]])
    # 77 - 79 reuse the chains built for 17 - 19
    for digit, middle, _last in split_teens:
        f.add_arc('70-ten', middle, digit, [kFRENCH_TRANS[10]])

    # 80 - 89
    f.add_arc('1stzero', '80s', '8', [kFRENCH_TRANS[4]])
    f.add_arc('80s', 'ones', (), [kFRENCH_TRANS[20]])
    f.add_arc('80s', '80s', '0', [kFRENCH_TRANS[20]])

    # 90 - 99
    f.add_arc('1stzero', '90s', '9', [kFRENCH_TRANS[4]])
    f.add_arc('90s', 'tens', (), [kFRENCH_TRANS[20]])

    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State 'initial' copies the first letter to the output and moves to
    state '0'.  After that the state is the digit class of the previous
    letter: '1'..'6' for the coded consonants, '0' after a dropped letter
    (a, e, h, i, o, u, w, y) or after the first letter.  A digit is emitted
    only when the class changes.

    Two fixes over the earlier revision:
      * a repeated letter of the same class now stays in that class' state
        instead of falling back to '0', so a third consecutive letter of
        the class no longer emits the digit a second time;
      * classes are matched case-insensitively, so uppercase letters after
        the first one are handled like their lowercase forms.
    """
    # Let's define our first FST
    f1 = FST('soundex-generate')

    dropped = 'aehiouwy'
    digit_classes = (
        ('1', 'bfpv'),
        ('2', 'cgjkqsxz'),
        ('3', 'dt'),
        ('4', 'l'),
        ('5', 'mn'),
        ('6', 'r'),
    )
    class_states = [str(n) for n in range(7)]  # '0'..'6'

    f1.add_state('initial')
    for state in class_states:
        f1.add_state(state)
    f1.initial_state = 'initial'

    # Set all the final states
    for state in class_states:
        f1.set_final(state)

    # Add the rest of the arcs
    for letter in string.ascii_letters:
        # The first letter of the word is kept verbatim.
        f1.add_arc('initial', '0', letter, letter)

        key = letter.lower()
        if key in dropped:
            for source in class_states:
                f1.add_arc(source, '0', letter, ())
            continue

        for digit, members in digit_classes:
            if key not in members:
                continue
            # Coming from any other class: emit the digit.
            for source in class_states:
                if source != digit:
                    f1.add_arc(source, digit, letter, digit)
            # Same class as the previous letter: emit nothing and stay put.
            f1.add_arc(digit, digit, letter, ())

    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output and already selects the state
    of its own class, so a following letter of the same class is dropped.
    States '1'..'6' mean "the previous letter had this digit", 'v' means
    "the previous letter was one of the dropped letters".  A digit is
    emitted only when the class changes.

    The d/t class is registered once; the earlier revision contained the
    same loop twice and therefore added every d/t arc in duplicate.
    """
    # Let's define our first FST
    f1 = FST('soundex-generate')

    digit_classes = (
        ('1', 'bfpvBFPV'),
        ('2', 'cgjkqsxzCGJKQSXZ'),
        ('3', 'dtDT'),
        ('4', 'lL'),
        ('5', 'mnMN'),
        ('6', 'rR'),
    )
    dropped = 'aeiouhwyAEIOUHWY'
    # Every state a non-initial letter can be read from, in arc order.
    sources = ('1', '2', '3', '4', '5', '6', 'v')

    f1.add_state('start')
    for state in sources:
        f1.add_state(state)
    f1.initial_state = 'start'

    # Set all the final states
    for state in sources:
        f1.set_final(state)

    # Coded consonants
    for digit, members in digit_classes:
        for letter in members:
            f1.add_arc('start', digit, letter, letter)
            for source in sources:
                if source == digit:
                    # same class as the previous letter: emit nothing
                    f1.add_arc(source, digit, letter, ())
                else:
                    f1.add_arc(source, digit, letter, digit)

    # Dropped letters never emit anything after the first position
    for letter in dropped:
        f1.add_arc('start', 'v', letter, letter)
        for source in sources:
            f1.add_arc(source, 'v', letter, ())

    return f1
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State '0' is initial.  State '1' means the previous letter was one of
    the dropped letters.  States '2'..'7' mean the previous letter belonged
    to digit class 1..6, and '2a'..'7a' are the same classes when that
    letter was the (verbatim copied) first letter of the word.  A digit is
    emitted only when the class changes.
    """
    fst = FST('soundex-generate')

    class_states = [str(n) for n in range(2, 8)]          # '2' .. '7'
    first_states = [name + 'a' for name in class_states]  # '2a' .. '7a'

    # Add all states; '0' is the initial one, all others are final.
    fst.add_state('0')
    fst.add_state('1')
    for name in class_states + first_states:
        fst.add_state(name)
    fst.initial_state = '0'
    fst.set_final('1')
    for name in class_states + first_states:
        fst.set_final(name)

    # Letter classes; the class at index i emits the digit str(i + 1).
    coded = ('bfpvBFPV', 'cgjkqsxzCGJKQSXZ', 'dtDT', 'lL', 'mnMN', 'rR')
    dropped = 'aehiouwyAEHIOUWY'

    for letter in string.ascii_letters:
        for index, members in enumerate(coded):
            if letter not in members:
                continue
            digit = str(index + 1)
            target = class_states[index]
            first = first_states[index]

            # Retain the first character
            fst.add_arc('0', first, letter, letter)
            # Same class as the first letter: nothing to emit
            fst.add_arc(first, target, letter, ())
            for source in first_states:
                if source != first:
                    fst.add_arc(source, target, letter, digit)
            fst.add_arc('1', target, letter, digit)
            # Same class as the previous letter: nothing to emit
            fst.add_arc(target, target, letter, ())
            for source in class_states:
                if source != target:
                    fst.add_arc(source, target, letter, digit)

        # Remove letters
        if letter in dropped:
            fst.add_arc('0', '1', letter, letter)
            fst.add_arc('1', '1', letter, ())
            for source in first_states + class_states:
                fst.add_arc(source, '1', letter, ())

    return fst
def french_count():
    """
    Create an FST that spells out a three-digit string in French words.

    Every path from the initial state '1' to the only final state '4'
    consumes exactly three digit symbols.  Words come from kFRENCH_TRANS
    and kFRENCH_AND, which are defined elsewhere in this file.

    States:
        1   nothing read yet
        2   hundreds digit was 0
        9   hundreds digit was 1-9 (the hundreds words are emitted)
        3   tens digit 0 after hundreds digit 0 (a final 0 is spoken)
        10  tens digit 0 after a non-zero hundreds digit (final 0 silent)
        5   units are spoken as 10-19: tens digit 1, or tens digit 9
            after the words for 4 and 20
        6   tens digit 2-6 (the tens word is emitted)
        7   tens digit 7 (the word for 60 is emitted)
        8   tens digit 8 (the words for 4 and 20 are emitted)
        4   all three digits consumed
    """
    f = FST('french')

    for number in range(1, 11):
        f.add_state(str(number))
    f.initial_state = '1'
    f.set_final('4')

    # range() is used throughout; the earlier revision mixed it with the
    # Python 2-only xrange().

    def add_tens_arcs(source, zero_target):
        # Arcs for the tens digit.  They are identical after a zero and a
        # non-zero hundreds digit except for where a 0 leads.
        f.add_arc(source, zero_target, ['0'], ())
        f.add_arc(source, '5', ['1'], ())
        for i in range(2, 7):
            f.add_arc(source, '6', [str(i)], [kFRENCH_TRANS[i * 10]])
        f.add_arc(source, '7', ['7'], [kFRENCH_TRANS[60]])
        f.add_arc(source, '8', ['8'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
        f.add_arc(source, '5', ['9'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])

    def teen(unit):
        # Words for 10 + unit: 10-16 have their own table entry, 17-19 are
        # spelled as the word for 10 followed by the unit.
        if unit < 7:
            return [kFRENCH_TRANS[10 + unit]]
        return [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]]

    # hundred's place
    f.add_arc('1', '2', ['0'], ())
    f.add_arc('1', '9', ['1'], [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('1', '9', [str(i)], [kFRENCH_TRANS[i], kFRENCH_TRANS[100]])

    # ten's place when hundred's place was 0
    add_tens_arcs('2', '3')
    # ten's place when hundred's place was 1-9
    add_tens_arcs('9', '10')

    # one's place
    # state 10-->4: a trailing 0 is silent
    f.add_arc('10', '4', ['0'], ())
    for i in range(1, 10):
        f.add_arc('10', '4', [str(i)], [kFRENCH_TRANS[i]])

    # state 3-->4: every digit is spoken, including 0
    for i in range(10):
        f.add_arc('3', '4', [str(i)], [kFRENCH_TRANS[i]])

    # state 5-->4: 10-19
    for i in range(10):
        f.add_arc('5', '4', [str(i)], teen(i))

    # state 6-->4: "and" is inserted before a unit of 1
    f.add_arc('6', '4', ['0'], ())
    f.add_arc('6', '4', ['1'], [kFRENCH_AND, kFRENCH_TRANS[1]])
    for i in range(2, 10):
        f.add_arc('6', '4', [str(i)], [kFRENCH_TRANS[i]])

    # state 7-->4: sixty + 10-19, "and" is inserted for 71
    for i in range(10):
        words = teen(i)
        if i == 1:
            words = [kFRENCH_AND] + words
        f.add_arc('7', '4', [str(i)], words)

    # state 8-->4
    f.add_arc('8', '4', ['0'], ())
    for i in range(1, 10):
        f.add_arc('8', '4', [str(i)], [kFRENCH_TRANS[i]])

    return f
def french_count():
    """
    Build and return the 'french' FST that rewrites digits as French words.

    State names are patterns over the three digit positions: '0' is a
    zero that was read, 'n' another digit that was read, '*' a position
    that is still unread.  The '...Vega7/8/9' states handle a tens digit
    of 7, 8 or 9.  Words come from kFRENCH_TRANS and kFRENCH_AND, which
    are defined elsewhere in this file.

    NOTE(review): the nesting of the statements in the first loop was
    reconstructed from a whitespace-mangled source - confirm it against
    the author's original layout.
    NOTE(review): several arcs use a two-character input such as '00',
    '10' or '20'; whether add_arc treats that as one symbol or as two is
    not visible here - verify against FST.add_arc.
    """
    f = FST('french')
    f.add_state('start')
    # one number and two trailing unknowns
    f.add_state('n**')
    # exception from state n**
    f.add_state('n**+')
    # two numbers and one trailing unknown
    f.add_state('nn*')
    # zero and two unknown digits trailing and so on
    f.add_state('0**')
    f.add_state('00*')
    f.add_state('00n')
    f.add_state('0n*')
    f.add_state('0n*+')
    f.add_state('0nn')
    f.add_state('n00')
    f.add_state('nnn')
    f.add_state('nnn*')
    f.add_state('*et*')
    # vigesimal counting for 7 in ((0/n)n*)
    f.add_state('0n*Vega7+')
    f.add_state('0n*Vega7')
    f.add_state('0nnVega7')
    # vigesimal counting for 8 in ((0/n)n*)
    f.add_state('0n*Vega8')
    f.add_state('0n*Vega8+')
    f.add_state('0nnVega8')
    # vigesimal counting for 9 in ((0/n)n*)
    f.add_state('0n*Vega9')
    f.add_state('0n*Vega9+')
    f.add_state('0n*Vega9++')
    f.add_state('0nnVega9')
    # set final states
    f.set_final('00n')
    f.set_final('0nn')
    f.set_final('nnn')
    f.set_final('n00')
    f.set_final('0nnVega7')
    f.set_final('0nnVega8')
    f.set_final('0nnVega9')
    # initial state
    f.initial_state = 'start'
    # remove initial zeroes
    f.add_arc('start', '0**', '0', ())
    f.add_arc('0**', '00*', '0', ())
    for ii in xrange(10):
        # from '0n*Vega8+' to '0nnVega8': the unit after the word for 20,
        # a trailing 0 is silent
        if ii != 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), [kFRENCH_TRANS[ii]])
        elif ii == 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), ())
        # from '0n*Vega7' to '0nnVega7' 7-9
        # NOTE(review): the two epsilon arcs below are re-added on each of
        # the four iterations that take this branch - confirm intended.
        if ii == 0 or ii == 7 or ii ==8 or ii == 9:
            f.add_arc('0n*Vega7', '0n*Vega7+', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega7+', '0n*Vega7+', str(ii), [kFRENCH_TRANS[ii]])
            # same pattern for the nineties
            f.add_arc('0n*Vega9+', '0n*Vega9++', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega9++', '0nnVega9', str(ii), [kFRENCH_TRANS[ii]])
            if ii == 0:
                f.add_arc('0n*Vega7+', '0nnVega7', '0', ())
                f.add_arc('0n*Vega9++', '0nnVega9', '0', ())
            elif ii == 7 or ii == 8 or ii == 9:
                f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii]])
        # from '0n*Vega' to '0nnVega' 2-6: emits the word for 10 + ii
        if ii == 2 or ii == 3 or ii ==4 or ii == 5 or ii == 6:
            f.add_arc('0n*Vega7', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        if ii == 1:
            f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('n**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega7', '0n*Vega7+', str(ii), [kFRENCH_AND])
            f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        # from '00*' to '00n'
        f.add_arc('00*', '00n', str(ii), [kFRENCH_TRANS[ii]])
        # from '*n*' to '*nn' 2-9
        if ii != 0 and ii !=9:
            f.add_arc('0n*','0nn', str(ii+1), [kFRENCH_TRANS[ii+1]])
            f.add_arc('0n*+','0nn', str(ii), [kFRENCH_TRANS[ii]])
        # from 'start' to 'n**' via 'n**+': digit word, then the word for 100
        if ii != 0 and ii !=1:
            f.add_arc('start','n**+', str(ii), [kFRENCH_TRANS[ii]])
            f.add_arc('n**+', 'n**', (), [kFRENCH_TRANS[100]])
        # from 'n**' to 'n00' on the input '00'
        if ii == 0:
            f.add_arc('n**', 'n00', '00', ())
        # a hundreds digit of 1 emits only the word for 100
        if ii == 1:
            f.add_arc('start', 'n**', '1', [kFRENCH_TRANS[100]])
    # from '*n*' to '*et*' 1
    f.add_arc('0n*','*et*', '1', [kFRENCH_AND])
    # from '*et*' to '*nn' 1
    f.add_arc('*et*','0nn', (), [kFRENCH_TRANS[1]])
    # from '0**' to '*nn' 10-16
    for ii in xrange(10,17):
        f.add_arc('0**','0nn', str(ii), [kFRENCH_TRANS[ii]])
        f.add_arc('n**','0nn', str(ii), [kFRENCH_TRANS[ii]])
    # from '0**' to '*nn' 20-60
    for ii in xrange(2,7):
        f.add_arc('0**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        f.add_arc('n**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        # from '0**' to '0n*'
        f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[ii*10]])
        # from 'n**' to '0n*+'
        f.add_arc('n**', '0n*+', str(ii), [kFRENCH_TRANS[ii*10]])
    # tens digit 7, 8 or 9: 7 emits the word for 60; 8 and 9 emit the word
    # for 4 and then, through an epsilon arc, the word for 20
    for ii in xrange(7,10):
        if ii == 7:
            f.add_arc('0**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
            f.add_arc('n**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
        elif ii == 8:
            f.add_arc('0**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega8', '0n*Vega8+', (), [kFRENCH_TRANS[20]])
        elif ii == 9:
            f.add_arc('0**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega9', '0n*Vega9+', (), [kFRENCH_TRANS[20]])
    # a 0 tens digit after a non-zero hundreds digit is silent
    f.add_arc('n**', '0n*+', '0', ())
    return f
def french_count():
    """
    Build the 'french' FST, which reads a digit string one character at a
    time and emits entries of kFRENCH_TRANS (joined where needed by
    kFRENCH_AND).

    Layout of the machine:
      * 'start' consumes the hundreds digit.  '0' emits nothing and moves
        to 'dig1zero'; '1' emits kFRENCH_TRANS[100]; '2'-'9' emit
        kFRENCH_TRANS[digit] followed by kFRENCH_TRANS[100] and move to
        'dig1_nzero'.
      * 'dig1zero' / 'dig1_nzero' consume the tens digit without emitting
        anything and move to a per-tens state ('dig2_one' ... 'dig2_nine',
        or 'dig2zero' / 'sec_last' when the tens digit is '0').
      * The per-tens state consumes the units digit and emits the words
        for the whole two-digit remainder in one go.

    Returns the constructed FST.

    NOTE(review): states 'p1', 'p2' and 'p3' are created and marked final
    but no arc touches them.  The tens states 'dig2_two' ... 'dig2_nine'
    and 'sec_last' are final and loop on themselves, so they can also be
    final after fewer or more than three digits -- confirm whether callers
    always supply exactly three digits.
    """
    f = FST('french')

    # Register the states, keeping the declaration order stable.
    for name in ('start', 'dig1zero', 'dig2zero', 'f_dig3', 'dig2_one',
                 'state5', 'state6', 'dig2_two', 'dig2_three', 'dig2_four',
                 'dig2_five', 'dig2_six', 'dig2_sev', 'dig2_eig',
                 'dig2_nine', 'dig1_nzero', 'sec_last', 'last',
                 'p1', 'p2', 'p3'):
        f.add_state(name)

    f.initial_state = 'start'

    for name in ('start', 'f_dig3', 'state5', 'state6', 'dig2_two',
                 'dig2_three', 'dig2_four', 'dig2_five', 'dig2_six',
                 'dig2_sev', 'dig2_eig', 'dig2_nine', 'last',
                 'p1', 'p2', 'p3', 'sec_last'):
        f.set_final(name)

    def teen_tail(unit):
        # Units 0-6 map to the single table entry for 10 + unit; units
        # 7-9 are emitted as the entry for 10 followed by the unit's own.
        if unit < 7:
            return [kFRENCH_TRANS[unit + 10]]
        return [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]]

    def leading_zero_entry(state, digit):
        # Silent arc taken on the tens digit when the hundreds digit was 0.
        f.add_arc('dig1zero', state, digit, ())

    def simple_decade(state, digit, decade):
        # Tens digits 2-6: decade entry alone for unit 0, decade +
        # kFRENCH_AND + entry for 1 when the unit is 1, otherwise decade +
        # unit entry.
        leading_zero_entry(state, digit)
        decade_word = kFRENCH_TRANS[decade]
        for unit in range(10):
            if unit == 0:
                words = [decade_word]
            elif unit == 1:
                words = [decade_word, kFRENCH_AND, kFRENCH_TRANS[1]]
            else:
                words = [decade_word, kFRENCH_TRANS[unit]]
            f.add_arc(state, state, str(unit), words)

    # 09X: entries for 4 and 20, then the 10-19 tail.
    leading_zero_entry('dig2_nine', '9')
    for unit in range(10):
        f.add_arc('dig2_nine', 'dig2_nine', str(unit),
                  [kFRENCH_TRANS[4], kFRENCH_TRANS[20]] + teen_tail(unit))

    # 08X: entries for 4 and 20, then the unit entry unless the unit is 0.
    leading_zero_entry('dig2_eig', '8')
    for unit in range(10):
        words = [kFRENCH_TRANS[4], kFRENCH_TRANS[20]]
        if unit:
            words.append(kFRENCH_TRANS[unit])
        f.add_arc('dig2_eig', 'dig2_eig', str(unit), words)

    # 07X: entry for 60, kFRENCH_AND only before unit 1, then the 10-19
    # tail.
    leading_zero_entry('dig2_sev', '7')
    for unit in range(10):
        words = [kFRENCH_TRANS[60]]
        if unit == 1:
            words.append(kFRENCH_AND)
        f.add_arc('dig2_sev', 'dig2_sev', str(unit),
                  words + teen_tail(unit))

    # 00X: two silent zeros, then the bare unit entry (including 0).
    f.add_arc('start', 'dig1zero', '0', ())
    f.add_arc('dig1zero', 'dig2zero', '0', ())
    for unit in range(10):
        f.add_arc('dig2zero', 'f_dig3', [str(unit)], [kFRENCH_TRANS[unit]])

    # 02X
    simple_decade('dig2_two', '2', 20)

    # 01X: units 0-6 end in 'state5', units 7-9 end in 'state6'.
    leading_zero_entry('dig2_one', '1')
    for unit in range(10):
        target = 'state5' if unit < 7 else 'state6'
        f.add_arc('dig2_one', target, [str(unit)], teen_tail(unit))

    # 04X, 03X, 05X, 06X
    simple_decade('dig2_four', '4', 40)
    simple_decade('dig2_three', '3', 30)
    simple_decade('dig2_five', '5', 50)
    simple_decade('dig2_six', '6', 60)

    # Non-zero hundreds digit: '1' emits only the entry for 100, '2'-'9'
    # emit the digit's entry first.
    f.add_arc('start', 'dig1_nzero', '1', [kFRENCH_TRANS[100]])
    for hundreds in range(2, 10):
        f.add_arc('start', 'dig1_nzero', str(hundreds),
                  [kFRENCH_TRANS[hundreds], kFRENCH_TRANS[100]])

    # N0X with X != 0: emit the unit entry while staying in 'sec_last'.
    for unit in range(1, 10):
        f.add_arc('sec_last', 'sec_last', str(unit), [kFRENCH_TRANS[unit]])

    # Tens digit after a non-zero hundreds digit: silent hop into the same
    # per-tens states used by the leading-zero paths.
    for digit, state in (('6', 'dig2_six'), ('7', 'dig2_sev'),
                         ('8', 'dig2_eig'), ('9', 'dig2_nine'),
                         ('0', 'sec_last'), ('1', 'dig2_one'),
                         ('2', 'dig2_two'), ('3', 'dig2_three'),
                         ('4', 'dig2_four'), ('5', 'dig2_five')):
        f.add_arc('dig1_nzero', state, digit, ())

    # N00: the trailing zero emits nothing.
    f.add_arc('sec_last', 'last', '0', ())

    return f