def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    A leading letter is copied on 'start' -> '0'.  Digits are copied along
    'start'/'0' -> '1' -> '2' -> '3'.  Arcs with empty input emit '0' from
    '1' (through '2a') and from '2', so a path with fewer than three digits
    can still reach the only final state, '3'.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    for state in ('start', '0', '1', '2', '2a', '3'):
        f3.add_state(state)
    f3.initial_state = 'start'
    f3.set_final('3')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('start', '0', letter, letter)

    for number in range(10):
        digit = str(number)
        f3.add_arc('start', '1', digit, digit)
        f3.add_arc('0', '1', digit, digit)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)

    # The padding arcs do not depend on the digit, so each is added exactly
    # once rather than once per digit.
    f3.add_arc('1', '2a', (), '0')
    f3.add_arc('2', '3', (), '0')
    f3.add_arc('2a', '3', (), '0')

    return f3
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    Letters are copied on a self-loop at state '1'.  The first three digits
    are copied along '1' -> '2' -> '3' -> '4'; any further digit is consumed
    at '4' with empty output.  Every state is final.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    states = ('1', '2', '3', '4')
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in states:
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        # Fourth and later digits are dropped.
        f2.add_arc('4', '4', digit, ())

    return f2
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    This function only sets up the entry states: a first letter leads to
    'letter_first' and a first digit leads to 'number_first'.  The remaining
    states and arcs are added by the helpers get_letter_number and
    get_number_letter.

    Returns:
        The constructed FST.
    """
    initial = 'start'
    after_letter = 'letter_first'
    after_digit = 'number_first'
    digits = list('0123456789')

    fst = FST('soundex-truncate')
    for name in (initial, after_letter, after_digit):
        fst.add_state(name)
    # Not expected to occur in practice, but the tests want it.
    fst.set_final(after_digit)
    fst.initial_state = initial

    for ch in string.ascii_letters:
        fst.add_arc(initial, after_letter, ch, ch)
    for d in digits:
        fst.add_arc(initial, after_digit, str(d), str(d))

    get_letter_number(fst, after_letter, digits)
    get_number_letter(fst, after_digit, digits)

    return fst
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    State '1' copies letters on a self-loop.  States 'd1', 'd2' and 'd3' are
    reached after one, two and three copied digits and are the final states;
    further digits are consumed at 'd3' with empty output.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    states = ['1', 'd1', 'd2', 'd3']
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in states[1:]:
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    digits = [str(n) for n in range(10)]

    # Each consecutive pair of states is linked by digit-copying arcs.
    for src, dst in zip(states, states[1:]):
        for digit in digits:
            f2.add_arc(src, dst, digit, digit)

    # Digits after the third are dropped.
    for digit in digits:
        f2.add_arc('d3', 'd3', digit, ())

    return f2
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    From any state a letter is copied and leads back to state '1'.  Digits
    are copied along '1' -> '2' -> '3' -> '4'; at '4' further digits are
    consumed with empty output.  All four states are final.

    Returns:
        The constructed FST.
    """
    fst = FST('soundex-truncate')

    names = ('1', '2', '3', '4')
    for name in names:
        fst.add_state(name)
    fst.initial_state = '1'
    for name in names:
        fst.set_final(name)

    for ch in string.ascii_letters:
        for origin in names:
            fst.add_arc(origin, '1', ch, ch)

    for d in '1234567890':
        fst.add_arc('1', '2', d, d)
        fst.add_arc('2', '3', d, d)
        fst.add_arc('3', '4', d, d)
        fst.add_arc('4', '4', d, '')

    return fst
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    A leading letter is copied on 's0' -> 's1'; digits are copied along
    's1' -> 's5' -> 's6' -> 's7', or 's0' -> 's8' when the input starts with
    a digit.  Arcs with empty input emit '0' along 's1' -> 's2' -> 's3' ->
    's4', and from 's5', 's6' and 's8' into that chain.  Final states are
    's4' and 's7'.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    for index in range(9):
        f3.add_state('s%d' % index)
    f3.initial_state = 's0'
    f3.set_final('s4')
    f3.set_final('s7')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('s0', 's1', letter, letter)

    for digit in string.digits:
        f3.add_arc('s1', 's5', digit, digit)
        f3.add_arc('s5', 's6', digit, digit)
        f3.add_arc('s6', 's7', digit, digit)
        f3.add_arc('s0', 's8', digit, digit)

    # Padding arcs are independent of the digit: add each exactly once.
    padding_arcs = (
        ('s1', 's2'),
        ('s2', 's3'),
        ('s3', 's4'),
        ('s5', 's3'),
        ('s6', 's4'),
        ('s8', 's3'),
    )
    for src, dst in padding_arcs:
        f3.add_arc(src, dst, (), '0')

    return f3
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    Letters are copied on a self-loop at '1'; up to three digits are copied
    along '1' -> '2' -> '3' -> '4'; later digits are consumed at '4' with
    empty output.  All states are final.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    for state in ('1', '2', '3', '4'):
        f2.add_state(state)
    f2.initial_state = '1'
    for state in ('4', '3', '2', '1'):
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        # Anything past the third digit is dropped.
        f2.add_arc('4', '4', digit, ())

    return f2
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    Letters loop on state '1'.  Digit-copying arcs link '1' -> 'd1' -> 'd2'
    -> 'd3'; the three 'd' states are final, and 'd3' consumes any extra
    digit with empty output.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    states = ['1', 'd1', 'd2', 'd3']
    digit_states = states[1:]
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in digit_states:
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    # Link every state to its successor with one arc per digit.
    for src, dst in zip(states, digit_states):
        for n in range(10):
            f2.add_arc(src, dst, str(n), str(n))

    # Digits after the third are dropped.
    for n in range(10):
        f2.add_arc('d3', 'd3', str(n), ())

    return f2
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    Letters are copied on a self-loop at '1'; digits are copied along
    '1' -> '2' -> '3' -> '4'.  From '1', '2' and '3' an arc with empty input
    emits the missing zeros ('000', '00', '0') and jumps to the only final
    state, '4'.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    for state in ('1', '2', '3', '4'):
        f3.add_state(state)
    f3.initial_state = '1'
    f3.set_final('4')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    for number in range(10):
        digit = str(number)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)
        f3.add_arc('3', '4', digit, digit)

    # The original wrapped these in a ten-iteration loop that never used its
    # variable, creating ten copies of each arc; one copy is enough.
    f3.add_arc('1', '4', (), '000')
    f3.add_arc('2', '4', (), '00')
    f3.add_arc('3', '4', (), '0')

    return f3
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    A first letter moves '1' -> '2' and further letters loop on '2'.  The
    digits '1'-'6' are copied along '3' -> '4' -> '5' (entering '3' from '1'
    or '2'); at '5' further digits are consumed with empty output.  States
    '2' through '5' are final.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    f2.add_state('1')
    f2.initial_state = '1'

    later_states = [str(i) for i in range(2, 6)]
    for state in later_states:
        f2.add_state(state)
    for state in later_states:
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '2', letter, letter)
        f2.add_arc('2', '2', letter, letter)

    # Only '1'-'6' are handled here, as in the original.
    for digit in '123456':
        f2.add_arc('1', '3', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        f2.add_arc('4', '5', digit, digit)
        f2.add_arc('5', '5', digit, '')

    return f2
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    There is one state per letter group, named after the group's digit
    ('0' is the group 'aehiouwy').  The first letter, in either case, is
    copied and selects its group's state.  Afterwards a letter of the
    current group is consumed with empty output, and a letter of another
    group moves to that group's state and emits its digit (nothing for
    group '0').
    """
    fst = FST('soundex-generate')

    group_states = ['0', '1', '2', '3', '4', '5', '6']
    fst.add_state('start')
    for name in group_states:
        fst.add_state(name)
    fst.initial_state = 'start'
    for name in group_states:
        fst.set_final(name)

    groups = {
        '0': 'aehiouwy',
        '1': 'bfpv',
        '2': 'cgjkqsxz',
        '3': 'dt',
        '4': 'l',
        '5': 'mn',
        '6': 'r'
    }

    for own, members in groups.items():
        for ch in members:
            # First letter is kept as-is, in either case.
            fst.add_arc('start', own, ch, ch)
            fst.add_arc('start', own, ch.upper(), ch.upper())
            # Same group again: consume silently.
            fst.add_arc(own, own, ch, '')

        for other in groups:
            if other.startswith(own):
                continue
            emitted = '' if other.startswith('0') else other
            for ch in groups[other]:
                fst.add_arc(own, other, ch, emitted)

    return fst
def add_zero_padding():
    """Build a skeleton 'soundex-padzero' FST.

    Only the states are declared: '1' is the initial state and '2' is the
    final state.  No arcs are added.

    Returns:
        The constructed FST.
    """
    padder = FST('soundex-padzero')
    for name in ('1', '2'):
        padder.add_state(name)
    padder.initial_state = '1'
    padder.set_final('2')
    return padder
def french_count():
    """Build a single-state FST that maps each digit to kFRENCH_TRANS.

    The state "start" is both initial and final.  For every digit 0-9 a
    self-loop consumes the digit's string form and emits
    kFRENCH_TRANS[digit].

    Returns:
        The constructed FST.
    """
    f = FST("french")
    f.add_state("start")
    f.initial_state = "start"

    # range() behaves the same as xrange() here and also exists on Python 3.
    for ii in range(10):
        f.add_arc("start", "start", str(ii), [kFRENCH_TRANS[ii]])

    f.set_final("start")
    return f
def french_count():
    """Build a single-state FST that maps each digit to kFRENCH_TRANS.

    The state 'start' is both initial and final.  For every digit 0-9 a
    self-loop consumes the one-element input [str(digit)] and emits
    [kFRENCH_TRANS[digit]].

    Returns:
        The constructed FST.
    """
    f = FST('french')
    f.add_state('start')
    f.initial_state = 'start'

    # range() behaves the same as xrange() here and also exists on Python 3.
    for ii in range(10):
        f.add_arc('start', 'start', [str(ii)], [kFRENCH_TRANS[ii]])

    f.set_final('start')
    return f
def french_count():
    """Build a single-state FST that maps each digit to kFRENCH_TRANS.

    'start' is the initial and the final state; each digit 0-9 gets a
    self-loop that consumes str(digit) and emits [kFRENCH_TRANS[digit]].

    Returns:
        The constructed FST.
    """
    f = FST('french')
    f.add_state('start')
    f.initial_state = 'start'

    # range() behaves the same as xrange() here and also exists on Python 3.
    for ii in range(10):
        symbol = str(ii)
        f.add_arc('start', 'start', symbol, [kFRENCH_TRANS[ii]])

    f.set_final('start')
    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    'q1' is the initial state; the first lowercase letter is copied on
    'q1' -> 'q2'.  States 'n1'..'n6' record the digit group of the last coded
    letter.  Letters in 'aehiouwy' are consumed with empty output and leave
    the state unchanged.  A coded letter emits its digit when arriving from
    any other state and is consumed silently when already in its own state.
    """
    fst = FST('soundex-generate')

    accepting = ['q2', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6']
    for name in ['q1'] + accepting:
        fst.add_state(name)
    fst.initial_state = 'q1'
    for name in accepting:
        fst.set_final(name)

    dropped = set('aehiouwy')
    # (letters in the group, state for the group, digit emitted)
    coded = [
        (set('bfpv'), 'n1', '1'),
        (set('cgjkqsxz'), 'n2', '2'),
        (set('dt'), 'n3', '3'),
        (set('l'), 'n4', '4'),
        (set('mn'), 'n5', '5'),
        (set('r'), 'n6', '6'),
    ]

    for ch in string.ascii_lowercase:
        fst.add_arc('q1', 'q2', ch, ch)

        if ch in dropped:
            for name in accepting:
                fst.add_arc(name, name, ch, ())
            continue

        for members, target, digit in coded:
            if ch not in members:
                continue
            for name in accepting:
                if name != target:
                    fst.add_arc(name, target, ch, digit)
            fst.add_arc(target, target, ch, ())
            break

    return fst
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    A letter is copied on '1' -> '2'; digits are copied along '1'/'2' -> '3'
    -> '4' -> '5'.  Arcs with empty input emit '0' along '2' -> '6' -> '7' ->
    '8', with '3' -> '7' and '4' -> '8' joining that chain.  Final states are
    '5' and '8'.

    Returns:
        The constructed FST.
    """
    padder = FST('soundex-padzero')

    for index in range(1, 9):
        padder.add_state(str(index))
    padder.initial_state = '1'
    for accepting in ('5', '8'):
        padder.set_final(accepting)

    for ch in string.ascii_letters:
        padder.add_arc('1', '2', ch, ch)

    digit_arcs = (('1', '3'), ('2', '3'), ('3', '4'), ('4', '5'))
    for d in string.digits:
        for origin, target in digit_arcs:
            padder.add_arc(origin, target, d, d)

    zero_arcs = (('2', '6'), ('3', '7'), ('4', '8'), ('6', '7'), ('7', '8'))
    for origin, target in zero_arcs:
        padder.add_arc(origin, target, '', '0')

    return padder
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    'start' is the initial state and '3' the only final state.  A letter is
    copied on 'start' -> '0'; digits are copied along 'start' -> '1' and
    '0' -> '1' -> '2' -> '3'.  Arcs with empty input emit '0' on the same
    steps, so shorter inputs can still reach '3'.  Digits read at '3' are
    consumed with empty output.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    f3.add_state('start')
    f3.initial_state = 'start'
    for x in range(4):
        f3.add_state(str(x))
    f3.set_final('3')

    # Zero-emitting arcs with empty input.
    f3.add_arc('0', '1', '', '0')
    f3.add_arc('start', '1', '', '0')
    f3.add_arc('1', '2', '', '0')
    f3.add_arc('2', '3', '', '0')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('start', '0', letter, letter)

    digits = [str(n) for n in range(10)]

    for digit in digits:
        f3.add_arc('start', '1', digit, digit)

    for x in range(3):
        for digit in digits:
            f3.add_arc(str(x), str(x + 1), digit, digit)

    # Extra digits beyond the third are dropped.
    for digit in digits:
        f3.add_arc('3', '3', digit, ())

    return f3
def french_count():
    """Build the 'french' FST over the number tokens 0..1000.

    Each number is classified by the module-level tables, checked in this
    order: hundredlist, zero, uniquenumber, seveneightynine.  Each class
    adds its own arcs between the states 'start', 'hundred', 'unique' and
    'sen'.  Numbers matching none of them get no arc.  'hundred', 'unique'
    and 'sen' are final; 'start' is not.

    Returns:
        The constructed FST.
    """
    f = FST('french')
    for state in ('start', 'hundred', 'unique', 'sen'):
        f.add_state(state)
    f.initial_state = 'start'

    # range() behaves the same as xrange() here and also exists on Python 3.
    for number in range(1001):
        token = str(number)
        if number in hundredlist:
            f.add_arc('start', 'hundred', [token], [hundredlist[number]])
        elif number == 0:
            f.add_arc('start', 'start', [token], [kFRENCH_TRANS[0]])
            # A zero after other tokens is consumed with empty output.
            f.add_arc('unique', 'unique', [token], [])
            f.add_arc('hundred', 'hundred', [token], [])
        elif number in uniquenumber:
            for src in ('start', 'hundred', 'unique'):
                f.add_arc(src, 'unique', [token], [kFRENCH_TRANS[number]])
        elif number in seveneightynine:
            for src in ('start', 'hundred'):
                f.add_arc(src, 'sen', [token], [seveneightynine[number]])

    for state in ('hundred', 'unique', 'sen'):
        f.set_final(state)
    return f
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    Letters are copied on a self-loop at 'initial'.  The arcs between
    'initial', 'firstDigit', 'secondDigit' and 'thirdDigit' (including the
    'thirdDigit' self-transition) are added by the helper
    addTruncateStates, called once per (source, destination) pair.  All
    four states are final.

    Returns:
        The FST returned by the last addTruncateStates call.
    """
    f2 = FST('soundex-truncate')

    states = ['initial', 'firstDigit', 'secondDigit', 'thirdDigit']
    for state in states:
        f2.add_state(state)
    f2.initial_state = 'initial'
    for state in states:
        f2.set_final(state)

    # Each state leads to the next one; the last state leads to itself.
    source = states
    destination = states[1:] + states[-1:]

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('initial', 'initial', letter, letter)

    # 'nxt' rather than 'next' so the builtin is not shadowed.
    for cur, nxt in zip(source, destination):
        f2 = addTruncateStates(cur, nxt, f2)

    return f2
def truncate_to_three_digits():
    """
    A FST that will truncate a soundex string to three digits

    A letter is copied on 'start' -> 'd1'; a leading digit is copied on
    'start' -> 'd2'.  Digits are copied along 'd1' -> 'd2' -> 'd3' -> 'end',
    and digits read at 'end' are consumed with empty output.  'd1', 'd2' and
    'd3' each have an arc with empty input and output to 'end', the only
    final state.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    for state in ('start', 'd1', 'd2', 'd3', 'end'):
        f2.add_state(state)
    f2.initial_state = 'start'
    f2.set_final('end')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('start', 'd1', letter, letter)

    # These arcs do not depend on any letter or digit: add each once.
    for state in ('d1', 'd2', 'd3'):
        f2.add_arc(state, 'end', (), ())

    digits = [str(n) for n in range(10)]

    for digit in digits:
        f2.add_arc('d1', 'd2', digit, digit)
        f2.add_arc('d2', 'd3', digit, digit)
        f2.add_arc('d3', 'end', digit, digit)
        # Digits after the third are dropped.
        f2.add_arc('end', 'end', digit, ())

    for digit in digits:
        f2.add_arc('start', 'd2', digit, digit)

    return f2
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    Skeleton only: a single state '1' that is both initial and final, with
    no arcs.

    Returns:
        The constructed FST.
    """
    truncator = FST('soundex-truncate')

    only_state = '1'
    truncator.add_state(only_state)
    truncator.initial_state = only_state
    truncator.set_final(only_state)

    return truncator
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied on '1' -> '2'.  States '3'..'9' each stand
    for one letter group: '3' for 'aehiouwy', which never emits anything,
    and '4'..'9' for the groups coded 1..6.  A lowercase letter moves to its
    group's state from any state '2'..'9' and emits the group's digit,
    unless the machine is already in that state, in which case nothing is
    emitted.  States '2'..'9' are final.
    """
    f1 = FST('soundex-generate')

    f1.add_state('1')
    f1.initial_state = '1'

    later_states = [str(i) for i in range(2, 10)]
    for state in later_states:
        f1.add_state(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f1.add_arc('1', '2', letter, letter)

    # (letters, target state, emitted digit); the first group emits nothing.
    groups = [
        ('aehiouwy', '3', ''),
        ('bfpv', '4', '1'),
        ('cgjkqsxz', '5', '2'),
        ('dt', '6', '3'),
        ('l', '7', '4'),
        ('mn', '8', '5'),
        ('r', '9', '6'),
    ]
    for letters, target, digit in groups:
        for letter in letters:
            for state in later_states:
                # Staying in the same group emits nothing.
                output = '' if state == target else digit
                f1.add_arc(state, target, letter, output)

    for state in later_states:
        f1.set_final(state)

    return f1
def generate(self, analysis):
    """Generate the morphologically correct word

    e.g.
    p = Parser()
    analysis = ['p','a','n','i','c','+past form']
    p.generate(analysis) ---> 'panicked'

    A fresh FST is created with a single initial state, populated by
    self._build_generator_fst, and then run on `analysis`.  The first
    element of the transduce result is joined into a string.
    """
    initial = 'start'

    generator = FST('generator')
    generator.add_state(initial)
    generator.initial_state = initial

    self._build_generator_fst(generator, analysis, initial)

    outputs = generator.transduce(analysis)
    return ''.join(outputs[0])
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied on '1' -> '2'.  Letter group number k (0 for
    'aehiouwy', then 1..6) owns state '2' + k, i.e. '2'..'8'.  A lowercase
    letter moves from any of those states to its group's state.  It emits
    the group number, except for group 0 or when the machine is already in
    that state, where nothing is emitted.
    """
    fst = FST('soundex-generate')

    groups = ["aehiouwy", "bfpv", "cgjkqsxz", "dt", "l", "mn", "r"]

    for name in '12345678':
        fst.add_state(name)
    fst.initial_state = '1'

    group_states = "2345678"
    for name in group_states:
        fst.set_final(name)

    # Step 1: keep the first letter.
    for ch in string.ascii_letters:
        fst.add_arc('1', '2', ch, ch)

    # Steps 2 and 3: code or drop every following letter.
    for number, members in enumerate(groups):
        target = group_states[number]
        digit = chr(ord('0') + number)
        for ch in members:
            for origin in group_states:
                if origin == target or digit == '0':
                    fst.add_arc(origin, target, ch, ())
                else:
                    fst.add_arc(origin, target, ch, digit)

    return fst
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    Skeleton only: declares the initial state 'start' and the final state
    'next' without adding any arcs.
    """
    fst = FST('soundex-generate')

    for name in ('start', 'next'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('next')

    return fst
def generate(self, analysis):
    """Generate the morphologically correct word

    e.g.
    p = Parser()
    analysis = ['p','a','n','i','c','+past form']
    p.generate(analysis) ---> 'panicked'

    The FST tracks whether the last letter was a vowel, a 'c' that followed
    a vowel, or another consonant.  A form tag after such a 'c' emits 'ked'
    or 'king'; after any other consonant it emits 'ed' or 'ing'.
    """
    fst = FST('morphology-generate')

    for name in ('start', 'vowel', 'consonant', 'c', 'form_1', 'form_2'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('form_1')
    fst.set_final('form_2')

    vowels = 'aeiou'
    for v in vowels:
        for origin in ('start', 'vowel', 'consonant', 'c'):
            fst.add_arc(origin, 'vowel', v, v)

    for ch in string.ascii_lowercase:
        if ch in vowels:
            continue
        # 'c' right after a vowel has its own state, added below.
        if ch != 'c':
            fst.add_arc('vowel', 'consonant', ch, ch)
        fst.add_arc('start', 'consonant', ch, ch)
        fst.add_arc('consonant', 'consonant', ch, ch)
        fst.add_arc('c', 'consonant', ch, ch)

    fst.add_arc('vowel', 'c', 'c', 'c')

    fst.add_arc('c', 'form_1', '+past form', 'ked')
    fst.add_arc('c', 'form_1', '+present participle form', 'king')
    fst.add_arc('consonant', 'form_2', '+past form', 'ed')
    fst.add_arc('consonant', 'form_2', '+present participle form', 'ing')

    first_result = fst.transduce(analysis)[0]
    return "".join(first_result)
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    The first letter is copied on '1' -> '2L'; further letters are consumed
    at '2L' with empty output.  Digits are copied along '1'/'2L' -> '2D' ->
    '3D' -> '4D', and digits read at '4D' are consumed with empty output.
    Every state is final.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    f2.add_state('1')
    f2.initial_state = '1'
    f2.set_final('1')
    for state in ('2L', '2D', '3D', '4D'):
        f2.add_state(state)
        f2.set_final(state)

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '2L', letter, letter)
        f2.add_arc('2L', '2L', letter, ())

    for digit in string.digits:
        f2.add_arc('1', '2D', digit, digit)
        f2.add_arc('2L', '2D', digit, digit)
        f2.add_arc('2D', '3D', digit, digit)
        f2.add_arc('3D', '4D', digit, digit)
        # Fourth and later digits are dropped.
        f2.add_arc('4D', '4D', digit, ())

    return f2
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    Placeholder mapping: the first lowercase letter is copied on 'start' ->
    'next', and every later lowercase letter is replaced by '0'.
    """
    fst = FST('soundex-generate')

    for name in ('start', 'next'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('next')

    for ch in string.ascii_lowercase:
        fst.add_arc('start', 'next', ch, ch)
        fst.add_arc('next', 'next', ch, '0')

    return fst
def parse(self, word):
    """Parse a word morphologically

    e.g.
    p = Parser()
    word = ['p', 'a', 'n', 'i', 'c', 'k','e','d']
    p.parse(word) ---> 'panic+past form'

    For every lexicon entry a chain of states spells the word out from
    'start'.  From the end of the chain an extra state consumes an inserted
    'k' with empty output.  self._add_ending_states is called for both the
    'k' state and the plain end of the chain.
    """
    lexicon = {'panic', 'havoc', 'sync', 'lick', 'want'}
    root = 'start'
    k_tag = 'k_insertion'

    parser = FST('parser')
    parser.add_state(root)
    parser.initial_state = root

    for entry in lexicon:
        tail = root
        for ch in entry:
            # State name is "<word>-<letter>"; unique for this lexicon.
            node = entry + '-' + ch
            parser.add_state(node)
            parser.add_arc(tail, node, ch, ch)
            tail = node

        k_node = k_tag + '-' + entry
        parser.add_state(k_node)
        parser.add_arc(tail, k_node, 'k', '')

        self._add_ending_states(parser, entry, k_node, k=k_tag)
        self._add_ending_states(parser, entry, tail)

    return ''.join(parser.transduce(word)[0])
def generate_control(self):
    """Build the control FST: a verb step followed by an argument hypercube.

    State "0" is initial.  A KRPosMatcher("VERB") transition with an
    ExpandOperator leads to state "1".  With n keys in self.matchers, key
    number i contributes 2**i to the state number when its argument is
    filled.  Every ordering of the arguments therefore walks from state 1
    to state 2**n, the only final state.

    Returns:
        The constructed FST.
    """
    # Materialise the keys: .index() is used below and Python 3 key views
    # do not provide it.
    arguments = list(self.matchers.keys())
    last_state = 2 ** len(arguments)

    control = FST()

    # zero state is for verb
    control.add_state("0", is_init=True, is_final=False)

    # inside states for the cube, except the last, accepting state
    for i in range(1, last_state):
        control.add_state(str(i), is_init=False, is_final=False)

    # last node of the hypercube
    control.add_state(str(last_state), is_init=False, is_final=True)

    # first transition
    control.add_transition(
        KRPosMatcher("VERB"),
        [ExpandOperator(self.lexicon, self.working_area)],
        "0", "1")

    # every filled argument increases the state number by its power of two
    for path in permutations(arguments):
        actual_state = 1
        for arg in path:
            new_state = actual_state + 2 ** arguments.index(arg)
            control.add_transition(
                self.matchers[arg],
                [FillArgumentOperator(arg, self.working_area)],
                str(actual_state), str(new_state))
            actual_state = new_state

    return control
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    Letters are copied on a self-loop at '1'; digits are copied along
    '1' -> '1a' -> '1b' -> '2'.  From '1', '1a' and '1b' an arc with empty
    input emits '000', '00' or '0' respectively and moves to '2'.  Both '1'
    and '2' are final.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    for state in ('1', '1a', '1b', '2'):
        f3.add_state(state)
    f3.initial_state = '1'

    # NOTE(review): '1' being final lets a digit-less string be accepted
    # without padding. Kept as in the original; confirm this is intended.
    f3.set_final('1')
    f3.set_final('2')

    f3.add_arc('1', '2', (), '000')
    f3.add_arc('1a', '2', (), '00')
    f3.add_arc('1b', '2', (), '0')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    digits = [str(number) for number in range(10)]
    for src, dst in (('1', '1a'), ('1a', '1b'), ('1b', '2')):
        for digit in digits:
            f3.add_arc(src, dst, digit, digit)

    return f3
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first lowercase letter is copied on 'start' -> 'next'.  At 'next' a
    letter found in `vowels` is consumed with empty output, and a letter
    found in grp1..grp6 is replaced by the digit '1'..'6'.  A letter found
    in none of these collections gets no arc at 'next'.
    """
    fst = FST('soundex-generate')

    for name in ('start', 'next'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('next')

    # Checked in this order; the first collection containing the letter wins.
    outputs = [
        (vowels, ()),
        (grp1, '1'),
        (grp2, '2'),
        (grp3, '3'),
        (grp4, '4'),
        (grp5, '5'),
        (grp6, '6'),
    ]

    for ch in string.ascii_lowercase:
        fst.add_arc('start', 'next', ch, ch)
        for members, emitted in outputs:
            if ch in members:
                fst.add_arc('next', 'next', ch, emitted)
                break

    return fst
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    Letters are copied on a self-loop at '1'; digits are copied along
    '1' -> '1a' -> '1b' -> '2'.  From '1', '1a' and '1b' an arc with empty
    input emits '000', '00' or '0' respectively and moves to the only final
    state, '2'.

    Returns:
        The constructed FST.
    """
    f3 = FST('soundex-padzero')

    for state in ('1', '1a', '1b', '2'):
        f3.add_state(state)
    f3.initial_state = '1'
    f3.set_final('2')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    for number in string.digits:
        f3.add_arc('1', '1a', number, number)
        f3.add_arc('1a', '1b', number, number)
        f3.add_arc('1b', '2', number, number)

    # Emit however many zeros are still missing.
    f3.add_arc('1', '2', (), '000')
    f3.add_arc('1a', '2', (), '00')
    f3.add_arc('1b', '2', (), '0')

    return f3
def add_zero_padding():
    """Build the 'soundex-padzero' FST.

    Letters are copied on a self-loop at '1'; digits are copied along
    '1' -> '2' -> '5' -> '6'.  Arcs with empty input emit '0' along '1' ->
    '3' -> '4' -> '6', plus '2' -> '4' and '5' -> '6'.  '6' is the only final
    state.

    Returns:
        The constructed FST.
    """
    padder = FST('soundex-padzero')

    for index in range(1, 7):
        padder.add_state(str(index))
    padder.initial_state = '1'
    padder.set_final('6')

    for ch in string.ascii_letters:
        padder.add_arc('1', '1', ch, ch)

    for d in '1234567890':
        padder.add_arc('1', '2', d, d)
        padder.add_arc('2', '5', d, d)
        padder.add_arc('5', '6', d, d)

    zero_arcs = (('1', '3'), ('3', '4'), ('4', '6'), ('2', '4'), ('5', '6'))
    for origin, target in zero_arcs:
        padder.add_arc(origin, target, '', '0')

    return padder
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    Placeholder mapping: the first lowercase letter is copied on 'start' ->
    'next'; each following lowercase letter is rewritten to '0'.
    """
    machine = FST('soundex-generate')

    first, rest = 'start', 'next'
    machine.add_state(first)
    machine.add_state(rest)
    machine.initial_state = first
    machine.set_final(rest)

    for symbol in string.ascii_lowercase:
        machine.add_arc(first, rest, symbol, symbol)
        machine.add_arc(rest, rest, symbol, '0')

    return machine
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    As written this is a pass-through: the single state '1' is initial and
    final, and copies every letter and every digit unchanged.  No
    truncation is performed.

    Returns:
        The constructed FST.
    """
    f2 = FST('soundex-truncate')

    f2.add_state('1')
    f2.initial_state = '1'
    f2.set_final('1')

    # ascii_letters is locale-independent and available on Python 2 and 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '1', digit, digit)

    return f2
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter, in either case, is copied on 'initial' -> '0'.  State
    '0' means the last letter was uncoded; states '1'..'6' mean the last
    coded letter belonged to that digit's group.  Letters in 'aehiouwy' are
    consumed with empty output and lead to '0'.  A coded letter emits its
    digit and moves to the digit's state when coming from any other state.
    When the machine is already in that state the letter is consumed
    silently and the state is kept.  Only lowercase letters are coded.
    """
    f1 = FST('soundex-generate')

    aeoy = ['a', 'e', 'h', 'i', 'o', 'u', 'w', 'y']
    # (digit / state name, letters of the group)
    coded = [
        ('1', ['b', 'f', 'p', 'v']),
        ('2', ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z']),
        ('3', ['d', 't']),
        ('4', ['l']),
        ('5', ['m', 'n']),
        ('6', ['r']),
    ]

    digit_states = ['0', '1', '2', '3', '4', '5', '6']
    f1.add_state('initial')
    for state in digit_states:
        f1.add_state(state)
    f1.initial_state = 'initial'
    for state in digit_states:
        f1.set_final(state)

    for letter in string.ascii_letters:
        f1.add_arc('initial', '0', letter, letter)

        if letter in aeoy:
            for state in digit_states:
                f1.add_arc(state, '0', letter, ())
            continue

        for digit, members in coded:
            if letter not in members:
                continue
            for state in digit_states:
                if state != digit:
                    f1.add_arc(state, digit, letter, digit)
            # Stay in the group's own state. The original moved to '0'
            # here, so a third consecutive letter of the same group emitted
            # the digit a second time.
            f1.add_arc(digit, digit, letter, ())
            break

    return f1
def french_count():
    """
    Build and return the FST named 'french'.

    Arcs consume digit characters (some consume two at once, e.g. '10' or
    '00') and emit entries of kFRENCH_TRANS and kFRENCH_AND, which are
    defined elsewhere in this file.  Presumably the input is a zero-padded
    three-digit string -- TODO confirm against the caller.

    NOTE(review): the indentation of this function was lost; the nesting
    below is reconstructed from the statement order and should be checked
    against version control before relying on it.
    """
    f = FST('french')
    f.add_state('start')
    # one number and two trailing unknowns
    f.add_state('n**')
    # exception from state n**
    f.add_state('n**+')
    # two numbers and one trailing unknown
    # NOTE(review): 'nn*' receives no arcs in this function.
    f.add_state('nn*')
    # zero and two unknown digits trailing and so on
    f.add_state('0**')
    f.add_state('00*')
    f.add_state('00n')
    f.add_state('0n*')
    f.add_state('0n*+')
    f.add_state('0nn')
    f.add_state('n00')
    # NOTE(review): 'nnn' (final) and 'nnn*' receive no arcs in this function.
    f.add_state('nnn')
    f.add_state('nnn*')
    f.add_state('*et*')
    # vigesimal counting for 7 in ((0/n)n*)
    f.add_state('0n*Vega7+')
    f.add_state('0n*Vega7')
    f.add_state('0nnVega7')
    # vigesimal counting for 8 in ((0/n)n*)
    f.add_state('0n*Vega8')
    f.add_state('0n*Vega8+')
    f.add_state('0nnVega8')
    # vigesimal counting for 9 in ((0/n)n*)
    f.add_state('0n*Vega9')
    f.add_state('0n*Vega9+')
    f.add_state('0n*Vega9++')
    f.add_state('0nnVega9')
    # set final states
    f.set_final('00n')
    f.set_final('0nn')
    f.set_final('nnn')
    f.set_final('n00')
    f.set_final('0nnVega7')
    f.set_final('0nnVega8')
    f.set_final('0nnVega9')
    # initial state
    f.initial_state = 'start'
    # remove initial zeroes (consumed with empty output)
    f.add_arc('start', '0**', '0', ())
    f.add_arc('0**', '00*', '0', ())
    for ii in xrange(10):
        # from '0n*Vega8+' to '0nnVega8': units digit after "quatre vingt";
        # a units digit of 0 emits nothing
        if ii != 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), [kFRENCH_TRANS[ii]])
        elif ii == 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), ())
        # from '0n*Vega7' to '0nnVega7' 7-9
        if ii == 0 or ii == 7 or ii ==8 or ii == 9:
            # NOTE(review): this epsilon arc does not depend on ii, so the
            # same arc is added once per matching ii (four times).
            f.add_arc('0n*Vega7', '0n*Vega7+', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega7+', '0n*Vega7+', str(ii), [kFRENCH_TRANS[ii]])
            # f.add_arc('0n*Vega9+', '0n*Vega9++', (), [kFRENCH_TRANS[10]])
            # NOTE(review): with the arc above commented out, nothing in this
            # function leads into '0n*Vega9++'.
            f.add_arc('0n*Vega9++', '0nnVega9', str(ii), [kFRENCH_TRANS[ii]])
            if ii == 0:
                f.add_arc('0n*Vega7+', '0nnVega7', '0', ())
                f.add_arc('0n*Vega9++', '0nnVega9', '0', ())
            elif ii == 7 or ii == 8 or ii == 9:
                f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii]])
        # from '0n*Vega' to '0nnVega' 2-6: emits the 12..16 entry
        if ii == 2 or ii == 3 or ii ==4 or ii == 5 or ii == 6:
            f.add_arc('0n*Vega7', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        if ii == 1:
            f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('n**','0n*', str(ii), [kFRENCH_TRANS[10]])
            # NOTE(review): both arcs below consume a '1'; taken in sequence
            # from '0n*Vega7' they need two '1' symbols -- confirm intent.
            f.add_arc('0n*Vega7', '0n*Vega7+', str(ii), [kFRENCH_AND])
            f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        # from '00*' to '00n': a lone units digit (0 emits kFRENCH_TRANS[0])
        f.add_arc('00*', '00n', str(ii), [kFRENCH_TRANS[ii]])
        # from '*n*' to '*nn' 2-9
        if ii != 0 and ii !=9:
            f.add_arc('0n*','0nn', str(ii+1), [kFRENCH_TRANS[ii+1]])
            f.add_arc('0n*+','0nn', str(ii), [kFRENCH_TRANS[ii]])
        # from 'start' to 'nnn' 200,300,...,900
        if ii != 0 and ii !=1:
            f.add_arc('start','n**+', str(ii), [kFRENCH_TRANS[ii]])
            f.add_arc('n**+', 'n**', (), [kFRENCH_TRANS[100]])
        # from 'n**' to 'n0*' 0 (the input '00' consumes two symbols)
        if ii == 0:
            f.add_arc('n**', 'n00', '00', ())
        if ii == 1:
            f.add_arc('start', 'n**', '1', [kFRENCH_TRANS[100]])
    # from '*n*' to '*et*' 1
    f.add_arc('0n*','*et*', '1', [kFRENCH_AND])
    # from '*et*' to '*nn' 1
    f.add_arc('*et*','0nn', (), [kFRENCH_TRANS[1]])
    # from '0**' to '*nn' 10-16 (two-symbol inputs '10'..'16')
    for ii in xrange(10,17):
        f.add_arc('0**','0nn', str(ii), [kFRENCH_TRANS[ii]])
        f.add_arc('n**','0nn', str(ii), [kFRENCH_TRANS[ii]])
    # from '0**' to '*nn' 20-60 (two-symbol inputs '20'..'60')
    for ii in xrange(2,7):
        f.add_arc('0**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        f.add_arc('n**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        # from '0**', to *n*
        f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[ii*10]])
        # from 'n**' to '0n*+'
        f.add_arc('n**', '0n*+', str(ii), [kFRENCH_TRANS[ii*10]])
    # tens digit 7, 8, 9: emit kFRENCH_TRANS[60] or kFRENCH_TRANS[4] first;
    # 8 and 9 then emit kFRENCH_TRANS[20] on an epsilon arc
    for ii in xrange(7,10):
        if ii == 7:
            f.add_arc('0**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
            f.add_arc('n**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
        elif ii == 8:
            f.add_arc('0**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega8', '0n*Vega8+', (), [kFRENCH_TRANS[20]])
        elif ii == 9:
            f.add_arc('0**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega9', '0n*Vega9+', (), [kFRENCH_TRANS[20]])
    # a zero tens digit after a hundreds digit emits nothing
    f.add_arc('n**', '0n*+', '0', ())
    return f
def french_count():
    """
    Build the FST that spells a three-digit number in French.

    The machine reads the hundreds digit first, so shorter numbers must be
    zero-padded.  Input symbols are digit characters; output symbols are
    entries of kFRENCH_TRANS plus kFRENCH_AND (both defined elsewhere in
    this file).

    State overview:

    * 'start'                   -- before the hundreds digit
    * 'no-hundreds', 'no-tens'  -- leading zeros seen, nothing emitted yet
    * '1stzero'                 -- "cent" emitted, tens digit comes next
    * 'ones'                    -- units digit of 0x / 10x / 8x
    * 'tens'                    -- units digit of 1x and 9x (dix .. seize)
    * '20-69'                   -- units digit after vingt .. soixante
    * '70-ten'                  -- units digit after "soixante" for 70-79
    * 'seventeen' / 'eighteen' / 'nineteen' -- "dix" emitted, the unit
      follows on an epsilon arc

    000 is routed to the final state 'zero' and emits kFRENCH_TRANS[0];
    sharing '1stzero' between a leading zero and a hundreds digit would
    make that impossible, hence the separate 'no-hundreds' / 'no-tens'
    states.  Each arc is added exactly once.
    """
    f = FST('french')

    all_states = (
        'start', 'no-hundreds', 'no-tens', '1stzero', 'tens',
        'seventeen', 'final_seventeen',
        'eighteen', 'final_eighteen',
        'nineteen', 'final_nineteen',
        'zero', 'ones', '20-69', '70-ten', '80s', '90s', '100s',
        'et', '10-et', 'et-un', 'et-onze',
    )
    for state in all_states:
        f.add_state(state)
    f.initial_state = 'start'

    final_states = (
        'zero', 'ones', 'tens',
        'final_seventeen', 'final_eighteen', 'final_nineteen',
        '20-69', '70-ten', '80s', '90s', 'et-un', 'et-onze',
    )
    for state in final_states:
        f.set_final(state)

    # Hundreds digit: 0 emits nothing, 1 is plain "cent", 2-9 emit the
    # digit and then "cent" on an epsilon arc.
    f.add_arc('start', 'no-hundreds', '0', [])
    f.add_arc('start', '1stzero', '1', [kFRENCH_TRANS[100]])
    for digit in range(2, 10):
        f.add_arc('start', '100s', str(digit), [kFRENCH_TRANS[digit]])
    f.add_arc('100s', '1stzero', (), [kFRENCH_TRANS[100]])

    # Tens digit 0.  After a hundreds word a zero units digit is silent;
    # after two leading zeros it must be spelled out as "zero".
    f.add_arc('1stzero', 'ones', '0', [])
    f.add_arc('no-hundreds', 'no-tens', '0', [])
    f.add_arc('no-tens', 'zero', '0', [kFRENCH_TRANS[0]])
    for digit in range(1, 10):
        f.add_arc('no-tens', 'ones', str(digit), [kFRENCH_TRANS[digit]])
        f.add_arc('ones', 'ones', str(digit), [kFRENCH_TRANS[digit]])
    f.add_arc('ones', 'ones', '0', [])

    # Tens digit 1-9, identical whether or not a hundreds word was emitted.
    for source in ('1stzero', 'no-hundreds'):
        f.add_arc(source, 'tens', '1', [])
        for tens_digit in range(2, 7):
            f.add_arc(source, '20-69', str(tens_digit),
                      [kFRENCH_TRANS[tens_digit * 10]])
        f.add_arc(source, '70-ten', '7', [kFRENCH_TRANS[60]])
        # 80 and 90 both start with "quatre"; "vingt" follows below.
        f.add_arc(source, '80s', '8', [kFRENCH_TRANS[4]])
        f.add_arc(source, '90s', '9', [kFRENCH_TRANS[4]])

    # 10 - 16 (also the tail of 90 - 96)
    for digit in range(7):
        f.add_arc('tens', 'tens', str(digit), [kFRENCH_TRANS[10 + digit]])

    # 70 - 76; 71 takes "et" before "onze"
    f.add_arc('70-ten', '70-ten', '0', [kFRENCH_TRANS[10]])
    f.add_arc('70-ten', '10-et', '1', [kFRENCH_AND])
    f.add_arc('10-et', 'et-onze', (), [kFRENCH_TRANS[11]])
    for digit in range(2, 7):
        f.add_arc('70-ten', '70-ten', str(digit), [kFRENCH_TRANS[10 + digit]])

    # x7, x8, x9 in the teens are "dix" followed by the unit.
    teen_tails = (
        ('7', 'seventeen', 'final_seventeen'),
        ('8', 'eighteen', 'final_eighteen'),
        ('9', 'nineteen', 'final_nineteen'),
    )
    for symbol, pending, done in teen_tails:
        f.add_arc('tens', pending, symbol, [kFRENCH_TRANS[10]])
        f.add_arc('70-ten', pending, symbol, [kFRENCH_TRANS[10]])
        f.add_arc(pending, done, (), [kFRENCH_TRANS[int(symbol)]])

    # 20 - 69: a zero unit is silent, 1 becomes "et un"
    for digit in range(2, 10):
        f.add_arc('20-69', '20-69', str(digit), [kFRENCH_TRANS[digit]])
    f.add_arc('20-69', '20-69', '0', [])
    f.add_arc('20-69', 'et', '1', [kFRENCH_AND])
    f.add_arc('et', 'et-un', (), [kFRENCH_TRANS[1]])

    # 80 - 89: "vingt", then the plain units handling of 'ones'
    # (which already treats a zero unit as silent).
    f.add_arc('80s', 'ones', (), [kFRENCH_TRANS[20]])

    # 90 - 99: "vingt", then the teens handling of 'tens'.
    f.add_arc('90s', 'tens', (), [kFRENCH_TRANS[20]])

    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied through ('start' -> 'next').  Afterwards the
    machine is in 'next' (no code pending) or in one of 'one'..'six' (that
    code was emitted last):

    * a consonant emits its code and moves to that code's state, unless the
      machine is already there, in which case the letter is deleted;
    * a, e, i, o, u are deleted and return the machine to 'next', so the
      same code on both sides of a vowel is emitted twice;
    * h, w, y are deleted without leaving the current state, so they do not
      separate two consonants with the same code.

    Letters are classified case-insensitively, and the alphabet is
    string.ascii_letters so the result does not depend on the locale.
    """
    f1 = FST('soundex-generate')

    # (state name, emitted digit, consonants in the group)
    code_groups = (
        ('one', '1', 'bfpv'),
        ('two', '2', 'cgjkqsxz'),
        ('three', '3', 'dt'),
        ('four', '4', 'l'),
        ('five', '5', 'mn'),
        ('six', '6', 'r'),
    )
    separating_vowels = 'aeiou'

    # letter -> (state to enter, digit to emit)
    coding = {}
    for state_name, digit, members in code_groups:
        for member in members:
            coding[member] = (state_name, digit)

    running_states = ['next'] + [name for name, _, _ in code_groups]

    f1.add_state('start')
    for state in running_states:
        f1.add_state(state)
    f1.initial_state = 'start'
    for state in running_states:
        f1.set_final(state)

    # The leading letter is always kept verbatim.
    for letter in string.ascii_letters:
        f1.add_arc('start', 'next', letter, letter)

    for current in running_states:
        for letter in string.ascii_letters:
            key = letter.lower()
            if key in coding:
                target, digit = coding[key]
                if target == current:
                    # Repeat of the code just emitted: drop it.
                    f1.add_arc(current, current, letter, ())
                else:
                    f1.add_arc(current, target, letter, digit)
            elif key in separating_vowels:
                # Drop the vowel and forget the pending code.
                f1.add_arc(current, 'next', letter, ())
            else:
                # h, w, y: dropped, pending code is kept.
                f1.add_arc(current, current, letter, ())

    return f1
# where ALL transducers use characters as input symbols
def compose(input, *fsts):
    """
    Feed `input` through each transducer in `fsts`, in order.

    Every candidate produced so far is joined into a single string and
    handed to the next transducer's `transduce`; everything that call
    returns becomes a candidate for the following stage.  With no
    transducers the result is simply `[input]`.
    """
    candidates = [input]
    for transducer in fsts:
        candidates = [
            produced
            for candidate in candidates
            for produced in transducer.transduce(''.join(candidate))
        ]
    return candidates


if __name__ == '__main__':
    f1 = FST('test-generate')

    # 'start' is the initial state and 'next' the only final state
    for state_name in ('start', 'next'):
        f1.add_state(state_name)
    f1.initial_state = 'start'
    f1.set_final('next')

    # The first letter is rewritten to '1', every later one to '0'
    for letter in 'ABCD':
        f1.add_arc('start', 'next', letter, '1')
        f1.add_arc('next', 'next', letter, '0')

    f2 = FST('test-generate')
    for state_name in ('start', 'next'):
        f2.add_state(state_name)
    f2.initial_state = 'start'
def french_count():
    """
    Build the FST that spells a number given as a digit string in French.

    Input symbols are digit characters; output symbols are entries of
    kFRENCH_TRANS plus kFRENCH_AND (both defined elsewhere in this file).
    The machine is nondeterministic: from state '0' a digit may be read as
    a hundreds, tens or units digit, and only the reading that ends in a
    final state survives.

    States are the strings '0'..'25'.  The ones that carry arcs:

    * '0'  -- start; loops on leading zeros
    * '17' -- hundreds word(s) emitted;  '19' -- hundreds, then tens digit 0
    * '2'  -- tens digit 1 read, nothing emitted yet (11-16 follow)
    * '5'  -- tens digit 1 read, "dix" emitted (10, 17-19 follow)
    * '4'  -- tens digit 2-6 read
    * '10' -- tens digit 7 read ("soixante" emitted)
    * '12' -- tens digit 8 read ("quatre" emitted)
    * '16' -- tens digit 9 read ("quatre" emitted)
    * '1', '3', '6', '7', '8', '9', '11', '13', '14', '18', '20' -- final

    States '15' and '21'..'25' are declared but carry no arcs.

    "et" is emitted only for 21, 31, ... 61 and 71; 81 and 91 are
    "quatre vingt un" / "quatre vingt onze".
    """
    f = FST('french')

    for state_number in range(26):
        f.add_state(str(state_number))
    f.initial_state = '0'
    for final_state in ('1', '3', '6', '7', '8', '9',
                        '11', '13', '14', '18', '20'):
        f.set_final(final_state)

    # The order of the add_arc calls is kept stable on purpose: the machine
    # is nondeterministic and a backtracking transducer may try arcs in
    # insertion order.

    # --- digit '0' ---------------------------------------------------
    f.add_arc('0', '0', '0', ())                      # skip a leading zero
    f.add_arc('4', '6', '0', ())                      # 20, 30, ... 60
    f.add_arc('5', '8', '0', ())                      # 10 ("dix" already out)
    f.add_arc('0', '9', '0', [kFRENCH_TRANS[0]])      # the number zero
    f.add_arc('10', '11', '0', [kFRENCH_TRANS[10]])   # 70
    f.add_arc('12', '13', '0', [kFRENCH_TRANS[20]])   # 80
    f.add_arc('16', '18', '0', [kFRENCH_TRANS[20], kFRENCH_TRANS[10]])  # 90
    f.add_arc('17', '19', '0', ())                    # x0_
    f.add_arc('19', '9', '0', ())                     # x00

    # --- digit '1' ---------------------------------------------------
    f.add_arc('0', '2', '1', ())                      # 11-16, no hundreds
    f.add_arc('17', '2', '1', ())                     # 11-16 after hundreds
    f.add_arc('0', '17', '1', [kFRENCH_TRANS[100]])   # 1xx: plain "cent"
    f.add_arc('0', '5', '1', [kFRENCH_TRANS[10]])     # 10, 17-19
    f.add_arc('17', '5', '1', [kFRENCH_TRANS[10]])
    f.add_arc('4', '7', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])     # 21 .. 61
    f.add_arc('10', '11', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])  # 71
    # 81 and 91 take no "et" in French.
    f.add_arc('12', '14', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[1]])
    f.add_arc('16', '20', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[11]])

    # --- 11 .. 16 ----------------------------------------------------
    for i in range(1, 7):
        f.add_arc('2', '3', str(i), [kFRENCH_TRANS[i + 10]])

    # --- digits 2 .. 6 -----------------------------------------------
    for i in range(2, 7):
        f.add_arc('0', '4', str(i), [kFRENCH_TRANS[i * 10]])      # tens word
        f.add_arc('17', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[i + 10]])    # 72 .. 76
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[i + 10]])     # 92 .. 96

    # --- digits 2 .. 9 -----------------------------------------------
    for i in range(2, 10):
        f.add_arc('4', '7', str(i), [kFRENCH_TRANS[i]])           # x2 .. x9
        f.add_arc('0', '17', str(i),
                  [kFRENCH_TRANS[i], kFRENCH_TRANS[100]])         # 2xx .. 9xx
        f.add_arc('12', '14', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[i]])          # 82 .. 89

    # --- a lone units digit 1 .. 9 -----------------------------------
    for i in range(1, 10):
        f.add_arc('0', '1', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('19', '1', str(i), [kFRENCH_TRANS[i]])

    # --- units 7, 8, 9 after a "dix"-style tens ----------------------
    for i in range(7, 10):
        f.add_arc('5', '8', str(i), [kFRENCH_TRANS[i]])           # 17 .. 19
        f.add_arc('10', '11', str(i),
                  [kFRENCH_TRANS[10], kFRENCH_TRANS[i]])          # 77 .. 79
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[10],
                   kFRENCH_TRANS[i]])                             # 97 .. 99

    # --- tens digits 7, 8, 9 -----------------------------------------
    f.add_arc('0', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('17', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('0', '12', '8', [kFRENCH_TRANS[4]])
    f.add_arc('17', '12', '8', [kFRENCH_TRANS[4]])
    f.add_arc('0', '16', '9', [kFRENCH_TRANS[4]])
    f.add_arc('17', '16', '9', [kFRENCH_TRANS[4]])

    return f