Esempio n. 1
0
def add_zero_padding():
    """
    Build the zero-padding FST (the third soundex FST).

    An optional leading letter and up to three digits are copied through
    unchanged.  Missing digit positions are filled by arcs that read
    nothing and emit '0'.  State '3' is the only final state.

    Returns:
        The FST named 'soundex-padzero'.
    """
    f3 = FST('soundex-padzero')

    # '0'  : a leading letter has been read
    # '1'  : one digit read          '2' : two digits read
    # '0a' : letter plus one padding zero
    # '2a' : two digit positions produced, the last one by padding
    # '3'  : three digit positions produced
    for state in ('start', '0', '0a', '1', '2', '2a', '3'):
        f3.add_state(state)

    f3.initial_state = 'start'
    f3.set_final('3')

    for letter in string.ascii_letters:
        f3.add_arc('start', '0', letter, letter)

    for digit in string.digits:
        f3.add_arc('start', '1', digit, digit)
        f3.add_arc('0', '1', digit, digit)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)

    # Padding arcs do not depend on the digit, so they are added exactly
    # once instead of once per loop iteration.  Padding states only lead
    # to further padding states, so no digit can follow an inserted zero.
    f3.add_arc('0', '0a', (), '0')    # bare letter: first of three zeros
    f3.add_arc('0a', '2a', (), '0')
    f3.add_arc('1', '2a', (), '0')
    f3.add_arc('2', '3', (), '0')
    f3.add_arc('2a', '3', (), '0')

    return f3
Esempio n. 2
0
def truncate_to_three_digits():
    """
    Build the FST that keeps at most three digits of a soundex string.

    Letters are copied while the machine stays in state '1'.  The first
    three digits are copied as it advances through '2', '3' and '4';
    every further digit is read with empty output.  All four states are
    final.
    """
    truncator = FST('soundex-truncate')

    state_names = ('1', '2', '3', '4')
    for name in state_names:
        truncator.add_state(name)
    truncator.initial_state = '1'
    for name in state_names:
        truncator.set_final(name)

    # Letters loop on the initial state.
    for ch in string.letters:
        truncator.add_arc('1', '1', ch, ch)

    for digit in map(str, range(10)):
        truncator.add_arc('1', '2', digit, digit)
        truncator.add_arc('2', '3', digit, digit)
        truncator.add_arc('3', '4', digit, digit)
        # Fourth and later digits are dropped.
        truncator.add_arc('4', '4', digit, ())

    return truncator
Esempio n. 3
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    This function creates the three entry states and the arcs leaving the
    start state (one per ASCII letter, one per digit).  The remaining
    states and arcs are added by get_letter_number() and
    get_number_letter(), which are defined elsewhere.
    """
    initial = 'start'
    after_letter = 'letter_first'
    after_digit = 'number_first'
    digit_symbols = [ch for ch in '0123456789']

    fst = FST('soundex-truncate')
    for name in (initial, after_letter, after_digit):
        fst.add_state(name)
    # The state reached after a leading digit is accepting.
    fst.set_final(after_digit)
    fst.initial_state = initial

    for ch in string.ascii_letters:
        fst.add_arc(initial, after_letter, ch, ch)
    for ch in digit_symbols:
        fst.add_arc(initial, after_digit, ch, ch)

    get_letter_number(fst, after_letter, digit_symbols)
    get_number_letter(fst, after_digit, digit_symbols)

    return fst
Esempio n. 4
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    State '1' copies letters; 'd1', 'd2' and 'd3' are reached after one,
    two and three copied digits and are the final states.  Digits read in
    'd3' produce no output.
    """
    fst = FST('soundex-truncate')

    chain = ['1', 'd1', 'd2', 'd3']
    for name in chain:
        fst.add_state(name)

    fst.initial_state = '1'

    # Every state except the initial one is accepting.
    for name in chain[1:]:
        fst.set_final(name)

    for ch in string.letters:
        fst.add_arc('1', '1', ch, ch)

    # Copy one digit on each step along the chain.
    for src, dst in zip(chain[:-1], chain[1:]):
        for value in range(10):
            fst.add_arc(src, dst, str(value), str(value))

    # Any digit after the third is swallowed.
    for value in range(10):
        fst.add_arc('d3', 'd3', str(value), ())

    return fst
Esempio n. 5
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    States '1' to '4' are all final.  A letter read in any state is
    copied and returns the machine to state '1'.  Digits are copied
    while moving '1' -> '2' -> '3' -> '4'; digits read in '4' are
    replaced by empty output.
    """
    fst = FST('soundex-truncate')

    all_states = ('1', '2', '3', '4')
    for name in all_states:
        fst.add_state(name)
    fst.initial_state = '1'
    for name in all_states:
        fst.set_final(name)

    for ch in string.ascii_letters:
        for src in all_states:
            fst.add_arc(src, '1', ch, ch)

    for ch in '1234567890':
        fst.add_arc('1', '2', ch, ch)
        fst.add_arc('2', '3', ch, ch)
        fst.add_arc('3', '4', ch, ch)
        fst.add_arc('4', '4', ch, '')

    return fst
def add_zero_padding():
    """
    Build the zero-padding FST (the third soundex FST).

    States 's0' .. 's8' are created, with 's0' initial and 's4' / 's7'
    final.  A leading letter moves 's0' -> 's1'; digits are copied along
    's1' -> 's5' -> 's6' -> 's7' (and 's0' -> 's8' for a leading digit).
    Arcs with empty input emit '0' to fill the missing positions.
    """
    padder = FST('soundex-padzero')

    for index in range(9):
        padder.add_state('s%d' % index)

    padder.initial_state = 's0'
    for accepting in ('s4', 's7'):
        padder.set_final(accepting)

    for ch in string.letters:
        padder.add_arc('s0', 's1', ch, ch)

    digit_steps = (('s1', 's5'), ('s5', 's6'), ('s6', 's7'), ('s0', 's8'))
    for ch in string.digits:
        for src, dst in digit_steps:
            padder.add_arc(src, dst, ch, ch)

    # Each of these arcs reads nothing and writes a single '0'.
    zero_steps = (
        ('s1', 's2'),
        ('s2', 's3'),
        ('s3', 's4'),
        ('s5', 's3'),
        ('s6', 's4'),
        ('s8', 's3'),
    )
    for src, dst in zero_steps:
        padder.add_arc(src, dst, (), '0')

    return padder
Esempio n. 7
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    State '1' copies letters, states '2', '3' and '4' are reached after
    one, two and three copied digits, and digits read in '4' are dropped.
    Every state is final.
    """
    fst = FST('soundex-truncate')

    names = ['1', '2', '3', '4']
    for name in names:
        fst.add_state(name)

    fst.initial_state = names[0]
    for name in reversed(names):
        fst.set_final(name)

    for symbol in string.letters:
        fst.add_arc('1', '1', symbol, symbol)

    copy_steps = [('1', '2'), ('2', '3'), ('3', '4')]
    for value in range(10):
        digit = str(value)
        for src, dst in copy_steps:
            fst.add_arc(src, dst, digit, digit)
        # Beyond the third digit nothing is written.
        fst.add_arc('4', '4', digit, ())

    return fst
Esempio n. 8
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    The initial state '1' copies letters.  'd1', 'd2' and 'd3' (the final
    states) are entered after the first, second and third copied digit.
    Further digits loop on 'd3' with empty output.
    """
    truncator = FST('soundex-truncate')

    initial = '1'
    digit_states = ['d1', 'd2', 'd3']

    truncator.add_state(initial)
    for name in digit_states:
        truncator.add_state(name)

    truncator.initial_state = initial

    for name in digit_states:
        truncator.set_final(name)

    for ch in string.letters:
        truncator.add_arc(initial, initial, ch, ch)

    # Walk the chain '1' -> 'd1' -> 'd2' -> 'd3', copying one digit per step.
    previous = initial
    for name in digit_states:
        for value in range(10):
            truncator.add_arc(previous, name, str(value), str(value))
        previous = name

    for value in range(10):
        truncator.add_arc('d3', 'd3', str(value), ())

    return truncator
Esempio n. 9
0
def add_zero_padding():
    """
    Build the zero-padding FST (the third soundex FST).

    State '1' copies letters; digits are copied along '1' -> '2' -> '3'
    -> '4'.  From each of '1', '2' and '3' an arc that reads nothing
    jumps straight to the final state '4' and emits the zeros still
    missing ('000', '00' and '0' respectively).

    Returns:
        The FST named 'soundex-padzero'.
    """
    f3 = FST('soundex-padzero')

    for state in ('1', '2', '3', '4'):
        f3.add_state(state)

    f3.initial_state = '1'
    f3.set_final('4')

    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    for digit in string.digits:
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)
        f3.add_arc('3', '4', digit, digit)

    # The padding arcs are independent of any digit, so each one is added
    # a single time rather than once per iteration of a ten-step loop.
    f3.add_arc('1', '4', (), '000')
    f3.add_arc('2', '4', (), '00')
    f3.add_arc('3', '4', (), '0')

    return f3
Esempio n. 10
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    State '1' is initial; states '2' to '5' are final.  Letters lead to
    (and loop on) state '2'.  The digits '1' to '6' are copied while
    moving to '3', '4' and '5'; in '5' they are read with empty output.
    """
    fst = FST('soundex-truncate')

    fst.add_state('1')
    fst.initial_state = '1'

    later_states = [str(number) for number in range(2, 6)]
    for name in later_states:
        fst.add_state(name)
    for name in later_states:
        fst.set_final(name)

    for ch in string.letters:
        fst.add_arc('1', '2', ch, ch)
        fst.add_arc('2', '2', ch, ch)

    # Only the code digits 1-6 get arcs.
    for digit in '123456':
        fst.add_arc('1', '3', digit, digit)
        fst.add_arc('2', '3', digit, digit)
        fst.add_arc('3', '4', digit, digit)
        fst.add_arc('4', '5', digit, digit)
        fst.add_arc('5', '5', digit, '')

    return fst
def letters_to_numbers():
    """
    Build the FST that rewrites letters as soundex digits.

    There is one state per code ('0' .. '6'), all of them final, plus the
    initial state 'start'.  The first letter (lower or upper case) is
    copied and moves to the state of its code.  Afterwards a letter with
    the same code as the current state is dropped, and a letter with a
    different code moves to that code's state and emits the code (nothing
    is emitted for code '0').
    """
    f1 = FST('soundex-generate')

    code_states = ['0', '1', '2', '3', '4', '5', '6']
    f1.add_state('start')
    for name in code_states:
        f1.add_state(name)
    f1.initial_state = 'start'

    for name in code_states:
        f1.set_final(name)

    replace_letters = {
        '0': 'aehiouwy',
        '1': 'bfpv',
        '2': 'cgjkqsxz',
        '3': 'dt',
        '4': 'l',
        '5': 'mn',
        '6': 'r'
    }

    for state, in_strs in replace_letters.items():
        for ch in in_strs:
            capital = ch.upper()
            # The first letter is kept as it is, in either case.
            f1.add_arc('start', state, ch, ch)
            f1.add_arc('start', state, capital, capital)
            # A repeated code produces no output.
            f1.add_arc(state, state, ch, '')

        for other in replace_letters:
            if other.startswith(state):
                continue
            emitted = '' if other.startswith('0') else other
            for ch in replace_letters[other]:
                f1.add_arc(state, other, ch, emitted)

    return f1
Esempio n. 12
0
def add_zero_padding():
    """
    Create the skeleton of the zero-padding FST (the third soundex FST).

    Only the two states are declared: '1' is the initial state and '2'
    the final one.  No arcs are added here.
    """
    padder = FST('soundex-padzero')

    for name in ('1', '2'):
        padder.add_state(name)

    padder.initial_state = '1'
    padder.set_final('2')

    return padder
Esempio n. 13
0
def add_zero_padding():
    """
    Create the skeleton of the zero-padding FST (the third soundex FST).

    The machine has an initial state '1' and a final state '2'; this
    function does not add any arcs between them.
    """
    initial, final = '1', '2'

    fst = FST('soundex-padzero')
    fst.add_state(initial)
    fst.add_state(final)

    fst.initial_state = initial
    fst.set_final(final)

    return fst
Esempio n. 14
0
def french_count():
    """
    Build a one-state FST that maps each digit 0-9 to kFRENCH_TRANS[digit].

    The single state "start" is both initial and final; every digit has a
    self-loop whose output is a one-element list holding the entry of
    kFRENCH_TRANS for that digit.
    """
    fst = FST("french")

    only_state = "start"
    fst.add_state(only_state)
    fst.initial_state = only_state

    for value in xrange(10):
        fst.add_arc(only_state, only_state, str(value), [kFRENCH_TRANS[value]])

    fst.set_final(only_state)

    return fst
Esempio n. 15
0
def french_count():
    """
    Build a one-state FST translating the digits 0-9 via kFRENCH_TRANS.

    State 'start' is initial and final.  For every digit there is a
    self-loop reading the digit (as a one-element list) and writing the
    matching kFRENCH_TRANS entry (also as a one-element list).
    """
    translator = FST('french')

    translator.add_state('start')
    translator.initial_state = 'start'

    for value in xrange(10):
        read = [str(value)]
        written = [kFRENCH_TRANS[value]]
        translator.add_arc('start', 'start', read, written)

    translator.set_final('start')

    return translator
Esempio n. 16
0
def french_count():
    """
    Build a one-state FST translating the digits 0-9 via kFRENCH_TRANS.

    The state 'start' is initial and final and carries one self-loop per
    digit; the loop outputs a one-element list with kFRENCH_TRANS[digit].
    """
    state = 'start'

    f = FST('french')
    f.add_state(state)
    f.initial_state = state

    for value in xrange(10):
        digit = str(value)
        f.add_arc(state, state, digit, [kFRENCH_TRANS[value]])

    f.set_final(state)

    return f
Esempio n. 17
0
def letters_to_numbers():
    """
    Build the FST that rewrites letters as soundex digits.

    'q1' is the initial state and 'q2' is reached after the first
    (lower-case) letter, which is copied.  'n1' .. 'n6' record the digit
    emitted last.  Letters in 'aehiouwy' are dropped without changing
    state.  Any other letter emits its digit and moves to the matching
    'n' state, unless the machine is already there, in which case the
    letter is dropped.  Every state except 'q1' is final.
    """
    f1 = FST('soundex-generate')

    later_states = ['q2', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6']
    for name in ['q1'] + later_states:
        f1.add_state(name)

    f1.initial_state = 'q1'

    for name in later_states:
        f1.set_final(name)

    # (letters, state entered, digit written)
    groups = (
        ('bfpv', 'n1', '1'),
        ('cgjkqsxz', 'n2', '2'),
        ('dt', 'n3', '3'),
        ('l', 'n4', '4'),
        ('mn', 'n5', '5'),
        ('r', 'n6', '6'),
    )

    for ch in string.ascii_lowercase:
        f1.add_arc('q1', 'q2', ch, ch)

        if ch in 'aehiouwy':
            for name in later_states:
                f1.add_arc(name, name, ch, ())
            continue

        for members, target, digit in groups:
            if ch not in members:
                continue
            for name in later_states:
                if name != target:
                    f1.add_arc(name, target, ch, digit)
            # Same digit twice in a row: write nothing.
            f1.add_arc(target, target, ch, ())
            break

    return f1
Esempio n. 18
0
def letters_to_numbers():
    """
    Build the FST that rewrites letters as soundex digits.

    The first lower-case letter is copied on the arc 'q1' -> 'q2'.  After
    that, letters in 'aehiouwy' loop on the current state with empty
    output, and every other letter moves to the state 'n<digit>' of its
    group, writing the digit unless the machine is already in that state.
    All states other than the initial 'q1' are final.
    """
    f1 = FST('soundex-generate')

    initial = 'q1'
    accepting = ['q2', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6']

    f1.add_state(initial)
    for name in accepting:
        f1.add_state(name)

    f1.initial_state = initial

    for name in accepting:
        f1.set_final(name)

    # Letter -> digit lookup for every letter that carries a code.
    digit_of = {}
    for members, digit in (('bfpv', '1'), ('cgjkqsxz', '2'), ('dt', '3'),
                           ('l', '4'), ('mn', '5'), ('r', '6')):
        for member in members:
            digit_of[member] = digit

    for ch in string.ascii_lowercase:
        f1.add_arc(initial, 'q2', ch, ch)

        digit = digit_of.get(ch)
        if digit is None:
            # One of 'aehiouwy': dropped wherever it occurs.
            for name in accepting:
                f1.add_arc(name, name, ch, ())
        else:
            target = 'n' + digit
            for name in accepting:
                if name != target:
                    f1.add_arc(name, target, ch, digit)
            f1.add_arc(target, target, ch, ())

    return f1
Esempio n. 19
0
def add_zero_padding():
    """
    Build the zero-padding FST (the third soundex FST).

    States '1' .. '8' are created; '1' is initial, '5' and '8' are final.
    A letter moves '1' -> '2' and digits are copied along '3', '4', '5'.
    States '6', '7' and '8' are entered through arcs that read nothing
    and write a single '0'.
    """
    padder = FST('soundex-padzero')

    for number in range(1, 9):
        padder.add_state(str(number))

    padder.initial_state = '1'
    for name in ('5', '8'):
        padder.set_final(name)

    for ch in string.ascii_letters:
        padder.add_arc('1', '2', ch, ch)

    digit_steps = (('1', '3'), ('2', '3'), ('3', '4'), ('4', '5'))
    for ch in string.digits:
        for src, dst in digit_steps:
            padder.add_arc(src, dst, ch, ch)

    # Padding arcs: empty input, one '0' written.
    zero_steps = (('2', '6'), ('3', '7'), ('4', '8'), ('6', '7'), ('7', '8'))
    for src, dst in zero_steps:
        padder.add_arc(src, dst, '', '0')

    return padder
Esempio n. 20
0
def add_zero_padding():
    """
    Build the zero-padding FST (the third soundex FST).

    A leading letter moves 'start' -> '0'.  Digits are copied along
    '0' -> '1' -> '2' -> '3' (a leading digit goes 'start' -> '1'), and
    any digit read in '3' is dropped.  Padding uses its own states 'p1',
    'p2' and 'p3': each padding arc reads nothing and writes one '0', and
    no digit arc leaves a padding state, so zeros can only be appended
    after the last digit.  '3' and 'p3' are the final states.

    Returns:
        The FST named 'soundex-padzero'.
    """
    f3 = FST('soundex-padzero')

    f3.add_state('start')
    f3.initial_state = 'start'
    for name in ('0', '1', '2', '3', 'p1', 'p2', 'p3'):
        f3.add_state(name)
    f3.set_final('3')
    f3.set_final('p3')

    for letter in string.ascii_letters:
        f3.add_arc('start', '0', letter, letter)

    for digit in string.digits:
        f3.add_arc('start', '1', digit, digit)
        f3.add_arc('0', '1', digit, digit)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)
        # Digits beyond the third are consumed without output.
        f3.add_arc('3', '3', digit, ())

    # 'pN' means N digit positions exist and at least one of them is a
    # padding zero.  Keeping these apart from the digit-counting states
    # prevents outputs such as "A010" for the input "A1".
    padding_steps = (
        ('start', 'p1'),
        ('0', 'p1'),
        ('p1', 'p2'),
        ('1', 'p2'),
        ('p2', 'p3'),
        ('2', 'p3'),
    )
    for src, dst in padding_steps:
        f3.add_arc(src, dst, '', '0')

    return f3
Esempio n. 21
0
def french_count():
    """
    Build the FST that translates number symbols via the French tables.

    Four states are created ('start', 'hundred', 'unique', 'sen'); 'start'
    is initial and the other three are final.  Each integer from 0 to 1000
    is offered as a single input symbol and gets arcs according to the
    first table it is found in: hundredlist, the special case 0,
    uniquenumber, or seveneightynine (tables defined elsewhere).
    """
    fst = FST('french')

    for name in ('start', 'hundred', 'unique', 'sen'):
        fst.add_state(name)
    fst.initial_state = 'start'

    for value in xrange(1001):
        symbol = str(value)

        if value in hundredlist:
            fst.add_arc('start', 'hundred', [symbol], [hundredlist[value]])
            continue

        if value == 0:
            fst.add_arc('start', 'start', [symbol], [kFRENCH_TRANS[0]])
            # After another symbol, a 0 is read with empty output.
            fst.add_arc('unique', 'unique', [symbol], [])
            fst.add_arc('hundred', 'hundred', [symbol], [])
            continue

        if value in uniquenumber:
            word = kFRENCH_TRANS[value]
            for src in ('start', 'hundred', 'unique'):
                fst.add_arc(src, 'unique', [symbol], [word])
            continue

        if value in seveneightynine:
            word = seveneightynine[value]
            for src in ('start', 'hundred'):
                fst.add_arc(src, 'sen', [symbol], [word])

    for name in ('hundred', 'unique', 'sen'):
        fst.set_final(name)

    return fst
Esempio n. 22
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    Four states, all final, are created with 'initial' as the start
    state, which also copies letters.  The arcs between consecutive
    states (and the loop on 'thirdDigit') are added by
    addTruncateStates(), which is defined elsewhere.
    """
    fst = FST('soundex-truncate')

    names = ['initial', 'firstDigit', 'secondDigit', 'thirdDigit']
    for name in names:
        fst.add_state(name)

    fst.initial_state = 'initial'

    for name in names:
        fst.set_final(name)

    for ch in string.letters:
        fst.add_arc('initial', 'initial', ch, ch)

    # Each state is paired with its successor; the last pairs with itself.
    steps = [
        ('initial', 'firstDigit'),
        ('firstDigit', 'secondDigit'),
        ('secondDigit', 'thirdDigit'),
        ('thirdDigit', 'thirdDigit'),
    ]
    for current, following in steps:
        fst = addTruncateStates(current, following, fst)

    return fst
Esempio n. 23
0
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    A leading letter moves 'start' -> 'd1'; a leading digit moves
    'start' -> 'd2'.  Digits are copied along 'd1' -> 'd2' -> 'd3' ->
    'end', and every digit read in 'end' is dropped.  'd1', 'd2', 'd3'
    and 'end' are final, so shorter strings are accepted where they stop.

    Returns:
        The FST named 'soundex-truncate'.
    """
    f2 = FST('soundex-truncate')

    for name in ('start', 'd1', 'd2', 'd3', 'end'):
        f2.add_state(name)
    f2.initial_state = 'start'

    # The intermediate states are accepting themselves.  Reaching 'end'
    # through empty arcs instead would let its digit-dropping loop discard
    # digits that belong to the first three.
    for name in ('d1', 'd2', 'd3', 'end'):
        f2.set_final(name)

    for letter in string.ascii_letters:
        f2.add_arc('start', 'd1', letter, letter)

    for digit in string.digits:
        f2.add_arc('start', 'd2', digit, digit)
        f2.add_arc('d1', 'd2', digit, digit)
        f2.add_arc('d2', 'd3', digit, digit)
        f2.add_arc('d3', 'end', digit, digit)
        # Fourth and later digits produce no output.
        f2.add_arc('end', 'end', digit, ())

    return f2
Esempio n. 24
0
def truncate_to_three_digits():
    """
    Create the skeleton of the FST that truncates a soundex string.

    A single state '1' is declared, serving as both the initial and the
    final state.  No arcs are added here.
    """
    only_state = '1'

    truncator = FST('soundex-truncate')
    truncator.add_state(only_state)
    truncator.initial_state = only_state
    truncator.set_final(only_state)

    return truncator
Esempio n. 25
0
def truncate_to_three_digits():
    """
    Create the skeleton of the FST that truncates a soundex string.

    The machine consists of one state, '1', which is initial and final;
    this function adds no arcs.
    """
    fst = FST('soundex-truncate')

    state = '1'
    fst.add_state(state)
    fst.initial_state = state
    fst.set_final(state)

    return fst
Esempio n. 26
0
def letters_to_numbers():
    """
    Build the FST that rewrites letters as soundex digits.

    State '1' is initial; any letter is copied on the arc '1' -> '2'.
    States '3' .. '9' each belong to one letter group.  From every state
    '2' .. '9' a lower-case letter moves to its group's state and writes
    the group's digit, except that nothing is written when the machine
    is already in that state.  Letters in 'aehiouwy' always write
    nothing.  States '2' .. '9' are final.
    """
    f1 = FST('soundex-generate')

    f1.add_state('1')
    f1.initial_state = '1'

    later_states = [str(number) for number in range(2, 10)]
    for name in later_states:
        f1.add_state(name)

    for ch in string.letters:
        f1.add_arc('1', '2', ch, ch)

    # (letters, state entered, digit written on entry)
    groups = (
        ('aehiouwy', '3', ''),
        ('bfpv', '4', '1'),
        ('cgjkqsxz', '5', '2'),
        ('dt', '6', '3'),
        ('l', '7', '4'),
        ('mn', '8', '5'),
        ('r', '9', '6'),
    )
    for members, target, digit in groups:
        for ch in members:
            for source in later_states:
                emitted = '' if source == target else digit
                f1.add_arc(source, target, ch, emitted)

    for name in later_states:
        f1.set_final(name)

    return f1
Esempio n. 27
0
    def generate(self, analysis):
        """Produce the surface word for a morphological analysis.

        A fresh FST is created, populated by self._build_generator_fst()
        and run on `analysis`; the first transduction result is joined
        into a string.

        e.g.
        p = Parser()
        analysis = ['p','a','n','i','c','+past form']
        p.generate(analysis)
        ---> 'panicked'
        """
        initial = 'start'

        generator = FST('generator')
        generator.add_state(initial)
        generator.initial_state = initial
        self._build_generator_fst(generator, analysis, initial)

        results = generator.transduce(analysis)
        return ''.join(results[0])
Esempio n. 28
0
def letters_to_numbers():
    """
    Build the FST that rewrites letters as soundex digits.

    State '1' is initial and the first letter is copied on '1' -> '2'.
    States '2' .. '8' (all final) correspond, in order, to the letter
    groups "aehiouwy", "bfpv", "cgjkqsxz", "dt", "l", "mn" and "r".
    Reading a letter moves to its group's state; the group's index is
    written as a digit unless the index is 0 or the machine is already
    in that state, in which case nothing is written.
    """
    f1 = FST('soundex-generate')
    groups = ["aehiouwy", "bfpv", "cgjkqsxz", "dt", "l", "mn", "r"]

    for name in '12345678':
        f1.add_state(name)
    f1.initial_state = '1'

    for name in '2345678':
        f1.set_final(name)

    # Step 1: keep the first letter.
    for ch in string.ascii_letters:
        f1.add_arc('1', '2', ch, ch)

    # Steps 2 and 3: drop or encode every following letter.
    group_states = "2345678"
    for position, members in enumerate(groups):
        home = group_states[position]
        digit = str(position)
        for ch in members:
            for source in group_states:
                if source == home or digit == '0':
                    emitted = ()
                else:
                    emitted = digit
                f1.add_arc(source, home, ch, emitted)

    return f1
Esempio n. 29
0
def letters_to_numbers():
    """
    Create the skeleton of the FST that converts letters to soundex digits.

    Two states are declared: 'start' (initial) and 'next' (final).  No
    arcs are added here.
    """
    converter = FST('soundex-generate')

    for name in ('start', 'next'):
        converter.add_state(name)
    converter.initial_state = 'start'

    converter.set_final('next')

    return converter
Esempio n. 30
0
def letters_to_numbers():
    """
    Create the skeleton of the FST that converts letters to soundex digits.

    The machine has the initial state 'start' and the final state 'next';
    this function adds no arcs.
    """
    initial, final = 'start', 'next'

    fst = FST('soundex-generate')
    fst.add_state(initial)
    fst.add_state(final)
    fst.initial_state = initial

    fst.set_final(final)

    return fst
    def generate(self, analysis):
        """Produce the surface word for a morphological analysis.

        The FST copies the stem letter by letter while tracking whether
        the last symbol was a vowel, a 'c' directly after a vowel, or any
        other consonant.  A form tag read after such a 'c' writes 'ked' /
        'king'; read after another consonant it writes 'ed' / 'ing'.
        The first transduction result is joined into a string.

        e.g.
        p = Parser()
        analysis = ['p','a','n','i','c','+past form']
        p.generate(analysis)
        ---> 'panicked'
        """
        fst = FST('morphology-generate')

        for name in ('start', 'vowel', 'consonant', 'c', 'form_1', 'form_2'):
            fst.add_state(name)
        fst.initial_state = 'start'
        for name in ('form_1', 'form_2'):
            fst.set_final(name)

        vowels = 'aeiou'
        for ch in vowels:
            for source in ('start', 'vowel', 'consonant', 'c'):
                fst.add_arc(source, 'vowel', ch, ch)

        consonants = [ch for ch in string.ascii_lowercase if ch not in vowels]
        for ch in consonants:
            # 'c' after a vowel has its own state (arc added below).
            if ch != 'c':
                fst.add_arc('vowel', 'consonant', ch, ch)
            for source in ('start', 'consonant', 'c'):
                fst.add_arc(source, 'consonant', ch, ch)

        fst.add_arc('vowel', 'c', 'c', 'c')

        suffix_arcs = (
            ('c', 'form_1', '+past form', 'ked'),
            ('c', 'form_1', '+present participle form', 'king'),
            ('consonant', 'form_2', '+past form', 'ed'),
            ('consonant', 'form_2', '+present participle form', 'ing'),
        )
        for source, target, tag, ending in suffix_arcs:
            fst.add_arc(source, target, tag, ending)

        return "".join(fst.transduce(analysis)[0])
def truncate_to_three_digits():
    """
    Build the FST that truncates a soundex string to three digits.

    The first letter is copied on '1' -> '2L'; further letters read in
    '2L' are dropped.  Digits are copied while moving to '2D', '3D' and
    '4D', and any digit read in '4D' is dropped.  All five states are
    final.

    Returns:
        The FST named 'soundex-truncate'.
    """
    f2 = FST('soundex-truncate')

    f2.add_state('1')
    f2.initial_state = '1'
    f2.set_final('1')

    for name in ('2L', '2D', '3D', '4D'):
        f2.add_state(name)
        f2.set_final(name)

    for letter in string.ascii_letters:
        f2.add_arc('1', '2L', letter, letter)
        f2.add_arc('2L', '2L', letter, ())

    for digit in string.digits:
        f2.add_arc('1', '2D', digit, digit)
        f2.add_arc('2L', '2D', digit, digit)
        f2.add_arc('2D', '3D', digit, digit)
        f2.add_arc('3D', '4D', digit, digit)
        # Fourth and later digits produce no output.
        f2.add_arc('4D', '4D', digit, ())

    return f2
Esempio n. 33
0
def letters_to_numbers():
    """
    Build a two-state FST over the lower-case ASCII letters.

    The first letter is copied on the arc 'start' -> 'next'; every later
    letter loops on 'next' and is written as '0'.  'next' is the only
    final state.
    """
    first, rest = 'start', 'next'

    fst = FST('soundex-generate')
    fst.add_state(first)
    fst.add_state(rest)
    fst.initial_state = first

    fst.set_final(rest)

    for ch in string.ascii_lowercase:
        fst.add_arc(first, rest, ch, ch)
        fst.add_arc(rest, rest, ch, '0')

    return fst
Esempio n. 34
0
    def parse(self, word):
        """Analyse a surface word morphologically.

        For every lexicon entry a chain of states spelling the entry is
        added to the FST.  After the chain, an arc reading 'k' with empty
        output leads to a k-insertion state.  self._add_ending_states()
        is called once for that k-insertion state and once for the end of
        the chain.  The first transduction result is joined into a string.

        e.g.
        p = Parser()
        word = ['p', 'a', 'n', 'i', 'c', 'k','e','d']
        p.parse(word)
        ---> 'panic+past form'
        """
        lexicon = {'panic', 'havoc', 'sync', 'lick', 'want'}
        initial = 'start'
        k_prefix = 'k_insertion'

        parser = FST('parser')
        parser.add_state(initial)
        parser.initial_state = initial

        for entry in lexicon:
            # Every entry starts its own path at the initial state.
            tail = initial
            for ch in entry:
                node = '-'.join((entry, ch))
                parser.add_state(node)
                parser.add_arc(tail, node, ch, ch)
                tail = node

            k_state = k_prefix + '-' + entry
            parser.add_state(k_state)
            parser.add_arc(tail, k_state, 'k', '')

            self._add_ending_states(parser, entry, k_state, k=k_prefix)
            self._add_ending_states(parser, entry, tail)

        return ''.join(parser.transduce(word)[0])
Esempio n. 35
0
    def generate_control(self):
        """Build the control FST over the keys of self.matchers.

        States are numbered "0" to "2**n", where n is the number of keys.
        "0" is the initial state and "2**n" the only final one.  A VERB
        transition leads from "0" to "1".  Then, for every ordering of the
        keys, filling the key at list index i moves from the current state
        number to that number plus 2**i.
        """
        arguments = self.matchers.keys()
        corner_count = pow(2, len(arguments))

        control = FST()

        # State "0" waits for the verb.
        control.add_state("0", is_init=True, is_final=False)

        # Inner states; the last, accepting state is added separately.
        for number in xrange(1, corner_count):
            control.add_state(str(number), is_init=False, is_final=False)

        control.add_state(
            str(int(corner_count)), is_init=False, is_final=True)

        control.add_transition(
            KRPosMatcher("VERB"),
            [ExpandOperator(self.lexicon, self.working_area)],
            "0", "1")

        for ordering in permutations(arguments):
            position = 1
            for arg in ordering:
                target = position + pow(2, arguments.index(arg))
                control.add_transition(
                    self.matchers[arg],
                    [FillArgumentOperator(arg, self.working_area)],
                    str(position), str(target))
                position = target

        return control
def add_zero_padding():
	"""
	Return the FST that right-pads a soundex string with zeros so that
	it always carries three digits.

	States count the digits read so far: '1' (none), '1a' (one),
	'1b' (two) and '2' (three digits present, read or padded).
	Letters are copied unchanged while in state '1'.
	"""
	# Now, the third fst - the zero-padding fst
	f3 = FST('soundex-padzero')

	f3.add_state('1')
	f3.add_state('1a')
	f3.add_state('1b')
	f3.add_state('2')

	f3.initial_state = '1'

	# Only '2' may accept.  A string that needs no padding reaches '2' by
	# reading its third digit; making '1' final as well would let a
	# letter-only string through without any padding.
	f3.set_final('2')

	# Epsilon arcs supply whatever digits are still missing.
	f3.add_arc('1', '2', (), ('000'))
	f3.add_arc('1a', '2', (), ('00'))
	f3.add_arc('1b', '2', (), ('0'))

	for letter in string.ascii_letters:
		f3.add_arc('1', '1', (letter), (letter))

	# Each digit read advances the counter by one state.
	for number in range(10):
		digit = str(number)
		f3.add_arc('1', '1a', (digit), (digit))
		f3.add_arc('1a', '1b', (digit), (digit))
		f3.add_arc('1b', '2', (digit), (digit))

	return f3
Esempio n. 37
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied unchanged.  Each later letter is either
    dropped (members of ``vowels``) or replaced by the digit of its group
    (``grp1`` .. ``grp6``).  A run of adjacent letters from the same group
    produces that digit only once: the FST remembers the last emitted
    digit in a dedicated 'code-N' state.  A dropped letter clears that
    memory.  The code of the first letter itself is not remembered.

    ``vowels`` and ``grp1`` .. ``grp6`` are defined outside this function.
    """
    f1 = FST('soundex-generate')

    # (digit, letters mapped to it), tested in this order like the
    # original if/elif chain.
    groups = [('1', grp1), ('2', grp2), ('3', grp3),
              ('4', grp4), ('5', grp5), ('6', grp6)]

    # 'next' means "no digit to suppress"; 'code-N' means "N was just
    # emitted, so further letters of group N are silent".
    following = ['next'] + ['code-' + digit for digit, _ in groups]

    f1.add_state('start')
    for state in following:
        f1.add_state(state)
    f1.initial_state = 'start'

    for state in following:
        f1.set_final(state)

    for letter in string.ascii_lowercase:
        # The first letter of the word is kept as it is.
        f1.add_arc('start', 'next', (letter), (letter))

        if letter in vowels:
            # Dropped, and it separates equal digits on either side.
            for state in following:
                f1.add_arc(state, 'next', (letter), ())
            continue

        for digit, members in groups:
            if letter not in members:
                continue
            remembered = 'code-' + digit
            for state in following:
                if state == remembered:
                    # Same group as the previous letter: emit nothing.
                    f1.add_arc(state, state, (letter), ())
                else:
                    f1.add_arc(state, remembered, (letter), (digit))
            break
        # Letters in no group get no arc after the first position,
        # exactly as before.
    return f1
Esempio n. 38
0
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST.

    Letters are copied while in state '1'; each digit read moves one step
    along '1' -> '1a' -> '1b' -> '2'.  From any of the first three states
    an epsilon arc jumps to the only final state '2' while emitting the
    zeros that are still missing ('000', '00' or '0').
    """
    f3 = FST('soundex-padzero')

    # States in order of "digits seen so far": none, one, two, done.
    for name in ('1', '1a', '1b', '2'):
        f3.add_state(name)

    f3.initial_state = '1'
    f3.set_final('2')

    # Letters loop on the initial state.
    for letter in string.letters:
        f3.add_arc('1', '1', (letter), (letter))

    # Every digit may be read on each of the three counting steps.
    digit_steps = (('1', '1a'), ('1a', '1b'), ('1b', '2'))
    for number in string.digits:
        for source, target in digit_steps:
            f3.add_arc(source, target, (number), (number))

    # Epsilon arcs that fill in the missing zeros.
    for source, zeros in (('1', '000'), ('1a', '00'), ('1b', '0')):
        f3.add_arc(source, '2', (), zeros)

    return f3
Esempio n. 39
0
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST with states '1' .. '6'.

    Digits are copied along '1' -> '2' -> '5' -> '6'; letters loop on '1'.
    Epsilon arcs emit one '0' each: '1' -> '3' -> '4' -> '6' adds three
    zeros, '2' -> '4' -> '6' adds two and '5' -> '6' adds one.  '6' is the
    only final state.
    """
    f3 = FST('soundex-padzero')

    for index in range(1, 7):
        f3.add_state(str(index))

    f3.initial_state = '1'
    f3.set_final('6')

    # Letters are passed through before any digit is read.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    # Digits that are present are copied one state at a time.
    digit_path = (('1', '2'), ('2', '5'), ('5', '6'))
    for digit in '1234567890':
        for source, target in digit_path:
            f3.add_arc(source, target, digit, digit)

    # Each of these arcs consumes nothing and writes a single zero.
    zero_steps = (('1', '3'), ('3', '4'), ('4', '6'), ('2', '4'), ('5', '6'))
    for source, target in zero_steps:
        f3.add_arc(source, target, '', '0')
    return f3
Esempio n. 40
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied unchanged.  Afterwards a, e, h, i, o, u, w
    and y are dropped and every other lowercase letter is replaced by the
    digit of its group.  Adjacent letters of the same group produce the
    digit only once; a dropped letter in between makes the digit appear
    again.  The code of the first letter itself is not remembered.
    """
    f1 = FST('soundex-generate')

    dropped = 'aehiouwy'
    groups = [('1', 'bfpv'), ('2', 'cgjkqsxz'), ('3', 'dt'),
              ('4', 'l'), ('5', 'mn'), ('6', 'r')]

    # 'next' means "no digit to suppress"; a digit state means "this digit
    # was just emitted".
    following = ['next'] + [digit for digit, _ in groups]

    # 'start' is the initial state
    f1.add_state('start')
    for state in following:
        f1.add_state(state)
    f1.initial_state = 'start'

    # A word may end after any letter
    for state in following:
        f1.set_final(state)

    # The first letter is kept as it is (either case).
    for letter in string.ascii_letters:
        f1.add_arc('start', 'next', (letter), (letter))

    # Dropped letters emit nothing and clear the remembered digit.
    for letter in dropped:
        for state in following:
            f1.add_arc(state, 'next', (letter), ())

    for digit, members in groups:
        for letter in members:
            for state in following:
                if state == digit:
                    # Same group as the previous letter: stay silent.
                    f1.add_arc(state, state, (letter), ())
                else:
                    f1.add_arc(state, digit, (letter), digit)
    return f1
Esempio n. 41
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits

    States '1' .. '4' count the digits copied so far (zero to three).
    The first three digits are copied; once in state '4' every further
    digit is consumed without output.  Letters are copied unchanged in
    every state, and every state is final, so shorter strings pass
    through untouched.
    """
    f2 = FST('soundex-truncate')

    counting = ['1', '2', '3', '4']

    # Indicate initial and final states
    for state in counting:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in counting:
        f2.set_final(state)

    # Letters never count towards the limit.
    for letter in string.ascii_letters:
        for state in counting:
            f2.add_arc(state, state, (letter), (letter))

    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        # Fourth and later digits are swallowed.
        f2.add_arc('4', '4', digit, ())

    return f2
Esempio n. 42
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter (either case) is copied unchanged and leads to state
    '0'.  Afterwards a, e, h, i, o, u, w and y are dropped and lead back to
    '0'; every other lowercase letter emits the digit of its group and
    moves to the state named after that digit.  A letter whose group
    matches the current state emits nothing and stays in that state, so a
    run of any length from one group produces a single digit.
    """
    f1 = FST('soundex-generate')

    dropped = 'aehiouwy'
    groups = [('1', 'bfpv'), ('2', 'cgjkqsxz'), ('3', 'dt'),
              ('4', 'l'), ('5', 'mn'), ('6', 'r')]

    # '0' means "no digit to suppress"; '1' .. '6' name the digit that was
    # emitted last.
    following = ['0'] + [digit for digit, _ in groups]

    # 'initial' is the initial state
    f1.add_state('initial')
    for state in following:
        f1.add_state(state)
    f1.initial_state = 'initial'

    # Set all the final states
    for state in following:
        f1.set_final(state)

    # The first letter is kept as it is.
    for letter in string.ascii_letters:
        f1.add_arc('initial', '0', (letter), (letter))

    # Dropped letters emit nothing and clear the remembered digit.
    for letter in dropped:
        for state in following:
            f1.add_arc(state, '0', (letter), ())

    for digit, members in groups:
        for letter in members:
            for state in following:
                if state == digit:
                    # Stay here: moving to '0' would let a third letter of
                    # the same group emit the digit again.
                    f1.add_arc(state, state, (letter), ())
                else:
                    f1.add_arc(state, digit, (letter), digit)

    return f1
Esempio n. 43
0
def french_count():
    """
    Build and return the FST named 'french'.

    Arcs consume decimal digit characters and emit entries of
    kFRENCH_TRANS (indexed by number) and kFRENCH_AND, both of which are
    defined outside this function.  Presumably the input is a zero-padded
    three-digit string and the output its French spelling -- TODO confirm
    against the caller.

    State names sketch the digits read so far: '0' is a zero digit, 'n' a
    non-zero digit that has been read and '*' a position not read yet.

    NOTE(review): several epsilon arcs are added inside the digit loop
    and are therefore passed to add_arc more than once with identical
    arguments; whether FST keeps duplicates cannot be told from here.
    """
    f = FST('french')

    f.add_state('start')
    # one number and two trailing unknowns
    f.add_state('n**')
    # intermediate state between 'start' and 'n**' for hundreds digits 2-9
    f.add_state('n**+')
    # two numbers and one trailing unknown (no arc below references it)
    f.add_state('nn*')
    # zero and two unknown digits trailing and so on
    f.add_state('0**')
    f.add_state('00*')
    f.add_state('00n')
    f.add_state('0n*')
    f.add_state('0n*+')
    f.add_state('0nn')
    f.add_state('n00')
    # 'nnn' and 'nnn*' are declared but no arc below references them
    f.add_state('nnn')
    f.add_state('nnn*')
    f.add_state('*et*')
    # states used when the tens digit is 7 in ((0/n)n*)
    f.add_state('0n*Vega7+')
    f.add_state('0n*Vega7')
    f.add_state('0nnVega7')
    # states used when the tens digit is 8 in ((0/n)n*)
    f.add_state('0n*Vega8')
    f.add_state('0n*Vega8+')
    f.add_state('0nnVega8')
    # states used when the tens digit is 9 in ((0/n)n*)
    f.add_state('0n*Vega9')
    f.add_state('0n*Vega9+')
    f.add_state('0n*Vega9++')
    f.add_state('0nnVega9')

    # set final states
    f.set_final('00n')
    f.set_final('0nn')
    f.set_final('nnn')
    f.set_final('n00')
    f.set_final('0nnVega7')
    f.set_final('0nnVega8')
    f.set_final('0nnVega9')

    # initial state
    f.initial_state = 'start'
    # remove initial zeroes
    f.add_arc('start', '0**', '0', ())
    f.add_arc('0**', '00*', '0', ())
    
    for ii in xrange(10):
        # from '0n*Vega8+' to '0nnVega8': a zero unit is silent
        if ii != 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), [kFRENCH_TRANS[ii]])
        elif ii == 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), ())
        # units 0, 7, 8, 9 after a tens digit of 7 or 9: kFRENCH_TRANS[10]
        # is emitted on an epsilon arc, then the unit digit is read
        if ii == 0 or ii == 7 or ii ==8 or ii == 9:
            f.add_arc('0n*Vega7', '0n*Vega7+', (), [kFRENCH_TRANS[10]])
            # NOTE(review): this is a self-loop on '0n*Vega7+'; the arcs
            # into '0nnVega7' are added further down -- confirm intent.
            f.add_arc('0n*Vega7+', '0n*Vega7+', str(ii), [kFRENCH_TRANS[ii]])
            #
            f.add_arc('0n*Vega9+', '0n*Vega9++', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega9++', '0nnVega9', str(ii), [kFRENCH_TRANS[ii]])
    
            if ii == 0:
                f.add_arc('0n*Vega7+', '0nnVega7', '0', ())
                f.add_arc('0n*Vega9++', '0nnVega9', '0', ())
                
            elif ii == 7 or ii == 8 or ii == 9:
                f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii]])
        # units 2-6 after a tens digit of 7 or 9 emit kFRENCH_TRANS[12..16]
        if ii == 2 or ii == 3 or ii ==4 or ii == 5 or ii == 6:
            f.add_arc('0n*Vega7', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        if ii == 1:
            # tens digit 1 emits kFRENCH_TRANS[10] before the unit is read
            f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('n**','0n*', str(ii), [kFRENCH_TRANS[10]])
            # unit 1 after tens digit 7: kFRENCH_AND is emitted here
            f.add_arc('0n*Vega7', '0n*Vega7+', str(ii), [kFRENCH_AND])
            # NOTE(review): this arc consumes a further '1' after the one
            # read on the previous arc -- confirm intent.
            f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
            
        # from '00*' to '00n'
        f.add_arc('00*', '00n', str(ii), [kFRENCH_TRANS[ii]])
        # from '0n*' to '0nn' on digits 2-9 (ii+1), and from '0n*+' to
        # '0nn' on digits 1-8 (ii)
        # NOTE(review): '0n*+' therefore has no arc for '9' -- confirm.
        if ii != 0 and ii !=9:
            f.add_arc('0n*','0nn', str(ii+1), [kFRENCH_TRANS[ii+1]])
            f.add_arc('0n*+','0nn', str(ii), [kFRENCH_TRANS[ii]])
        # hundreds digit 2-9: emit the digit word, then kFRENCH_TRANS[100]
        # on an epsilon arc into 'n**'
        if ii != 0 and ii !=1:
            f.add_arc('start','n**+', str(ii), [kFRENCH_TRANS[ii]])
            f.add_arc('n**+', 'n**', (), [kFRENCH_TRANS[100]])
        # from 'n**' to 'n00', consuming '00' without output
        if ii == 0:
            f.add_arc('n**', 'n00', '00', ())
        # hundreds digit 1 emits only kFRENCH_TRANS[100]
        if ii == 1:
            f.add_arc('start', 'n**', '1', [kFRENCH_TRANS[100]])

        
    # from '0n*' to '*et*' on unit 1
    f.add_arc('0n*','*et*', '1', [kFRENCH_AND])
    # from '*et*' to '0nn', emitting kFRENCH_TRANS[1] without input
    f.add_arc('*et*','0nn', (), [kFRENCH_TRANS[1]])
    # from '0**' / 'n**' to '0nn' on the two digits 10-16
    for ii in xrange(10,17):
        f.add_arc('0**','0nn', str(ii), [kFRENCH_TRANS[ii]])
        f.add_arc('n**','0nn', str(ii), [kFRENCH_TRANS[ii]])
    # from '0**' / 'n**' to '0nn' on the two digits 20, 30, ..., 60
    for ii in xrange(2,7):
        f.add_arc('0**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        f.add_arc('n**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        
        # from '0**' to '0n*' on a tens digit 2-6
        f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[ii*10]])
        # from 'n**' to '0n*+' on a tens digit 2-6
        f.add_arc('n**', '0n*+', str(ii), [kFRENCH_TRANS[ii*10]])
    # tens digits 7, 8 and 9 enter their dedicated state chains
    for ii in xrange(7,10):
        if ii == 7:
            f.add_arc('0**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
            f.add_arc('n**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
        elif ii == 8:
            f.add_arc('0**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega8', '0n*Vega8+', (), [kFRENCH_TRANS[20]])
        elif ii == 9:
            f.add_arc('0**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega9', '0n*Vega9+', (), [kFRENCH_TRANS[20]])
            
    # a zero tens digit after a hundreds digit is silent
    f.add_arc('n**', '0n*+', '0', ())

    return f
Esempio n. 44
0
def french_count():
    """
    Build and return the FST named 'french'.

    Arcs consume decimal digit characters and emit entries of
    kFRENCH_TRANS (indexed by number) and kFRENCH_AND, both defined
    outside this function.  Presumably the input is a zero-padded
    three-digit string -- TODO confirm against the caller.

    The first digit (hundreds) always leads to '1stzero'; the second digit
    selects one of 'ones', 'tens', '20-69', '70-ten', '80s' or '90s'; the
    last digit is read inside that state.

    Each arc is added exactly once; the earlier version added the '20-69'
    zero arc four times and the 17/18/19 epsilon arcs twice.

    NOTE(review): the 'zero' state is final but no arc leads to it, so an
    all-zero input produces empty output.  Also, an input with tens digit
    8 and unit 0 can be accepted both via the '80s' self-loop and via
    'ones', with the same output.
    """
    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'
    for name in ('1stzero', 'tens',
                 'seventeen', 'final_seventeen',
                 'eighteen', 'final_eighteen',
                 'nineteen', 'final_nineteen',
                 'zero', 'ones', '20-69', '70-ten', '80s', '90s', '100s',
                 'et', '10-et', 'et-un', 'et-onze'):
        f.add_state(name)

    for name in ('zero', 'ones', 'tens',
                 'final_seventeen', 'final_eighteen', 'final_nineteen',
                 '20-69', '70-ten', '80s', '90s', 'et-un', 'et-onze'):
        f.set_final(name)

    # 100 - 999: a hundreds digit of 1 emits only the word for 100; 2-9
    # emit the digit word first and the word for 100 on an epsilon arc.
    f.add_arc('start', '1stzero', '1', [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('start', '100s', str(i), [kFRENCH_TRANS[i]])
    f.add_arc('100s', '1stzero', (), [kFRENCH_TRANS[100]])

    # 0 - 9: leading zeros are silent, the unit is spelled in 'ones'.
    f.add_arc('start', '1stzero', '0', [])
    f.add_arc('1stzero', 'ones', '0', [])
    for i in range(1, 10):
        f.add_arc('ones', 'ones', str(i), [kFRENCH_TRANS[i]])
    f.add_arc('ones', 'ones', '0', [])

    # 10 - 16: the tens digit 1 is silent, the unit picks the word.
    f.add_arc('1stzero', 'tens', '1', [])
    for i in range(0, 7):
        f.add_arc('tens', 'tens', str(i), [kFRENCH_TRANS[10 + i]])

    # 17 - 19 (also reached from the seventies and nineties): emit the
    # word for 10, then the unit on an epsilon arc.  The epsilon arcs are
    # shared, so they are added once here.
    compound_teens = ((7, 'seventeen', 'final_seventeen'),
                      (8, 'eighteen', 'final_eighteen'),
                      (9, 'nineteen', 'final_nineteen'))
    for unit, middle, last in compound_teens:
        f.add_arc('tens', middle, str(unit), [kFRENCH_TRANS[10]])
        f.add_arc(middle, last, (), [kFRENCH_TRANS[unit]])

    # 20 - 69: the tens digit emits its word, then the unit.
    for i in range(2, 7):
        f.add_arc('1stzero', '20-69', str(i), [kFRENCH_TRANS[i * 10]])
    for i in range(2, 10):
        f.add_arc('20-69', '20-69', str(i), [kFRENCH_TRANS[i]])
    # 20, 30, ... 60: the zero unit is silent
    f.add_arc('20-69', '20-69', '0', [])
    # 21, 31, ... 61: kFRENCH_AND, then the word for 1
    f.add_arc('20-69', 'et', '1', [kFRENCH_AND])
    f.add_arc('et', 'et-un', (), [kFRENCH_TRANS[1]])

    # 70 - 79: the word for 60 followed by 10 - 19.
    f.add_arc('1stzero', '70-ten', '7', [kFRENCH_TRANS[60]])
    f.add_arc('70-ten', '70-ten', '0', [kFRENCH_TRANS[10]])
    # 71: kFRENCH_AND, then the word for 11
    f.add_arc('70-ten', '10-et', '1', [kFRENCH_AND])
    f.add_arc('10-et', 'et-onze', (), [kFRENCH_TRANS[11]])
    for i in range(2, 7):
        f.add_arc('70-ten', '70-ten', str(i), [kFRENCH_TRANS[10 + i]])
    for unit, middle, _ in compound_teens:
        f.add_arc('70-ten', middle, str(unit), [kFRENCH_TRANS[10]])

    # 80 - 89: the words for 4 and 20, then the unit via 'ones'.
    f.add_arc('1stzero', '80s', '8', [kFRENCH_TRANS[4]])
    f.add_arc('80s', 'ones', (), [kFRENCH_TRANS[20]])
    f.add_arc('80s', '80s', '0', [kFRENCH_TRANS[20]])

    # 90 - 99: the words for 4 and 20, then 10 - 19 via 'tens'.
    f.add_arc('1stzero', '90s', '9', [kFRENCH_TRANS[4]])
    f.add_arc('90s', 'tens', (), [kFRENCH_TRANS[20]])

    return f
Esempio n. 45
0
def letters_to_numbers():
    """
    Build the 'soundex-generate' FST.

    The first letter is copied unchanged into state 'next'.  After that a
    lowercase letter belonging to one of the six consonant groups emits
    the digit of its group and moves to the state named after it ('one'
    .. 'six'), unless the FST already is in that state, in which case the
    letter is consumed silently.  Every other letter is consumed silently
    and leaves the state unchanged.  All states except 'start' are final.
    """
    f1 = FST('soundex-generate')

    # (state entered, digit emitted, letters of the group)
    groups = (
        ('one', '1', 'bfpv'),
        ('two', '2', 'cgjkqsxz'),
        ('three', '3', 'dt'),
        ('four', '4', 'l'),
        ('five', '5', 'mn'),
        ('six', '6', 'r'),
    )
    after_first = ['next'] + [name for name, _, _ in groups]

    # 'start' is the initial state
    f1.add_state('start')
    for name in after_first:
        f1.add_state(name)
    f1.initial_state = 'start'

    for name in after_first:
        f1.set_final(name)

    # Lookup from a grouped letter to the state and digit it leads to.
    destination = {}
    for name, digit, members in groups:
        for member in members:
            destination[member] = (name, digit)

    # The first letter is always kept.
    for letter in string.letters:
        f1.add_arc('start', 'next', (letter), (letter))

    # One pass per source state, visiting the letters in the same order.
    for source in after_first:
        for letter in string.letters:
            target, digit = destination.get(letter, (source, None))
            if target == source:
                # Ungrouped letter, or same group as the current state.
                f1.add_arc(source, source, (letter), ())
            else:
                f1.add_arc(source, target, (letter), digit)
    return f1
Esempio n. 46
0
# where ALL transducers use characters as input symbols
def compose(input, *fsts):
	"""
	Feed `input` through each transducer in `fsts`, in order.

	Every output of one stage is joined into a string and handed to the
	next stage's `transduce`; the outputs of all candidates are collected
	in order.  Returns the list of outputs of the last stage, or
	`[input]` when no transducer is given.
	"""
	candidates = [input]
	for transducer in fsts:
		candidates = [
			result
			for candidate in candidates
			for result in transducer.transduce(''.join(candidate))
		]
	return candidates

if __name__ == '__main__':
	# Script entry point: builds two small demo transducers, f1 and f2.
	f1 = FST('test-generate')

	# f1 has two states; 'start' is the initial state
	f1.add_state('start')
	f1.add_state('next')
	f1.initial_state = 'start'

	# 'next' is the only final state
	f1.set_final('next')

	# The first symbol out of A-D is rewritten to '1', every later one to '0'
	for letter in ['A','B','C','D']:
		f1.add_arc('start', 'next', letter, '1')
		f1.add_arc('next', 'next', letter, '0')

	# Second transducer, created under the same name 'test-generate'.
	# NOTE(review): within the lines shown here f2 gets no final state and
	# no arcs -- the listing appears to be cut off at this point.
	f2 = FST('test-generate')
	f2.add_state('start')
	f2.add_state('next')
	f2.initial_state = 'start'
Esempio n. 47
0
def french_count():
    """
    Build and return the FST named 'french'.

    Arcs consume decimal digit characters and emit entries of
    kFRENCH_TRANS (indexed by number) and kFRENCH_AND, both defined
    outside this function.  Presumably the input is a digit string of up
    to three digits, possibly zero-padded -- TODO confirm against the
    caller.

    States are the strings '0' .. '25'; '0' is initial.  The ones used by
    arcs are:
      '17'        hundreds digit read
      '19'        hundreds digit and a zero tens digit read
      '2' / '5'   tens digit 1 read (nothing / the word for 10 emitted)
      '4'         tens digit 2-6 read
      '10'        tens digit 7 read
      '12'        tens digit 8 read
      '16'        tens digit 9 read
    The remaining final states are reached when the last digit is read.

    kFRENCH_AND is emitted for a unit of 1 after tens digits 2-7 only;
    81 and 91 are spelled without it.
    """
    f = FST('french')

    for state in range(26):
        f.add_state(str(state))

    f.initial_state = '0'

    for state in ('1', '3', '6', '7', '8', '9',
                  '11', '13', '14', '18', '20'):
        f.set_final(state)

    # Digit 0
    f.add_arc('0', '0', '0', ())      # leading zeros are skipped
    f.add_arc('4', '6', '0', ())      # 20, 30, ... 60
    f.add_arc('5', '8', '0', ())      # 10
    f.add_arc('0', '9', '0', [kFRENCH_TRANS[0]])
    f.add_arc('10', '11', '0', [kFRENCH_TRANS[10]])
    f.add_arc('12', '13', '0', [kFRENCH_TRANS[20]])
    f.add_arc('16', '18', '0', [kFRENCH_TRANS[20], kFRENCH_TRANS[10]])
    f.add_arc('17', '19', '0', ())    # zero tens digit after hundreds
    f.add_arc('19', '9', '0', ())     # round hundreds

    # Digit 1
    f.add_arc('0', '2', '1', ())
    f.add_arc('17', '2', '1', ())
    f.add_arc('0', '17', '1', [kFRENCH_TRANS[100]])
    f.add_arc('0', '5', '1', [kFRENCH_TRANS[10]])
    f.add_arc('17', '5', '1', [kFRENCH_TRANS[10]])
    # 21 .. 61 and 71 take kFRENCH_AND before the unit
    f.add_arc('4', '7', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])
    f.add_arc('10', '11', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])
    # 81 and 91 do not take kFRENCH_AND
    f.add_arc('12', '14', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[1]])
    f.add_arc('16', '20', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[11]])

    # 11 - 16
    for i in range(1, 7):
        f.add_arc('2', '3', str(i), [kFRENCH_TRANS[i + 10]])

    # Digits 2 - 6 as a tens digit, or as a unit after 7 / 9
    for i in range(2, 7):
        f.add_arc('0', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('17', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[i + 10]])
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[i + 10]])

    # Digits 2 - 9 as a unit after 2-6 / 8, or as the hundreds digit
    for i in range(2, 10):
        f.add_arc('4', '7', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('0', '17', str(i), [kFRENCH_TRANS[i], kFRENCH_TRANS[100]])
        f.add_arc('12', '14', str(i), [kFRENCH_TRANS[20], kFRENCH_TRANS[i]])

    # A lone unit, directly or after a hundreds digit and a zero
    for i in range(1, 10):
        f.add_arc('0', '1', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('19', '1', str(i), [kFRENCH_TRANS[i]])

    # Units 7 - 9 after a tens digit of 1, 7 or 9
    for i in (7, 8, 9):
        f.add_arc('5', '8', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[10], kFRENCH_TRANS[i]])
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[10], kFRENCH_TRANS[i]])

    # Tens digits 7, 8 and 9, with or without a hundreds digit before
    for source in ('0', '17'):
        f.add_arc(source, '10', '7', [kFRENCH_TRANS[60]])
    for source in ('0', '17'):
        f.add_arc(source, '12', '8', [kFRENCH_TRANS[4]])
    for source in ('0', '17'):
        f.add_arc(source, '16', '9', [kFRENCH_TRANS[4]])

    return f