Ejemplo n.º 1
0
def french_count():
    """
    Build a one-state FST that rewrites decimal digits symbol by symbol.

    The single state 'start' is both the initial and the final state.
    For every digit 0-9 a self-loop consumes the digit character and
    emits the matching entry of ``kFRENCH_TRANS`` (presumably the French
    word for that digit -- the table is defined outside this function).

    Returns:
        The configured ``FST`` instance named 'french'.
    """
    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'

    # ``range`` rather than the Python-2-only ``xrange``: the sequence has
    # ten items, and ``range`` exists on both Python 2 and Python 3.
    for digit in range(10):
        f.add_arc('start', 'start', str(digit), [kFRENCH_TRANS[digit]])

    f.set_final('start')

    return f
Ejemplo n.º 2
0
def french_count():
    """
    Return the 'french' FST: one state with a translating self-loop per
    decimal digit.

    State "start" is initial and final.  Each of the ten self-loops reads
    one digit character and writes ``kFRENCH_TRANS[digit]`` as a single
    output symbol.  ``kFRENCH_TRANS`` is defined elsewhere; it is
    presumably a digit-to-French-word table.
    """
    f = FST("french")

    f.add_state("start")
    f.initial_state = "start"

    # Portable replacement for ``xrange``, which does not exist on
    # Python 3; for ten values the two are interchangeable on Python 2.
    for value in range(10):
        symbol = str(value)
        f.add_arc("start", "start", symbol, [kFRENCH_TRANS[value]])

    f.set_final("start")

    return f
Ejemplo n.º 3
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits.

    Letters are copied unchanged while the machine is in state '1'.  The
    first three digits move it through states '2', '3' and '4' and are
    copied to the output; every further digit is consumed in state '4'
    without producing output.  All four states are final, so strings with
    fewer than three digits are accepted as they are.

    Returns:
        The configured ``FST`` instance named 'soundex-truncate'.
    """
    f2 = FST('soundex-truncate')

    # Every state is accepting; '1' is where processing starts.
    states = ['1', '2', '3', '4']
    for state in states:
        f2.add_state(state)
    f2.initial_state = '1'
    for state in states:
        f2.set_final(state)

    # Copy the leading letters.  string.ascii_letters is used instead of
    # string.letters, which is locale-dependent on Python 2 and was
    # removed in Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    for n in range(10):
        digit = str(n)
        # The first, second and third digit are kept ...
        f2.add_arc('1', '2', digit, digit)
        f2.add_arc('2', '3', digit, digit)
        f2.add_arc('3', '4', digit, digit)
        # ... and anything after the third digit is dropped.
        f2.add_arc('4', '4', digit, ())

    return f2
Ejemplo n.º 4
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits.

    State '1' copies letters.  States 'd1', 'd2' and 'd3' are reached
    after one, two and three digits respectively, each digit being copied
    to the output; once in 'd3' any further digit is consumed silently.

    All states are final.  This includes '1', so a code that contains no
    digit at all (for example a single letter) passes through unchanged
    instead of being rejected.

    Returns:
        The configured ``FST`` instance named 'soundex-truncate'.
    """
    f2 = FST('soundex-truncate')

    states = ['1', 'd1', 'd2', 'd3']
    for state in states:
        f2.add_state(state)

    f2.initial_state = '1'

    # '1' must be accepting too, otherwise digit-less input has no
    # accepting path.
    for state in states:
        f2.set_final(state)

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and no longer exists on Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    # Each of the first three digits advances one state and is kept.
    digits = [str(n) for n in range(10)]
    for source, target in zip(states, states[1:]):
        for digit in digits:
            f2.add_arc(source, target, digit, digit)

    # Digits after the third are consumed without output.
    for digit in digits:
        f2.add_arc('d3', 'd3', digit, ())

    return f2
Ejemplo n.º 5
0
def letters_to_numbers():
    """
    Build the 'soundex-generate' FST.

    As written this is a placeholder rather than the full soundex
    mapping: the first lowercase letter is copied to the output, and
    every later lowercase letter is replaced by the symbol '0'.

    Returns:
        The configured ``FST`` instance.
    """
    fst = FST('soundex-generate')

    # 'start' is the initial state; 'next' is entered after the first
    # letter and is the only final state.
    for name in ('start', 'next'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('next')

    for ch in string.ascii_lowercase:
        # First letter: keep it.
        fst.add_arc('start', 'next', ch, ch)
        # Any later letter: emit '0' in its place.
        fst.add_arc('next', 'next', ch, '0')

    return fst
Ejemplo n.º 6
0
def truncate_to_three_digits():
    """
    Build the 'soundex-truncate' FST.

    NOTE(review): despite its name this version does not truncate
    anything yet.  It has a single state '1' (initial and final) whose
    self-loops copy every ASCII letter and every digit to the output
    unchanged.

    Returns:
        The configured ``FST`` instance.
    """
    f2 = FST('soundex-truncate')

    f2.add_state('1')
    f2.initial_state = '1'
    f2.set_final('1')

    # string.ascii_letters replaces string.letters, which depends on the
    # active locale on Python 2 and was removed in Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    for n in range(10):
        digit = str(n)
        f2.add_arc('1', '1', digit, digit)

    return f2
Ejemplo n.º 7
0
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST, which pads a soundex code with
    zeros until it carries three digits.

    Leading letters are copied unchanged in state '1'.  Up to three
    digits 1-9 are copied as well, advancing one state per digit.  When
    the input ends early, an epsilon arc into the final state '4'
    supplies the missing digits: '000' after no digit, '00' after one
    digit and '0' after two.

    Returns:
        The configured ``FST`` instance.
    """
    f3 = FST('soundex-padzero')

    states = ['1', '2', '3', '4']
    for state in states:
        f3.add_state(state)

    f3.initial_state = '1'
    f3.set_final('4')

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and does not exist on Python 3.
    for letter in string.ascii_letters:
        f3.add_arc('1', '1', letter, letter)

    for number in range(1, 10):
        digit = str(number)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('2', '3', digit, digit)
        f3.add_arc('3', '4', digit, digit)

    # Epsilon arcs that append the missing zeros.  The arc from '1'
    # covers codes with no digit at all, which otherwise could never
    # reach the final state.
    f3.add_arc('1', '4', (), '000')
    f3.add_arc('2', '4', (), '00')
    f3.add_arc('3', '4', (), '0')
    return f3
Ejemplo n.º 8
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm.

    The first letter is copied to the output and moves the machine from
    'start' to 'next'.  After that the machine remembers the group of
    the last digit it emitted (states 'one' .. 'six'):

    * a lowercase letter of a different group emits that group's digit
      and moves to the group's state;
    * every other letter (same group as the current state, a letter that
      belongs to no group, or any uppercase letter) is consumed without
      output and leaves the state unchanged.

    All states except 'start' are final.

    Returns:
        The configured ``FST`` instance named 'soundex-generate'.
    """
    f1 = FST('soundex-generate')

    # (state reached, digit emitted, letters of the group)
    groups = [
        ('one', '1', 'bfpv'),
        ('two', '2', 'cgjkqsxz'),
        ('three', '3', 'dt'),
        ('four', '4', 'l'),
        ('five', '5', 'mn'),
        ('six', '6', 'r'),
    ]
    # States that can follow the first letter, in declaration order.
    later_states = ['next'] + [state for state, _, _ in groups]

    f1.add_state('start')
    for state in later_states:
        f1.add_state(state)
    f1.initial_state = 'start'

    for state in later_states:
        f1.set_final(state)

    # string.ascii_letters replaces string.letters, which is
    # locale-dependent on Python 2 and was removed in Python 3.
    letters = string.ascii_letters

    # The first letter is always kept as-is.
    for letter in letters:
        f1.add_arc('start', 'next', letter, letter)

    for source in later_states:
        for letter in letters:
            for target, digit, members in groups:
                if letter in members and target != source:
                    # New group: emit its digit and remember it.
                    f1.add_arc(source, target, letter, digit)
                    break
            else:
                # Same group as before, or not a coded letter: drop it.
                f1.add_arc(source, source, letter, ())

    return f1
Ejemplo n.º 9
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm.

    Each state 'grp0' .. 'grp6' records the class of the letter read
    last: 'grp0' for the letters a, e, i, o, u, h, w, y and 'grp1' ..
    'grp6' for the six digit groups.  The first letter is copied to the
    output and selects the state of its class.  Afterwards a letter
    emits its group digit unless it belongs to 'grp0' or to the class
    the machine is already in, in which case it is consumed silently.
    Every state except 'start' is final.

    Returns:
        The configured ``FST`` instance named 'soundex-generate'.
    """
    fst = FST('soundex-generate')

    # (state, member letters, digit emitted on entry).  The 'grp0'
    # letters never produce a digit.
    classes = [
        ('grp0', 'aeiouhwy', None),
        ('grp1', 'bfpv', '1'),
        ('grp2', 'cgjkqsxz', '2'),
        ('grp3', 'dt', '3'),
        ('grp4', 'l', '4'),
        ('grp5', 'mn', '5'),
        ('grp6', 'r', '6'),
    ]
    group_states = [name for name, _, _ in classes]

    fst.add_state('start')
    for name in group_states:
        fst.add_state(name)
    fst.initial_state = 'start'

    for name in group_states:
        fst.set_final(name)

    for ch in string.ascii_lowercase:
        for target, members, digit in classes:
            if ch not in members:
                continue
            # First letter of the word: keep the letter itself.
            fst.add_arc('start', target, ch, ch)
            # Later letters: digit on a change of group, nothing otherwise.
            for source in group_states:
                if digit is None or source == target:
                    emitted = ()
                else:
                    emitted = digit
                fst.add_arc(source, target, ch, emitted)

    return fst
    def parse(self, word):
        """Parse a word morphologically.

        Two transducers are built on every call and handed, together with
        ``word``, to ``compose``: the first rewrites a trailing 'ing' or
        'ed' as a textual tag, the second removes the 'k' of a 'ck' that
        follows a vowel directly before the tag.

        e.g.
        p = Parser()
        word = ['p','a','n','i','c','k','i','n','g']
        p.parse(word)
        ---> 'panic+present participle form'

        ``word`` is a sequence of single lowercase letters.  The return
        value is the first element of ``compose``'s result joined into
        one string (``compose`` is defined elsewhere; presumably it feeds
        the output of ``f2`` into ``f3`` -- confirm).
        """

        # First transducer: detect the affixes 'ing' and 'ed'.
        f2 = FST('morphology-parse')  # Detecting affixes ('ing' and 'ed')

        # 'start' copies letters; 'i', 'n' and 'e' hold back a possible
        # suffix prefix; 'g' and 'd' are reached once a suffix is complete
        # and are the only final states.
        states = ['start', 'i', 'n', 'g', 'e', 'd']
        for state in states:
            f2.add_state(state)
        f2.initial_state = 'start'
        f2.set_final('d')
        f2.set_final('g')

        for letter in string.ascii_lowercase:
            # Ordinary letters are copied; 'i' and 'e' are held back below.
            if not letter in 'ie':
                f2.add_arc('start', 'start', letter, letter)
            # The held-back 'i' was not followed by 'n': flush it.
            # NOTE(review): this also fires for 'i' and 'e', which return
            # to 'start' instead of opening a new suffix attempt.
            if not letter == 'n':
                f2.add_arc('i', 'start', letter, ('i', letter))
            # The held-back 'e' was not followed by 'd': flush it.
            if not letter == 'd':
                f2.add_arc('e', 'start', letter, ['e', letter])
            # The held-back 'in' was not followed by 'g': flush both.
            if not letter == 'g':
                f2.add_arc('n', 'start', letter, ['i', 'n', letter])

        # Suffix paths: the letters are swallowed and replaced by a tag.
        f2.add_arc('start', 'i', 'i', '')
        f2.add_arc('i', 'n', 'n', '')
        f2.add_arc('n', 'g', 'g', '+present participle form')
        f2.add_arc('start', 'e', 'e', '')
        f2.add_arc('e', 'd', 'd', '+past form')

        # Second transducer: K-deletion on the stem that precedes '+'.
        f3 = FST('morphology-parse')  # K-deletion
        # 'vowel' / 'consonant' / 'c' / 'k' track the stem's last letters,
        # the 'lick_*' states spell out the stem 'lick' letter by letter,
        # and 'parse' (the only final state) copies the tag after '+'.
        f3.add_state('start')
        f3.add_state('vowel')
        f3.add_state('consonant')
        f3.add_state('c')
        f3.add_state('k')
        f3.add_state('lick_l')
        f3.add_state('lick_i')
        f3.add_state('lick_c')
        f3.add_state('lick_k')
        f3.add_state('parse')
        f3.initial_state = 'start'
        f3.set_final('parse')

        vowels = 'aeiou'
        # A vowel is copied and leads to 'vowel' from any stem state
        # except 'k' and the 'lick_*' states.
        for vowel in vowels:
            f3.add_arc('start', 'vowel', vowel, vowel)
            f3.add_arc('vowel', 'vowel', vowel, vowel)
            f3.add_arc('consonant', 'vowel', vowel, vowel)
            f3.add_arc('c', 'vowel', vowel, vowel)

        for letter in string.ascii_lowercase:
            # After the '+', every letter of the tag is copied.
            f3.add_arc('parse', 'parse', letter, letter)

            # The remaining arcs in this loop concern consonants only.
            if letter in vowels:
                continue

            # 'c' after a vowel gets its own state (added below).
            if not letter == 'c':
                f3.add_arc('vowel', 'consonant', letter, letter)

            # A leading 'l' enters the 'lick' path instead (added below).
            if not letter == 'l':
                f3.add_arc('start', 'consonant', letter, letter)

            # 'k' after vowel + 'c' is the deletion case (added below).
            if not letter == 'k':
                f3.add_arc('c', 'consonant', letter, letter)
            f3.add_arc('consonant', 'consonant', letter, letter)

        # vowel + 'c' + 'k': the 'c' is kept, the 'k' is deleted.
        f3.add_arc('vowel', 'c', 'c', 'c')
        f3.add_arc('c', 'k', 'k', '')
        # The stem 'lick' is copied whole, 'k' included.
        # NOTE(review): the 'lick_*' states have no other outgoing arcs,
        # so any other stem that begins with 'l' has no path -- confirm
        # that this is intended.
        f3.add_arc('start', 'lick_l', 'l', 'l')
        f3.add_arc('lick_l', 'lick_i', 'i', 'i')
        f3.add_arc('lick_i', 'lick_c', 'c', 'c')
        f3.add_arc('lick_c', 'lick_k', 'k', 'k')
        # '+' ends the stem.  NOTE(review): only 'lick_k', 'k' and
        # 'consonant' have a '+' arc, so a stem ending in a vowel or in a
        # bare 'c' is not accepted.
        f3.add_arc('lick_k', 'parse', '+', '+')
        f3.add_arc('k', 'parse', '+', '+')
        f3.add_arc('consonant', 'parse', '+', '+')
        # The tags contain spaces, which are copied as well.
        f3.add_arc('parse', 'parse', ' ', ' ')

        # Keep only the first result and flatten its symbols to a string.
        output = compose(word, f2, f3)[0]

        return "".join(output)
Ejemplo n.º 11
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm.

    'q1' is the initial state; reading the first letter copies it to the
    output and moves to 'q2'.  States 'n1' .. 'n6' record the digit that
    was emitted last.  From then on:

    * a, e, h, i, o, u, w, y are consumed silently without a state change;
    * a letter of digit group k emits k and moves to 'nk', unless the
      machine is already in 'nk', in which case it is consumed silently.

    Every state except 'q1' is final.

    Returns:
        The configured ``FST`` instance named 'soundex-generate'.
    """
    fst = FST('soundex-generate')

    digit_states = ['n1', 'n2', 'n3', 'n4', 'n5', 'n6']
    accepting = ['q2'] + digit_states

    for name in ['q1'] + accepting:
        fst.add_state(name)

    fst.initial_state = 'q1'

    for name in accepting:
        fst.set_final(name)

    silent = 'aehiouwy'
    # Position in this list + 1 is the soundex digit of the group.
    groups = ['bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r']

    # letter -> (state that remembers the group, digit to emit)
    home = {}
    for number, members in enumerate(groups, 1):
        for ch in members:
            home[ch] = ('n%d' % number, str(number))

    for ch in string.ascii_lowercase:
        # The first letter is always kept.
        fst.add_arc('q1', 'q2', ch, ch)

        if ch in silent:
            for name in accepting:
                fst.add_arc(name, name, ch, ())
        elif ch in home:
            target, digit = home[ch]
            # Coming from any other state the digit is emitted ...
            for name in accepting:
                if name != target:
                    fst.add_arc(name, target, ch, digit)
            # ... but a repeat of the same group is dropped.
            fst.add_arc(target, target, ch, ())

    return fst
Ejemplo n.º 12
0
def truncate_to_three_digits():
    """
    Create an FST that will truncate a soundex string to three digits.

    State '1' copies letters; the first three digits are copied while
    moving '1' -> '2' -> '3' -> '4'; in '4' every further digit is
    consumed without output.  '4' is the only final state, and states
    '1', '2' and '3' each have an epsilon arc to it so that input with
    fewer than three digits is accepted as well.

    Returns:
        The configured ``FST`` instance named 'soundex-truncate'.
    """
    f2 = FST('soundex-truncate')

    for state in ('1', '2', '3', '4'):
        f2.add_state(state)
    f2.initial_state = '1'
    f2.set_final('4')

    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent on Python 2 and was removed in Python 3.
    for letter in string.ascii_letters:
        f2.add_arc('1', '1', letter, letter)

    digits = [str(n) for n in range(10)]
    for source, target in [('1', '2'), ('2', '3'), ('3', '4')]:
        # Input may stop here: jump to the final state without output.
        f2.add_arc(source, '4', (), ())
        # Otherwise the next digit is kept.
        for digit in digits:
            f2.add_arc(source, target, digit, digit)

    # Digits beyond the third are discarded.
    for digit in digits:
        f2.add_arc('4', '4', digit, ())
    return f2
Ejemplo n.º 13
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm.

    The first letter (either case) is copied to the output and selects a
    state by its class: 'next' for a, e, i, o, u, h, w, y and '1' .. '6'
    for the six digit groups.  Afterwards:

    * a letter of the 'next' class is consumed silently and moves to
      'next';
    * a letter of digit group k moves to state k and emits k, unless the
      machine is already in state k, in which case nothing is emitted.

    State '7' is the only final state and is reached by an epsilon arc
    from 'next' and from each of '1' .. '6'.

    Returns:
        The configured ``FST`` instance named 'soundex-generate'.
    """
    f1 = FST('soundex-generate')

    digit_states = ['1', '2', '3', '4', '5', '6']
    for name in ['start'] + digit_states + ['7', 'next']:
        f1.add_state(name)
    f1.initial_state = 'start'
    f1.set_final('7')

    dropped = 'aeiouhwy'
    groups = [
        ('1', 'bfpv'),
        ('2', 'cgjkqsxz'),
        ('3', 'dt'),
        ('4', 'l'),
        ('5', 'mn'),
        ('6', 'r'),
    ]

    # letter (both cases) -> state it leads to.  For the digit groups
    # the state name doubles as the digit that is emitted.
    target_of = {}
    for ch in dropped:
        target_of[ch] = target_of[ch.upper()] = 'next'
    for digit, members in groups:
        for ch in members:
            target_of[ch] = target_of[ch.upper()] = digit

    # The first letter is kept verbatim.
    for letter in string.ascii_letters:
        f1.add_arc('start', target_of[letter], letter, letter)

    for source in ['next'] + digit_states:
        for letter in string.ascii_letters:
            target = target_of[letter]
            if target == 'next' or target == source:
                # Uncoded letter, or a repeat of the current group.
                f1.add_arc(source, target, letter, ())
            else:
                # Change of group: always emit the new group's digit.
                # (Previously d/t from states '4', '5' and '6' emitted
                # an empty string instead of '3'.)
                f1.add_arc(source, target, letter, target)
        # The word may end in any of these states.
        f1.add_arc(source, '7', (), ())

    return f1
def french_count():
    """
    Build the 'french' FST that rewrites a number given as digit
    characters into output symbols taken from ``kFRENCH_TRANS``.

    Every accepting path below consumes exactly three digit symbols
    (hundreds, tens, units), so the input is expected to be zero-padded
    to three digits.  ``kFRENCH_TRANS`` and ``kFRENCH_AND`` are defined
    elsewhere; presumably they hold the French number words and the
    connective used in e.g. 21 -- confirm.

    NOTE(review): the machine is nondeterministic and some inputs appear
    to have more than one accepting path (for example tens digit '1'
    followed by '2' can go through '10-60s' or through '11-16s'); which
    output is produced then depends on how ``FST`` resolves the choice.

    Returns:
        The configured ``FST`` instance.
    """
    f = FST('french')

    # Declare all states; 'start' is the initial state.
    f.add_state('start')
    f.initial_state = 'start'
    f.add_state('znod1')
    f.add_state('znod2')
    f.add_state('0')
    f.add_state('units')
    f.add_state('tens')
    f.add_state('hundreds')
    f.add_state('unod1')
    f.add_state('unod2')
    f.add_state('1s')
    f.add_state('1e')
    f.add_state('2-9s')
    f.add_state('tnod1')
    f.add_state('11s')
    f.add_state('11e')
    f.add_state('10-60s')
    f.add_state('10-60e')
    f.add_state('11-16s')
    f.add_state('12-16e')
    f.add_state('7090s')
    f.add_state('80s')
    f.add_state('100-900s')
    f.add_state('100-900t')
    f.add_state('100-900e')

    # Input '000': two silent zeros, the third emits kFRENCH_TRANS[0].
    f.add_arc('start', 'znod1', ('0'), ())
    f.add_arc('znod1', 'znod2', ('0'), ())
    f.add_arc('znod2', '0', ('0'), [kFRENCH_TRANS[0]])
    f.set_final('0')
    # Epsilon branches into the one-, two- and three-digit sub-machines.
    f.add_arc('start', 'units', (), ())
    f.add_arc('start', 'tens', (), ())
    f.add_arc('start', 'hundreds', (), ())

    # covers 1: two leading zeros are skipped; 'unod2' is the shared
    # entry point for reading a units digit.
    f.add_arc('units', 'unod1', ('0'), ())
    f.add_arc('unod1', 'unod2', ('0'), ())
    f.add_arc('unod2', '1s', (), ())
    f.add_arc('1s', '1e', ('1'), [kFRENCH_TRANS[1]])
    f.set_final('1e')

    # Covers 2-9
    for i in range(2, 10):
        f.add_arc('unod2', '2-9s', (str(i)), [kFRENCH_TRANS[i]])
    f.set_final('2-9s')

    # covers 10-60: a leading zero is skipped, the tens digit emits
    # kFRENCH_TRANS[10 * digit], then either a '0' ends the number, a
    # units digit follows via 'unod2', or a '1' follows via '1s' with
    # kFRENCH_AND emitted first.
    f.add_arc('tens', 'tnod1', ('0'), ())
    for i in range(1, 7):
        f.add_arc('tnod1', '10-60s', (str(i)), [kFRENCH_TRANS[i * 10]])
    f.add_arc('10-60s', '10-60e', ('0'), ())
    f.add_arc('10-60s', 'unod2', (), ())
    f.add_arc('10-60s', '1s', (), [kFRENCH_AND])
    f.set_final('10-60e')

    # covers 11-16: the tens digit '1' is consumed silently because the
    # units digit decides the whole output.
    f.add_arc('tnod1', '11-16s', ('1'), ())
    # (disabled) f.add_arc('11-16s', 'units', (), ())

    # covers 11
    f.add_arc('11-16s', '11s', (), ())
    f.add_arc('11s', '11e', ('1'), [kFRENCH_TRANS[11]])
    f.set_final('11e')

    # covers 12-16 (units digits 2..6 emit kFRENCH_TRANS[12..16])
    for i in range(2, 7):
        f.add_arc('11-16s', '12-16e', (str(i)), [kFRENCH_TRANS[i + 10]])
    f.set_final('12-16e')

    # covers 70-90: '7' emits the entry for 60 and '9' the entries for
    # 4 and 20 joined by a space; the remainder is then read as 10-16,
    # as the entry for 10 plus a units digit, or as a closing '0' that
    # emits the entry for 10.  '8' emits the same 4-and-20 pair and is
    # followed by a plain units digit or a closing '0'.
    f.add_arc('tnod1', '7090s', ('7'), [kFRENCH_TRANS[60]])
    f.add_arc('tnod1', '7090s', ('9'),
              [kFRENCH_TRANS[4] + " " + kFRENCH_TRANS[20]])
    f.add_arc('7090s', 'unod2', (), [kFRENCH_TRANS[10]])
    f.add_arc('7090s', '11-16s', (), ())
    f.add_arc('7090s', '10-60e', ('0'), [kFRENCH_TRANS[10]])
    f.add_arc('tnod1', '80s', ('8'),
              [kFRENCH_TRANS[4] + " " + kFRENCH_TRANS[20]])
    f.add_arc('80s', 'unod2', (), ())
    f.add_arc('80s', '10-60e', ('0'), ())

    # Covers 100 - 900: '1' emits the entry for 100 alone, 2-9 emit the
    # digit's entry and the entry for 100 joined by a space.  The last
    # two digits are then handled by the tens machine ('tnod1'), or,
    # after a '0' tens digit, by a units digit or a closing '0'.

    f.add_arc('hundreds', '100-900s', ('1'), [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('hundreds', '100-900s', (str(i)),
                  [kFRENCH_TRANS[i] + " " + kFRENCH_TRANS[100]])
    f.add_arc('100-900s', 'tnod1', (), ())
    f.add_arc('100-900s', '100-900t', ('0'), ())
    f.add_arc('100-900t', 'unod2', (), ())
    f.add_arc('100-900t', '100-900e', ('0'), ())
    f.set_final('100-900e')

    return f
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output unchanged.  Every later
    letter is either dropped (vowels plus h, w, y) or replaced by the
    digit of its soundex group; consecutive letters of the same group
    emit only one digit until three digits have been written.  Letters
    are classified case-insensitively.

    State layout:
        'start'      nothing read yet
        '1'          first letter read, no digit emitted
        '2'..'7'     one digit emitted, last letter was in group 1..6
        '8'          one digit emitted, last letter was a dropped letter
        '9'..'14'    two digits emitted, last letter was in group 1..6
        '15'         two digits emitted, last letter was a dropped letter
        '16'         three or more digits emitted (every further coded
                     letter emits its digit, repeats included)
        '17'..'20'   final states reached through epsilon arcs
    """
    dropped = 'aehiouwy'
    # (members, digit) for soundex groups 1..6, in group order.
    groups = [('bfpv', '1'), ('cgjkqsxz', '2'), ('dt', '3'),
              ('l', '4'), ('mn', '5'), ('r', '6')]
    digit_of = {}
    for members, digit in groups:
        for member in members:
            digit_of[member] = digit

    one_digit = [str(n) for n in range(2, 9)]     # '2'..'8'
    two_digits = [str(n) for n in range(9, 16)]   # '9'..'15'

    f1 = FST('soundex-generate')

    f1.add_state('start')
    for n in range(1, 21):
        f1.add_state(str(n))

    f1.initial_state = 'start'

    for state in ('16', '17', '18', '19', '20'):
        f1.set_final(state)

    for letter in string.ascii_letters:
        # The first letter of the word is always kept as-is.
        f1.add_arc('start', '1', letter, letter)

        # Classify on the lowercase form so that capitals inside a word
        # follow the same arcs as their lowercase counterparts.
        key = letter.lower()

        if key in dropped:
            f1.add_arc('1', '1', letter, ())
            for src in one_digit:
                f1.add_arc(src, '8', letter, ())
            for src in two_digits:
                f1.add_arc(src, '15', letter, ())
            f1.add_arc('16', '16', letter, ())
            continue

        # Every ASCII letter that is not dropped belongs to a group.
        digit = digit_of[key]
        first = str(int(digit) + 1)    # one-digit state of this group
        second = str(int(digit) + 8)   # two-digit state of this group

        f1.add_arc('1', first, letter, digit)
        # Same group again right after the first digit: no new digit.
        f1.add_arc(first, first, letter, ())
        for src in one_digit:
            if src != first:
                f1.add_arc(src, second, letter, digit)
        # Same group again right after the second digit: no new digit.
        f1.add_arc(second, second, letter, ())
        for src in two_digits + ['16']:
            if src != second:
                f1.add_arc(src, '16', letter, digit)

    # Epsilon exits to the dedicated final states.  They are added exactly
    # once, after all letter arcs; previously they were re-added for every
    # uppercase letter, leaving 26 identical copies of each arc.
    f1.add_arc('1', '17', (), ())
    for src in one_digit:
        f1.add_arc(src, '18', (), ())
    for src in two_digits:
        f1.add_arc(src, '19', (), ())
    f1.add_arc('16', '20', (), ())

    return f1
Ejemplo n.º 16
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'     initial state; any ASCII letter is copied to the output
                    and leads to '0'
        '0'         reached after the first letter or after a dropped letter
        '1'..'6'    the last letter read belongs to that soundex group

    A coded letter emits its digit when it moves the machine into a
    different state and emits nothing on the self-loop of its own group.
    Only lowercase letters have arcs after the first position.
    """
    dropped = {'a', 'e', 'i', 'o', 'u', 'h', 'w', 'y'}
    groups = [(['b', 'f', 'p', 'v'], '1'),
              (['c', 'g', 'j', 'k', 'q', 's', 'x', 'z'], '2'),
              (['d', 't'], '3'), (['l'], '4'), (['m', 'n'], '5'),
              (['r'], '6')]

    f1 = FST('soundex-generate')

    f1.add_state('start')
    f1.initial_state = 'start'
    f1.add_state('0')
    f1.set_final('0')

    # First letter of the word: kept verbatim.
    for first in string.ascii_letters:
        f1.add_arc('start', '0', first, first)

    # Dropped letters keep the machine in '0' and write nothing.
    for silent in dropped:
        f1.add_arc('0', '0', silent, ())

    # One final state per digit, plus a letter -> digit lookup.
    digit_of = {}
    coded = set()
    for members, digit in groups:
        coded = coded.union(set(members))
        for member in members:
            digit_of[member] = digit
        f1.add_state(digit)
        f1.set_final(digit)

    for members, digit in groups:
        for member in members:
            f1.add_arc('0', digit, member, digit)   # entering the group
            f1.add_arc(digit, digit, member, ())    # repeat of same group

        # A dropped letter sends the machine back to '0'.
        for silent in dropped:
            f1.add_arc(digit, '0', silent, ())

        # A letter of another group jumps to that group's state and emits
        # that group's digit.
        for other in coded.difference(set(members)):
            target = digit_of[other]
            f1.add_arc(digit, target, other, target)

    return f1
Ejemplo n.º 17
0
def french_count():
    """
    Build the 'french' FST that spells out a digit string in French words.

    States are named '1'..'21'; '1' is the initial state.  The arcs below
    give these roles to the non-final states:
        '1'         start; a '0' loops here without output
        '18'        a hundreds digit has been read
        '20'        hundreds digit followed by '0'
        '5'         tens digit 2-6 has been read
        '3', '6'    tens digit 1 has been read (two alternative paths)
        '11'        tens digit 7 has been read
        '13'        tens digit 8 has been read
        '17'        tens digit 9 has been read

    Words come from kFRENCH_TRANS and kFRENCH_AND, which are defined
    outside this function.
    """
    f = FST('french')

    for number in range(1, 22):
        f.add_state(str(number))

    f.initial_state = '1'

    for accepting in ('2', '4', '7', '8', '9', '10', '12', '14', '15', '19',
                      '21'):
        f.set_final(accepting)

    # Arcs that read the digit '1': (source, target, output words).
    on_one = [
        ('1', '6', [kFRENCH_TRANS[10]]),
        ('18', '6', [kFRENCH_TRANS[10]]),
        ('1', '3', ()),
        ('18', '3', ()),
        ('11', '12', [kFRENCH_AND, kFRENCH_TRANS[11]]),
        ('13', '15', [kFRENCH_TRANS[20], kFRENCH_TRANS[1]]),
        ('17', '21', [kFRENCH_TRANS[20], kFRENCH_TRANS[11]]),
        ('1', '18', [kFRENCH_TRANS[100]]),
        ('5', '8', [kFRENCH_AND, kFRENCH_TRANS[1]]),
    ]
    for source, target, words in on_one:
        f.add_arc(source, target, '1', words)

    # Arcs that read the digit '0'.
    on_zero = [
        ('1', '10', [kFRENCH_TRANS[0]]),
        ('11', '12', [kFRENCH_TRANS[10]]),
        ('18', '20', ()),
        ('6', '9', ()),
        ('20', '10', ()),
        ('13', '14', [kFRENCH_TRANS[20]]),
        ('17', '19', [kFRENCH_TRANS[20], kFRENCH_TRANS[10]]),
        ('1', '1', ()),
        ('5', '7', ()),
    ]
    for source, target, words in on_zero:
        f.add_arc(source, target, '0', words)

    # 11..16 on the path that wrote nothing for the tens digit.
    for unit in range(1, 7):
        f.add_arc('3', '4', str(unit), [kFRENCH_TRANS[unit + 10]])

    for unit in range(2, 10):
        symbol = str(unit)
        f.add_arc('13', '15', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[unit]])
        f.add_arc('5', '8', symbol, [kFRENCH_TRANS[unit]])
        f.add_arc('1', '18', symbol,
                  [kFRENCH_TRANS[unit], kFRENCH_TRANS[100]])

    for digit in range(2, 7):
        symbol = str(digit)
        f.add_arc('11', '12', symbol, [kFRENCH_TRANS[digit + 10]])
        f.add_arc('17', '21', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[digit + 10]])
        f.add_arc('18', '5', symbol, [kFRENCH_TRANS[digit * 10]])
        f.add_arc('1', '5', symbol, [kFRENCH_TRANS[digit * 10]])

    for unit in range(1, 10):
        symbol = str(unit)
        f.add_arc('20', '2', symbol, [kFRENCH_TRANS[unit]])
        f.add_arc('1', '2', symbol, [kFRENCH_TRANS[unit]])

    for unit in range(7, 10):
        symbol = str(unit)
        f.add_arc('11', '12', symbol,
                  [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])
        f.add_arc('6', '9', symbol, [kFRENCH_TRANS[unit]])
        f.add_arc('17', '21', symbol,
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[10],
                   kFRENCH_TRANS[unit]])

    # Tens digit 7 writes the word for 60; 8 and 9 write the word for 4
    # here, and the word for 20 is written by the arc that reads the
    # following digit.
    f.add_arc('18', '11', '7', [kFRENCH_TRANS[60]])
    f.add_arc('1', '11', '7', [kFRENCH_TRANS[60]])

    f.add_arc('1', '13', '8', [kFRENCH_TRANS[4]])
    f.add_arc('18', '13', '8', [kFRENCH_TRANS[4]])

    f.add_arc('1', '17', '9', [kFRENCH_TRANS[4]])
    f.add_arc('18', '17', '9', [kFRENCH_TRANS[4]])

    return f
Ejemplo n.º 18
0
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST, the third stage of the soundex chain.

    State '1' copies every ASCII letter and decimal digit to the output
    unchanged.  Three epsilon arcs ('1' -> '1a' -> '1b' -> '2') then each
    write one '0'; '2' is the only final state.

    NOTE(review): every accepting path writes exactly three zeros, however
    many digits were copied -- presumably another stage trims the result;
    confirm against the caller.
    """
    f3 = FST('soundex-padzero')

    for state in ('1', '1a', '1b', '2'):
        f3.add_state(state)

    f3.initial_state = '1'
    f3.set_final('2')

    # string.ascii_letters / string.digits exist on both Python 2 and 3,
    # unlike string.letters and xrange.
    for symbol in string.ascii_letters + string.digits:
        f3.add_arc('1', '1', symbol, symbol)

    # Append the zeros, one per epsilon arc.
    f3.add_arc('1', '1a', (), '0')
    f3.add_arc('1a', '1b', (), '0')
    f3.add_arc('1b', '2', (), '0')
    return f3
Ejemplo n.º 19
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    Two states are used: 'start' (initial) and 'next' (final).  The first
    lowercase letter is copied to the output; after that each letter found
    in `vowels` is dropped and each letter found in `grp1`..`grp6` is
    replaced by the digit '1'..'6'.  Every coded letter writes its digit,
    including immediate repeats.

    `vowels` and `grp1`..`grp6` are names defined outside this function.
    """
    f1 = FST('soundex-generate')

    f1.add_state('start')
    f1.add_state('next')
    f1.initial_state = 'start'
    f1.set_final('next')

    # Checked in order; the first collection containing the letter decides
    # what the 'next' self-loop writes.
    rules = [
        (vowels, ()),
        (grp1, '1'),
        (grp2, '2'),
        (grp3, '3'),
        (grp4, '4'),
        (grp5, '5'),
        (grp6, '6'),
    ]

    for letter in string.ascii_lowercase:
        f1.add_arc('start', 'next', letter, letter)
        for members, emitted in rules:
            if letter in members:
                f1.add_arc('next', 'next', letter, emitted)
                break
        # A letter that matches no collection gets no 'next' arc.
    return f1
def french_count():
    """
    Build the 'french' FST that spells out a digit string in French words.

    State families:
        'H0'..'H9'    the hundreds digit that was read
        'T0'..'T9'    the tens digit that was read
        'U0'..'U19'   final states, named after the value of the last
                      one or two digits
    plus three epsilon hubs that share arcs between several states, and
    '0_case', reached after two leading zeros.

    Words come from kFRENCH_TRANS and kFRENCH_AND, which are defined
    outside this function.
    """
    hundreds_hub = 'H:(T0-T9)_COMMON'
    units_hub = 'T:(U2-U9)_COMMON'
    teens_hub = 'T:(U11-U19)_COMMON'

    def tens_words(value):
        # Words written as soon as the tens digit is read.
        if value < 2:
            return []
        if value <= 6:
            return [kFRENCH_TRANS[value * 10]]
        if value == 7:
            return [kFRENCH_TRANS[60]]
        return [kFRENCH_TRANS[4] + ' ' + kFRENCH_TRANS[20]]

    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'

    for digit in '0123456789':
        f.add_state('H' + digit)
        f.add_state('T' + digit)
        f.add_state('U' + digit)
        f.add_state('U1' + digit)
        f.set_final('U' + digit)
        f.set_final('U1' + digit)

    f.add_state(hundreds_hub)
    f.add_state(units_hub)
    f.add_state(teens_hub)

    # Hundreds digit.  A leading 0 writes nothing and is continued through
    # '0_case' below; 1 writes just the word for 100.
    f.add_arc('start', 'H0', ['0'], [])
    f.add_arc('start', 'H1', ['1'], [kFRENCH_TRANS[100]])
    f.add_arc('H1', hundreds_hub, [], [])
    for value in range(2, 10):
        digit = str(value)
        f.add_arc('start', 'H' + digit, [digit],
                  [kFRENCH_TRANS[value] + ' ' + kFRENCH_TRANS[100]])
        f.add_arc('H' + digit, hundreds_hub, [], [])

    # Two leading zeros: only the units digit is left to read.
    f.add_state('0_case')
    f.add_arc('H0', '0_case', ['0'], [])
    f.add_arc('0_case', 'U0', ['0'], [kFRENCH_TRANS[0]])
    f.add_arc('0_case', 'U0', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('0_case', units_hub, [], [])

    # Tens digit, reached from the hundreds hub or directly from 'H0'.
    for value in range(10):
        digit = str(value)
        words = tens_words(value)
        f.add_arc(hundreds_hub, 'T' + digit, [digit], words)
        if value != 0:
            f.add_arc('H0', 'T' + digit, [digit], words)

    # Units digit after each tens digit.
    f.add_arc('T0', 'U0', ['0'], [])
    f.add_arc('T0', 'U1', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('T0', units_hub, [], [])

    f.add_arc('T1', teens_hub, [], [])
    f.add_arc('T1', 'U10', ['0'], [kFRENCH_TRANS[10]])

    # 20..69: x1 takes the conjunction before the word for 1.
    for tens in '23456':
        source = 'T' + tens
        f.add_arc(source, 'U0', ['0'], [])
        f.add_arc(source, 'U1', ['1'],
                  [kFRENCH_AND + ' ' + kFRENCH_TRANS[1]])
        f.add_arc(source, units_hub, [], [])

    # 70..79 are listed arc by arc instead of going through the teens hub;
    # the original author noted odd behaviour with an epsilon arc here.
    for value in range(10):
        if value == 1:
            words = [kFRENCH_AND + ' ' + kFRENCH_TRANS[value + 10]]
        elif value >= 7:
            words = [kFRENCH_TRANS[10] + ' ' + kFRENCH_TRANS[value]]
        else:
            words = [kFRENCH_TRANS[value + 10]]
        f.add_arc('T7', 'U' + str(10 + value), [str(value)], words)

    f.add_arc('T8', 'U0', ['0'], [])
    f.add_arc('T8', 'U1', ['1'], [kFRENCH_TRANS[1]])
    f.add_arc('T8', units_hub, [], [])

    f.add_arc('T9', 'U10', ['0'], [kFRENCH_TRANS[10]])
    f.add_arc('T9', teens_hub, [], [])

    # Shared unit arcs: 2..9 from the units hub, 11..19 from the teens hub.
    for value in range(1, 10):
        digit = str(value)
        if value != 1:
            f.add_arc(units_hub, 'U' + digit, [digit],
                      [kFRENCH_TRANS[value]])
        if value < 7:
            teen_words = [kFRENCH_TRANS[value + 10]]
        else:
            teen_words = [kFRENCH_TRANS[10] + ' ' + kFRENCH_TRANS[value]]
        f.add_arc(teens_hub, 'U' + str(10 + value), [digit], teen_words)

    return f
Ejemplo n.º 21
0
def add_zero_padding():
    """
    Build the 'soundex-padzero' FST, the third stage of the soundex chain.

    The machine reads an optional leading ASCII letter followed by zero to
    three digits, copies them to the output and pads with zeros so that
    three digits are written in total:
        no digit      '1' -> '4'                    writes '000'
        one digit     '1' -> '2' -> '4'             writes '00'
        two digits    '1' -> '3a' -> '3b' -> '4'    writes '0'
        three digits  '1' -> '1a' -> '1b' -> '4'    writes nothing extra
    '4' is the only final state.
    """
    f3 = FST('soundex-padzero')

    # '1c' is declared but has no arcs; it is kept so the state set is
    # unchanged.
    for state in ('0', '1', '1a', '1b', '1c', '2', '3a', '3b', '4'):
        f3.add_state(state)
    f3.initial_state = '0'
    f3.set_final('4')

    # string.ascii_letters exists on both Python 2 and 3, unlike
    # string.letters.
    for letter in string.ascii_letters:
        f3.add_arc('0', '1', letter, letter)

    for digit in string.digits:
        f3.add_arc('1', '1a', digit, digit)
        f3.add_arc('1a', '1b', digit, digit)
        f3.add_arc('1b', '4', digit, digit)
        f3.add_arc('1', '2', digit, digit)
        f3.add_arc('1', '3a', digit, digit)
        f3.add_arc('3a', '3b', digit, digit)

    # The epsilon arc '0' -> '1' lets the input start without a letter.
    f3.add_arc('0', '1', (), ())
    f3.add_arc('1', '4', (), '000')
    f3.add_arc('2', '4', (), '00')
    f3.add_arc('3b', '4', (), '0')
    return f3
def french_count():
    """
    Build the 'french' FST that spells out a three-digit string in French.

    Every accepting path reads exactly three digits: the hundreds digit
    from 'start', the tens digit from '2' (hundreds digit 0) or '14'
    (hundreds digit 1-9), and the units digit from one of the states below.
        '3'     tens digit 0 after a leading 0
        '15'    tens digit 0 after a non-zero hundreds digit
        '5'     tens digit 1
        '8'     tens digit 2-6
        '10'    tens digit 7
        '11'    tens digit 8
        '13'    tens digit 9
    Final states are '4', '6', '7', '9', '12' and '16'.

    Words come from kFRENCH_TRANS and kFRENCH_AND, which are defined
    outside this function.
    """
    f = FST('french')

    f.add_state('start')
    for number in range(2, 17):
        f.add_state(str(number))

    f.initial_state = 'start'

    for accepting in ('4', '6', '7', '9', '12', '16'):
        f.set_final(accepting)

    def add_units(source, with_and):
        # Units 1-9 into '4'; `with_and` puts the conjunction before 1.
        for unit in range(1, 10):
            words = [kFRENCH_TRANS[unit]]
            if with_and and unit == 1:
                words = [kFRENCH_AND] + words
            f.add_arc(source, '4', [str(unit)], words)

    def add_teens(source, with_and):
        # Units 0-6 write the word for 10..16 and end in '6'; units 7-9
        # write the word for 10 followed by the unit and end in '7'.
        for unit in range(0, 7):
            words = [kFRENCH_TRANS[unit + 10]]
            if with_and and unit == 1:
                words = [kFRENCH_AND] + words
            f.add_arc(source, '6', [str(unit)], words)
        for unit in range(7, 10):
            f.add_arc(source, '7', [str(unit)],
                      [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    def add_tens(source):
        # Tens digits 2-6 write their own word and continue in '8'.
        for tens in range(2, 7):
            f.add_arc(source, '8', [str(tens)], [kFRENCH_TRANS[tens * 10]])

    # 000-009
    f.add_arc('start', '2', '0', ())
    f.add_arc('2', '3', '0', ())
    for unit in range(0, 10):
        f.add_arc('3', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # 010-019
    f.add_arc('2', '5', '1', ())
    add_teens('5', False)

    # 020-069
    add_tens('2')
    f.add_arc('8', '9', '0', ())
    add_units('8', True)

    # 070-079: the word for 60 followed by 10..19
    f.add_arc('2', '10', '7', [kFRENCH_TRANS[60]])
    add_teens('10', True)

    # 080-089: the words for 4 and 20 followed by the unit
    f.add_arc('2', '11', '8', [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('11', '12', '0', ())
    add_units('11', False)

    # 090-099: the words for 4 and 20 followed by 10..19
    f.add_arc('2', '13', '9', [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    add_teens('13', False)

    # 1xx: the hundreds digit 1 writes only 'cent'
    f.add_arc('start', '14', '1', ['cent'])

    f.add_arc('14', '15', '0', ())
    f.add_arc('15', '16', '0', ())
    add_units('15', False)

    # After the hundreds digit the tens digit re-enters the states above.
    f.add_arc('14', '5', '1', ())
    add_tens('14')
    f.add_arc('14', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('14', '11', '8', [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('14', '13', '9', [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])

    # 2xx-9xx: the digit's word followed by 'cent'
    for hundreds in range(2, 10):
        f.add_arc('start', '14', [str(hundreds)],
                  [kFRENCH_TRANS[hundreds], 'cent'])

    return f
Ejemplo n.º 23
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        'start'            initial state
        'next'             declared, but no arc leads into it
        'next0'            last letter was dropped (vowels, h, w, y)
        'next1'..'next6'   last letter belongs to soundex group 1..6
    All 'nextN' states with a digit suffix are final.

    The first letter is copied to the output.  After that a letter writes
    its group digit when it changes the state and nothing when it repeats
    the current group; dropped letters never write anything.  Upper- and
    lowercase letters are treated alike.
    """
    codes = '0123456'
    # Lowercase members of group 0 (dropped) and soundex groups 1..6.
    groups = ['aehiouwy', 'bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r']
    code_of = {}
    for index, members in enumerate(groups):
        for member in members + members.upper():
            code_of[member] = str(index)

    f1 = FST('soundex-generate')

    f1.add_state('start')
    f1.add_state('next')
    for code in codes:
        f1.add_state('next' + code)
    f1.initial_state = 'start'

    for code in codes:
        f1.set_final('next' + code)

    # string.ascii_letters exists on both Python 2 and 3, unlike
    # string.letters; every letter in it appears in exactly one group.
    for letter in string.ascii_letters:
        code = code_of[letter]
        target = 'next' + code
        # Group 0 is dropped, so it never writes a digit.
        emitted = () if code == '0' else code

        f1.add_arc('start', target, letter, letter)

        f1.add_arc('next', target, letter, emitted)
        # Repeating the current group writes nothing.
        f1.add_arc(target, target, letter, ())

        for other in codes:
            if other != code:
                f1.add_arc('next' + other, target, letter, emitted)

    return f1
Ejemplo n.º 24
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        '1'         initial state
        '2'         last letter was dropped (vowels, h, w, y)
        '3'..'8'    last letter belongs to soundex group 1..6
    States '2'..'8' are all final.

    The first letter is copied to the output.  After that a letter writes
    its group digit when it comes from a different state and nothing when
    it repeats the current group; dropped letters never write anything.
    Upper- and lowercase letters are treated alike.
    """
    # Lowercase members of the dropped group followed by soundex groups
    # 1..6; the group at index i lives in state str(i + 2).
    groups = ['aeiouhwy', 'bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r']
    group_of = {}
    for index, members in enumerate(groups):
        for member in members + members.upper():
            group_of[member] = index

    f1 = FST('soundex-generate')
    for number in range(1, 9):
        f1.add_state(str(number))

    f1.initial_state = '1'

    for number in range(2, 9):
        f1.set_final(str(number))

    for letter in string.ascii_letters:
        index = group_of[letter]
        home = str(index + 2)
        # Index 0 is the dropped group and never writes a digit.
        emitted = str(index) if index else ()

        # First letter of the word: kept verbatim.
        f1.add_arc('1', home, letter, letter)

        for number in range(2, 9):
            source = str(number)
            if source == home:
                # Repeating the current group writes nothing.  This
                # self-loop was missing for m/n (state '7'), which left a
                # doubled m or n with no arc to follow.
                f1.add_arc(source, home, letter, ())
            else:
                f1.add_arc(source, home, letter, emitted)
    return f1
def add_zero_padding():
    """
    Build the third soundex FST, which pads the code with trailing zeros.

    Letters are copied through unchanged, up to three digits are copied
    through unchanged, and when the input ends with fewer than three
    digits the missing positions are filled with '0'.

    Returns:
        The configured 'soundex-padzero' FST.
    """
    # Now, the third fst - the zero-padding fst
    f3 = FST('soundex-padzero')

    # '1'  : reading letters, no digit seen yet
    # '1a' : one digit seen
    # '1b' : two digits seen
    # '2'  : three digits present, either read or padded
    for state in ('1', '1a', '1b', '2'):
        f3.add_state(state)

    f3.initial_state = '1'

    # Only '2' is accepting.  State '1' must not be final: otherwise a
    # code that consists of letters only would be accepted as-is, without
    # the three zeros it still needs.
    f3.set_final('2')

    # Epsilon arcs supply exactly the zeros that are still missing.  They
    # all end in '2', which has no outgoing arcs, so no digit can follow
    # the padding.
    f3.add_arc('1', '2', (), ('000'))
    f3.add_arc('1a', '2', (), ('00'))
    f3.add_arc('1b', '2', (), ('0'))

    for letter in string.letters:
        f3.add_arc('1', '1', (letter), (letter))

    # Each digit is copied and advances the digit count by one.
    for source, target in (('1', '1a'), ('1a', '1b'), ('1b', '2')):
        for number in xrange(10):
            f3.add_arc(source, target, (str(number)), (str(number)))

    return f3
Ejemplo n.º 26
0
def letters_to_numbers():
    """
    Build the soundex letter-to-digit transducer.

    The first input letter is copied to the output unchanged.  Every later
    consonant is replaced by the digit of its class, unless the machine is
    already in that class's state, in which case it produces no output.
    Vowels (together with w, y and h) after the first letter produce no
    output and move the machine to the 'vowels' state.

    Returns:
        The 'soundex-generate' FST.
    """
    fst = FST('soundex-generate')

    # Consonant classes; the class at position k is written as digit k + 1
    # and is tracked by the state named k (an int).
    consonant_classes = (
        'bfpvBFPV',
        'cCgGJjKkQqSsXxZz',
        'dDTt',
        'Ll',
        'MNmn',
        'Rr',
    )
    silent = 'aeiouwyhAEIOUWYH'
    class_ids = range(len(consonant_classes))

    fst.add_state('start')
    fst.add_state('vowels')
    fst.set_final('vowels')
    for cid in class_ids:
        fst.add_state(cid)
        fst.set_final(cid)

    fst.initial_state = 'start'

    for ch in string.ascii_letters:
        if ch in silent:
            # Kept only when it is the very first letter.
            fst.add_arc('start', 'vowels', ch, ch)
            fst.add_arc('vowels', 'vowels', ch, ())
            for cid in class_ids:
                fst.add_arc(cid, 'vowels', ch, ())
            continue

        for cid in class_ids:
            if ch not in consonant_classes[cid]:
                continue
            digit = str(cid + 1)[0]
            fst.add_arc('start', cid, ch, ch)
            fst.add_arc('vowels', cid, ch, digit)
            # Same class as the state we are in: nothing is emitted.
            fst.add_arc(cid, cid, ch, ())
            for origin in class_ids:
                if origin != cid:
                    fst.add_arc(origin, cid, ch, digit)

    return fst
Ejemplo n.º 27
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output unchanged.  After it, a
    consonant is replaced by the digit of its class unless the machine is
    already in that class's state; the letters a, e, h, i, o, u, w, y are
    consumed without output and do not change the current class state.

    Returns:
        The 'soundex-generate' FST.
    """

    # Let's define our first FST
    f1 = FST('soundex-generate')

    # Letters that never produce a digit.  Upper-case 'H' is included so
    # that words containing it are accepted like every other letter.
    char_removal = 'aehiouwyAEHIOUWY'

    # (state name, emitted digit, member letters) for each consonant class.
    char_classes = (
        ('rp1', '1', 'bfpvBFPV'),
        ('rp2', '2', 'cgjkqsxzCGJKQSXZ'),
        ('rp3', '3', 'dtDT'),
        ('rp4', '4', 'lL'),
        ('rp5', '5', 'mnMN'),
        ('rp6', '6', 'rR'),
    )
    class_states = [state for state, _, _ in char_classes]

    # indicate all the states
    f1.add_state('start')
    f1.add_state('next')
    for state in class_states:
        f1.add_state(state)

    # indicate the initial and final state
    f1.initial_state = 'start'
    f1.set_final('next')
    for state in class_states:
        f1.set_final(state)

    # Add the arcs.  Each arc is added exactly once.
    for letter in char_removal:
        f1.add_arc('start', 'next', (letter), (letter))
        f1.add_arc('next', 'next', (letter), ())
        for state in class_states:
            f1.add_arc(state, state, (letter), ())

    for state, digit, members in char_classes:
        for letter in members:
            f1.add_arc('start', state, (letter), (letter))
            f1.add_arc('next', state, (letter), (digit))
            for source in class_states:
                if source == state:
                    # Repeated class: no second digit.
                    f1.add_arc(source, state, (letter), ())
                else:
                    f1.add_arc(source, state, (letter), (digit))

    return f1
Ejemplo n.º 28
0
def letters_to_numbers():
    """
    Build the soundex letter-to-digit transducer.

    States '0' to '6' record the class of the most recent letter; class 0
    holds the letters that never produce a digit.  The first letter is
    copied as-is, and afterwards a letter emits its class digit only when
    it belongs to a class other than 0 and other than the current state.

    Returns:
        The 'soundex-generate' FST.
    """
    transducer = FST('soundex-generate')

    # 'next' is registered here although no arc in this function uses it.
    transducer.add_state('start')
    transducer.add_state('next')
    transducer.initial_state = 'start'

    # One accepting state per class.
    for number in range(0, 7):
        name = str(number)
        transducer.add_state(name)
        transducer.set_final(name)

    # Lower-case members of each class; the position is the class digit.
    groups = ['aehiouwy', 'bfpv', 'cgjkqsxz', 'dt', 'l', 'mn', 'r']

    # First letter: copied unchanged, in both cases.
    for target, members in enumerate(groups):
        for ch in members:
            for symbol in (ch, ch.upper()):
                transducer.add_arc('start', str(target), symbol, symbol)

    # Later letters: silent when the class repeats or is class 0.
    for origin in range(0, 7):
        for target, members in enumerate(groups):
            if target == origin or target == 0:
                emitted = ()
            else:
                emitted = str(target)
            for ch in members:
                for symbol in (ch, ch.upper()):
                    transducer.add_arc(str(origin), str(target), symbol,
                                       emitted)

    return transducer
Ejemplo n.º 29
0
def add_zero_padding():
    """
    Build the third soundex FST, which pads the code with trailing zeros.

    Letters and up to three digits are copied through unchanged.  When the
    input ends with fewer than three digits, one '0' is emitted for each
    missing position.  Padding happens on dedicated states that accept no
    further digits, so a zero can never be emitted in front of an input
    digit.

    Returns:
        The configured 'soundex-padzero' FST.
    """
    # Now, the third fst - the zero-padding fst
    f3 = FST('soundex-padzero')

    # Digit-counting states: '1' (no digit yet), '1a' (one), '1b' (two),
    # '2' (three, accepting).
    # Pad-only states: 'pad2' / 'pad1' still owe two / one more zero.
    for state in ('1', '1a', '1b', 'pad2', 'pad1', '2'):
        f3.add_state(state)

    f3.initial_state = '1'
    f3.set_final('2')

    for letter in string.letters:
        f3.add_arc('1', '1', (letter), (letter))

    # From each counting state: either start padding (empty input, one
    # '0' out) or copy a digit and move to the next counting state.
    f3.add_arc('1', 'pad2', (), ('0'))
    for number in xrange(10):
        f3.add_arc('1', '1a', (str(number)), (str(number)))
    f3.add_arc('1a', 'pad1', (), ('0'))
    for number in xrange(10):
        f3.add_arc('1a', '1b', (str(number)), (str(number)))
    f3.add_arc('1b', '2', (), ('0'))
    for number in xrange(10):
        f3.add_arc('1b', '2', (str(number)), (str(number)))

    # Once padding has begun, only further zeros lead to the final state.
    f3.add_arc('pad2', 'pad1', (), ('0'))
    f3.add_arc('pad1', '2', (), ('0'))
    return f3
Ejemplo n.º 30
0
def add_zero_padding():
    """
    Build the third soundex FST, which brings the code to three digits.

    'start' is the initial state and '0' is entered after a letter.
    States '1', '2' and '3' are entered after the first, second and third
    output digit, where a digit is either copied from the input or
    produced as '0' by an arc with empty input.  '3' is the only final
    state; input digits read there are consumed without output.

    Returns:
        The configured 'soundex-padzero' FST.
    """
    # Now, the third fst - the zero-padding fst
    f3 = FST('soundex-padzero')

    f3.add_state('start')
    f3.initial_state = 'start'
    for count in range(4):
        f3.add_state(str(count))
    f3.set_final('3')

    # Padding arcs: empty input, a single '0' of output.
    # NOTE(review): a padding arc may be followed by a digit arc, so an
    # input such as one letter plus one digit has more than one accepting
    # path (the zero can precede the digit).  Which path transduce()
    # reports depends on the FST implementation -- confirm.
    f3.add_arc('0', '1', (''), ('0'))
    f3.add_arc('start', '1', (''), ('0'))
    f3.add_arc('1', '2', (''), ('0'))
    f3.add_arc('2', '3', (''), ('0'))

    for letter in string.letters:
        f3.add_arc('start', '0', (letter), (letter))

    # A digit may also appear directly at the start, without a letter.
    for n in range(10):
        f3.add_arc('start', '1', (str(n)), (str(n)))

    # Copy a digit and advance the digit count.
    for count in range(3):
        for n in range(10):
            f3.add_arc(str(count), str(count + 1), (str(n)), (str(n)))

    # Digits beyond the third are read but not written.
    for n in range(10):
        f3.add_arc('3', '3', (str(n)), ())

    return f3
    def generate(self, analysis):
        """Produce the surface word for a stem plus a form tag.

        e.g.
        p = Parser()
        analysis = ['p','a','n','i','c','+past form']
        p.generate(analysis)
        ---> 'panicked'

        The stem letters are copied through.  A stem ending in a vowel
        followed by 'c' receives 'ked' / 'king'; a stem ending in any other
        consonant position receives 'ed' / 'ing'.

        Returns the first result of the transduction joined into a string.
        """
        fst = FST('morphology-generate')

        # States track what kind of letter was read last.
        for name in ('start', 'vowel', 'consonant', 'c', 'form_1', 'form_2'):
            fst.add_state(name)
        fst.initial_state = 'start'
        fst.set_final('form_1')
        fst.set_final('form_2')

        vowel_chars = 'aeiou'
        for ch in vowel_chars:
            for origin in ('start', 'vowel', 'consonant', 'c'):
                fst.add_arc(origin, 'vowel', ch, ch)

        for ch in string.ascii_lowercase:
            if ch in vowel_chars:
                continue

            # 'c' directly after a vowel is handled by its own state below.
            if ch != 'c':
                fst.add_arc('vowel', 'consonant', ch, ch)
            for origin in ('start', 'consonant', 'c'):
                fst.add_arc(origin, 'consonant', ch, ch)

        fst.add_arc('vowel', 'c', 'c', 'c')

        # (source, target, form tag, suffix written to the output)
        suffix_arcs = (
            ('c', 'form_1', '+past form', 'ked'),
            ('c', 'form_1', '+present participle form', 'king'),
            ('consonant', 'form_2', '+past form', 'ed'),
            ('consonant', 'form_2', '+present participle form', 'ing'),
        )
        for source, target, tag, suffix in suffix_arcs:
            fst.add_arc(source, target, tag, suffix)

        first_result = fst.transduce(analysis)[0]

        return "".join(first_result)
Ejemplo n.º 32
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    State '0' is the initial state; the first letter is copied unchanged
    and leads to state '1'.  Afterwards vowels (with w, h, y) lead to 'v'
    without output, and a consonant leads to its class state 'q1'..'q6',
    emitting the class digit unless the machine is already in that state.

    Returns:
        The 'soundex-generate' FST.
    """
    vowels = 'aeiouwhyAEIOUWHY'

    # (state name, emitted digit, member letters) for each consonant class.
    consonant_classes = (
        ('q1', '1', 'BFPVbfpv'),
        ('q2', '2', 'CGJKQSXZcgjkqsxz'),
        ('q3', '3', 'dtDT'),
        ('q4', '4', 'lL'),
        ('q5', '5', 'mnMN'),
        ('q6', '6', 'rR'),
    )

    # Let's define our first FST
    f1 = FST('soundex-generate')

    states = ['0', '1', 'v', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6']
    for state in states:
        f1.add_state(state)

    f1.initial_state = '0'

    # Every state except the initial one is accepting, and every one of
    # them can be the source of an arc for a non-initial letter.
    later_states = states[1:]
    for state in later_states:
        f1.set_final(state)

    for letter in string.ascii_letters:
        # NOTE(review): the first letter always leads to '1' regardless of
        # its class, so a second letter of the same class as the first is
        # still encoded as a digit -- confirm this is the intended variant.
        f1.add_arc('0', '1', (letter), (letter))

        if letter in vowels:
            for source in later_states:
                f1.add_arc(source, 'v', (letter), ())

        for state, digit, members in consonant_classes:
            if letter in members:
                for source in later_states:
                    # No digit when the class repeats.
                    f1.add_arc(source, state, (letter),
                               () if source == state else (digit))

    return f1
Ejemplo n.º 33
0

# Demo run when the file is executed as a script: two small transducers are
# built by hand and passed to compose() together with the input 'BAD'.
if __name__ == '__main__':
    # First transducer: the first input letter is rewritten to '1' and every
    # following letter to '0'.
    f1 = FST('test-generate')

    # 'start' is the initial state
    f1.add_state('start')
    f1.add_state('next')
    f1.initial_state = 'start'

    # Set all the final states
    f1.set_final('next')

    # Add the rest of the arcs
    for letter in ['A', 'B', 'C', 'D']:
        f1.add_arc('start', 'next', letter, '1')
        f1.add_arc('next', 'next', letter, '0')

    # Second transducer: reads the '1'/'0' symbols.  Two arcs leave 'start'
    # on '1' (output 'a' or 'an'), so there are two possible outputs there.
    f2 = FST('test-generate')
    f2.add_state('start')
    f2.add_state('next')
    f2.initial_state = 'start'
    f2.set_final('next')

    f2.add_arc('start', 'next', '1', 'a')
    f2.add_arc('start', 'next', '1', 'an')
    f2.add_arc('next', 'next', '0', 'b')

    # NOTE(review): compose() is defined outside this excerpt; presumably it
    # feeds the output of f1 into f2 -- confirm.
    output = compose(tuple('BAD'), f1, f2)
    print output
    # NOTE(review): the body of the loop below is missing in this copy.
    for o in output:
def french_count():
    """
    Build the FST that spells out a three-digit number in French.

    The machine reads the hundreds, tens and units digit in turn.  State
    names show what has been read so far: '0XX' / 'NXX' after a zero /
    non-zero hundreds digit, then one state per kind of tens digit, and
    'final' once the units digit has been consumed.  The words come from
    kFRENCH_TRANS and kFRENCH_AND, which are defined outside this function.

    Returns:
        The configured 'french' FST.
    """
    f = FST('french')

    for name in ('start', 'final', '0XX', '00X', '01X', 'XX', '7X', '8X',
                 '9X', 'NXX', 'N0X'):
        f.add_state(name)

    f.initial_state = 'start'
    f.set_final('final')

    def teen_words(unit):
        # Words for 10 + unit: a single word up to 16, otherwise the word
        # for 10 followed by the word for the unit.
        if unit < 7:
            return [kFRENCH_TRANS[unit + 10]]
        return [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]]

    # 000-009: two silent leading zeros, then the unit word
    f.add_arc('start', '0XX', ['0'], ())
    f.add_arc('0XX', '00X', ['0'], ())
    for unit in xrange(10):
        f.add_arc('00X', 'final', [str(unit)], [kFRENCH_TRANS[unit]])

    # 010-019
    f.add_arc('0XX', '01X', ['1'], ())
    for unit in xrange(10):
        f.add_arc('01X', 'final', [str(unit)], teen_words(unit))

    # 020-069: the tens word first, then the unit
    for ten in xrange(2, 7):
        f.add_arc('0XX', 'XX', [str(ten)], [kFRENCH_TRANS[ten * 10]])
    # round tens add nothing; 21, 31, ... insert the "and" word
    f.add_arc('XX', 'final', ['0'], ())
    f.add_arc('XX', 'final', ['1'], [kFRENCH_AND, kFRENCH_TRANS[1]])
    for unit in xrange(2, 10):
        f.add_arc('XX', 'final', [str(unit)], [kFRENCH_TRANS[unit]])

    # 070-079: sixty followed by 10..19
    f.add_arc('0XX', '7X', ['7'], [kFRENCH_TRANS[60]])
    f.add_arc('7X', 'final', ['0'], [kFRENCH_TRANS[10]])
    f.add_arc('7X', 'final', ['1'], [kFRENCH_AND, kFRENCH_TRANS[11]])
    for unit in xrange(2, 10):
        f.add_arc('7X', 'final', [str(unit)], teen_words(unit))

    # 080-089: four-twenty followed by the unit
    f.add_arc('0XX', '8X', ['8'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('8X', 'final', ['0'], ())
    for unit in xrange(1, 10):
        f.add_arc('8X', 'final', [str(unit)], [kFRENCH_TRANS[unit]])

    # 090-099: four-twenty followed by 10..19
    f.add_arc('0XX', '9X', ['9'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    for unit in xrange(10):
        f.add_arc('9X', 'final', [str(unit)], teen_words(unit))

    # Hundreds digit: 1 gives the hundred word alone, 2-9 prefix the digit
    f.add_arc('start', 'NXX', ['1'], [kFRENCH_TRANS[100]])
    for hundred in xrange(2, 10):
        f.add_arc('start', 'NXX', [str(hundred)],
                  [kFRENCH_TRANS[hundred], kFRENCH_TRANS[100]])

    # X00-X09
    f.add_arc('NXX', 'N0X', ['0'], ())
    f.add_arc('N0X', 'final', ['0'], ())
    for unit in xrange(1, 10):
        f.add_arc('N0X', 'final', [str(unit)], [kFRENCH_TRANS[unit]])

    # X10-X99 reuse the tens states built above
    f.add_arc('NXX', '01X', ['1'], ())
    for ten in xrange(2, 7):
        f.add_arc('NXX', 'XX', [str(ten)], [kFRENCH_TRANS[ten * 10]])
    f.add_arc('NXX', '7X', ['7'], [kFRENCH_TRANS[60]])
    f.add_arc('NXX', '8X', ['8'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    f.add_arc('NXX', '9X', ['9'], [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])

    return f
Ejemplo n.º 35
0
def french_count():
    """
    Build the FST that spells out a number of up to three digits in French.

    The machine is nondeterministic: several arcs leave state '0' on the
    same digit (for example as a units, tens or hundreds digit), and only
    the path that ends in a final state when the input is exhausted is
    valid.  The arcs are therefore added in a fixed order.

    States used by the arcs below:
        '0'   initial state (leading zeros loop here)
        '17'  after the hundreds digit, '19' after hundreds digit + '0'
        '2'   after tens digit 1 with nothing written (for 11-16)
        '5'   after tens digit 1 with the word for 10 written
        '4'   after tens digit 2-6
        '10'  after tens digit 7, '12' after 8, '16' after 9
    The remaining numbered states are the end points of those paths.

    The words come from kFRENCH_TRANS and kFRENCH_AND, which are defined
    outside this function.

    Returns:
        The configured 'french' FST.
    """
    f = FST('french')

    # States '21' to '25' are registered but not used by any arc below.
    for number in range(26):
        f.add_state(str(number))

    f.initial_state = '0'

    for number in (1, 3, 6, 7, 8, 9, 11, 13, 14, 18, 20):
        f.set_final(str(number))

    two_to_six = [2, 3, 4, 5, 6]
    one_to_six = [1, 2, 3, 4, 5, 6]
    seven_eight_nine = [7, 8, 9]
    singles_all = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    singles = [2, 3, 4, 5, 6, 7, 8, 9]

    # Arcs reading the digit 0
    f.add_arc('0', '0', '0', ())
    f.add_arc('4', '6', '0', ())
    f.add_arc('5', '8', '0', ())
    f.add_arc('0', '9', '0', [kFRENCH_TRANS[0]])
    f.add_arc('10', '11', '0', [kFRENCH_TRANS[10]])
    f.add_arc('12', '13', '0', [kFRENCH_TRANS[20]])
    f.add_arc('16', '18', '0', [kFRENCH_TRANS[20], kFRENCH_TRANS[10]])
    f.add_arc('17', '19', '0', ())
    f.add_arc('19', '9', '0', ())

    # Arcs reading the digit 1
    f.add_arc('0', '2', '1', ())
    f.add_arc('17', '2', '1', ())
    f.add_arc('0', '17', '1', [kFRENCH_TRANS[100]])
    f.add_arc('0', '5', '1', [kFRENCH_TRANS[10]])
    f.add_arc('17', '5', '1', [kFRENCH_TRANS[10]])
    # The "and" word is used for 21-61 and for 71 ...
    f.add_arc('4', '7', '1', [kFRENCH_AND, kFRENCH_TRANS[1]])
    f.add_arc('10', '11', '1', [kFRENCH_AND, kFRENCH_TRANS[11]])
    # ... but not for 81 and 91, which are written without it.
    f.add_arc('12', '14', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[1]])
    f.add_arc('16', '20', '1', [kFRENCH_TRANS[20], kFRENCH_TRANS[11]])

    # 11-16 as a single word
    for i in one_to_six:
        f.add_arc('2', '3', str(i), [kFRENCH_TRANS[i + 10]])

    for i in two_to_six:
        f.add_arc('0', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('17', '4', str(i), [kFRENCH_TRANS[i * 10]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[i + 10]])
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[i + 10]])

    for i in singles:
        f.add_arc('4', '7', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('0', '17', str(i), [kFRENCH_TRANS[i], kFRENCH_TRANS[100]])
        f.add_arc('12', '14', str(i), [kFRENCH_TRANS[20], kFRENCH_TRANS[i]])

    for i in singles_all:
        f.add_arc('0', '1', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('19', '1', str(i), [kFRENCH_TRANS[i]])

    # 17-19, 77-79 and 97-99: the word for 10 followed by the unit
    for i in seven_eight_nine:
        f.add_arc('5', '8', str(i), [kFRENCH_TRANS[i]])
        f.add_arc('10', '11', str(i), [kFRENCH_TRANS[10], kFRENCH_TRANS[i]])
        f.add_arc('16', '20', str(i),
                  [kFRENCH_TRANS[20], kFRENCH_TRANS[10], kFRENCH_TRANS[i]])

    # Tens digits 7, 8 and 9 write the first word of the compound form
    f.add_arc('0', '10', '7', [kFRENCH_TRANS[60]])
    f.add_arc('17', '10', '7', [kFRENCH_TRANS[60]])

    f.add_arc('0', '12', '8', [kFRENCH_TRANS[4]])
    f.add_arc('17', '12', '8', [kFRENCH_TRANS[4]])

    f.add_arc('0', '16', '9', [kFRENCH_TRANS[4]])
    f.add_arc('17', '16', '9', [kFRENCH_TRANS[4]])

    return f
Ejemplo n.º 36
0
	return output_list

# Demo run when the file is executed as a script: two small transducers are
# built by hand and passed to compose() together with the input 'BAD'.
if __name__ == '__main__':
	# First transducer: the first input letter is rewritten to '1' and every
	# following letter to '0'.
	f1 = FST('test-generate')

	# 'start' is the initial state
	f1.add_state('start')
	f1.add_state('next')
	f1.initial_state = 'start'

	# Set all the final states
	f1.set_final('next')

	# Add the rest of the arcs
	for letter in ['A','B','C','D']:
		f1.add_arc('start', 'next', letter, '1')
		f1.add_arc('next', 'next', letter, '0')

	# Second transducer: reads the '1'/'0' symbols.  Two arcs leave 'start'
	# on '1' (output 'a' or 'an'), so there are two possible outputs there.
	f2 = FST('test-generate')
	f2.add_state('start')
	f2.add_state('next')
	f2.initial_state = 'start'
	f2.set_final('next')
	
	f2.add_arc('start', 'next', '1', 'a')
	f2.add_arc('start', 'next', '1', 'an')
	f2.add_arc('next', 'next', '0', 'b')

	# NOTE(review): compose() is defined outside this excerpt; presumably it
	# feeds the output of f1 into f2 -- confirm.
	output = compose(tuple('BAD'), f1, f2)
	print output
	# NOTE(review): the body of the loop below is missing in this copy.
	for o in output:
Ejemplo n.º 37
0
def french_count():
    """
    Build the FST that spells out a three-digit number in French.

    The hundreds digit is read from 'start', the tens digit from
    '1stzero', and the units digit from the state chosen by the tens
    digit.  Arcs with an empty input tuple write a word without reading a
    digit.  The words come from kFRENCH_TRANS and kFRENCH_AND, which are
    defined outside this function.

    Every arc is added exactly once.

    NOTE(review): state 'zero' is final but no arc leads to it, so an
    input consisting only of zeros produces no word at all -- confirm
    whether the word for 0 should be written in that case.

    Returns:
        The configured 'french' FST.
    """
    f = FST('french')

    f.add_state('start')
    f.initial_state = 'start'
    for name in ('1stzero', 'tens', 'seventeen', 'final_seventeen',
                 'eighteen', 'final_eighteen', 'nineteen', 'final_nineteen',
                 'zero', 'ones', '20-69', '70-ten', '80s', '90s', '100s',
                 'et', '10-et', 'et-un', 'et-onze'):
        f.add_state(name)

    for name in ('zero', 'ones', 'tens', 'final_seventeen', 'final_eighteen',
                 'final_nineteen', '20-69', '70-ten', '80s', '90s', 'et-un',
                 'et-onze'):
        f.set_final(name)

    # (units digit, intermediate state, final state) for 17, 18 and 19,
    # which are written as the word for 10 followed by the unit word.
    split_teens = (
        ('7', 'seventeen', 'final_seventeen'),
        ('8', 'eighteen', 'final_eighteen'),
        ('9', 'nineteen', 'final_nineteen'),
    )

    # 100 - 999: a hundreds digit of 1 writes the hundred word alone,
    # 2-9 write the digit word first and the hundred word on an empty arc.
    f.add_arc('start', '1stzero', '1', [kFRENCH_TRANS[100]])
    for i in range(2, 10):
        f.add_arc('start', '100s', str(i), [kFRENCH_TRANS[i]])

    f.add_arc('100s', '1stzero', (), [kFRENCH_TRANS[100]])

    # 0 - 9
    f.add_arc('start', '1stzero', '0', [])
    f.add_arc('1stzero', 'ones', '0', [])
    for ii in range(1, 10):
        f.add_arc('ones', 'ones', str(ii), [kFRENCH_TRANS[ii]])

    f.add_arc('ones', 'ones', '0', [])

    # 10 - 16: a single word each
    f.add_arc('1stzero', 'tens', '1', [])
    for unit in range(7):
        f.add_arc('tens', 'tens', str(unit), [kFRENCH_TRANS[10 + unit]])

    # 17 - 19: the word for 10 on the digit, the unit word on an empty arc
    for digit, middle, end in split_teens:
        f.add_arc('tens', middle, digit, [kFRENCH_TRANS[10]])
        f.add_arc(middle, end, (), [kFRENCH_TRANS[int(digit)]])

    # 20 - 69
    for ten in range(2, 7):
        f.add_arc('1stzero', '20-69', str(ten), [kFRENCH_TRANS[ten * 10]])

    for i in range(2, 10):
        f.add_arc('20-69', '20-69', str(i), [kFRENCH_TRANS[i]])

    # handles 20, 30 ... 60: the round tens add no further word
    f.add_arc('20-69', '20-69', '0', [])

    # handles 21, 31, ... 61
    f.add_arc('20-69', 'et', '1', [kFRENCH_AND])
    f.add_arc('et', 'et-un', (), [kFRENCH_TRANS[1]])

    # 70 - 79: sixty followed by 10..19
    f.add_arc('1stzero', '70-ten', '7', [kFRENCH_TRANS[60]])
    f.add_arc('70-ten', '70-ten', '0', [kFRENCH_TRANS[10]])
    # handle 71 here
    f.add_arc('70-ten', '10-et', '1', [kFRENCH_AND])
    f.add_arc('10-et', 'et-onze', (), [kFRENCH_TRANS[11]])
    for unit in range(2, 7):
        f.add_arc('70-ten', '70-ten', str(unit), [kFRENCH_TRANS[10 + unit]])

    # 77 - 79 share the intermediate states (and their empty arcs to the
    # final states) with 17 - 19.
    for digit, middle, _ in split_teens:
        f.add_arc('70-ten', middle, digit, [kFRENCH_TRANS[10]])

    # 80 - 89: the word for 4 on the digit, the word for 20 afterwards
    f.add_arc('1stzero', '80s', '8', [kFRENCH_TRANS[4]])
    f.add_arc('80s', 'ones', (), [kFRENCH_TRANS[20]])
    f.add_arc('80s', '80s', '0', [kFRENCH_TRANS[20]])

    # 90 - 99: as 80 - 89, continuing with 10..19
    f.add_arc('1stzero', '90s', '9', [kFRENCH_TRANS[4]])
    f.add_arc('90s', 'tens', (), [kFRENCH_TRANS[20]])

    return f
Ejemplo n.º 38
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter of the input is copied to the output unchanged.  Every
    later letter is either dropped (a, e, h, i, o, u, w, y) or replaced by
    the digit of its consonant group.  A run of adjacent letters from the
    same group -- including a run that starts with the first letter --
    produces that group's digit only once.

    States:
        'initial'   nothing has been read yet
        '0'         the last letter read belongs to no consonant group
        '1'..'6'    the last letter read belongs to that consonant group

    Upper-case letters are only accepted in the first position; all later
    letters must be lower-case.
    """

    # Let's define our first FST
    f1 = FST('soundex-generate')

    # Letters that are dropped everywhere except in the first position
    dropped = 'aehiouwy'

    # Soundex digit -> consonants coded with that digit
    groups = (
        ('1', 'bfpv'),
        ('2', 'cgjkqsxz'),
        ('3', 'dt'),
        ('4', 'l'),
        ('5', 'mn'),
        ('6', 'r'),
    )
    digit_of = {}
    for digit, members in groups:
        for member in members:
            digit_of[member] = digit

    # Every state except 'initial'; all of them are accepting
    states = ['0', '1', '2', '3', '4', '5', '6']

    f1.add_state('initial')
    for state in states:
        f1.add_state(state)
    f1.initial_state = 'initial'

    # Set all the final states
    for state in states:
        f1.set_final(state)

    # Add the rest of the arcs
    for letter in string.ascii_letters:
        # The first letter is kept as-is, but we enter the state of its own
        # group so that an immediately following letter of the same group
        # is not coded again (e.g. the 'f' in "pfister").
        first_target = digit_of.get(letter.lower(), '0')
        f1.add_arc('initial', first_target, (letter), (letter))

        if letter in dropped:
            # Dropped letters emit nothing and end any run of same-group
            # consonants.
            for state in states:
                f1.add_arc(state, '0', (letter), ())
        elif letter in digit_of:
            digit = digit_of[letter]
            for state in states:
                if state == digit:
                    # Same group as the previous letter: emit nothing and
                    # STAY in the group state.  Going back to '0' here would
                    # make a third letter of the run emit the digit again.
                    f1.add_arc(state, state, (letter), ())
                else:
                    f1.add_arc(state, digit, (letter), digit)

    return f1
Ejemplo n.º 39
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    The first letter is copied to the output; every later letter is either
    dropped (a, e, i, o, u, h, w, y) or replaced by the digit of its
    consonant group.  A letter whose group equals the group of the letter
    read just before it emits nothing.  Both lower- and upper-case letters
    are accepted in every position.

    States:
        'start'     nothing has been read yet (the initial state)
        '1'..'6'    the last letter read belongs to that consonant group
        'v'         the last letter read is one of the dropped letters
    """

    # Let's define our first FST
    f1 = FST('soundex-generate')

    # Every state except 'start'; all of them are accepting
    running_states = ['1', '2', '3', '4', '5', '6', 'v']

    f1.add_state('start')
    for state in running_states:
        f1.add_state(state)

    # 'start' is the initial state
    f1.initial_state = 'start'

    # Set all the final states
    for state in running_states:
        f1.set_final(state)

    # Soundex digit (also the name of the group's state) -> its letters
    groups = (
        ('1', 'bfpvBFPV'),
        ('2', 'cgjkqsxzCGJKQSXZ'),
        ('3', 'dtDT'),
        ('4', 'lL'),
        ('5', 'mnMN'),
        ('6', 'rR'),
    )
    dropped = 'aeiouhwyAEIOUHWY'

    # Each group is registered exactly once; registering a group twice would
    # add a second, identical copy of every one of its arcs.
    for digit, letters in groups:
        for letter in letters:
            # First letter: keep it, remember its group
            f1.add_arc('start', digit, (letter), (letter))
            for state in running_states:
                if state == digit:
                    # Same group as the previous letter: emit nothing
                    f1.add_arc(state, digit, (letter), ())
                else:
                    f1.add_arc(state, digit, (letter), digit)

    for letter in dropped:
        # Kept when first, silently dropped everywhere else
        f1.add_arc('start', 'v', (letter), (letter))
        for state in running_states:
            f1.add_arc(state, 'v', (letter), ())

    return f1
Ejemplo n.º 40
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    States:
        '0'          initial state, nothing read yet
        '1'          the last letter read is one of the removed letters
        '2'..'7'     the last letter read is a non-initial consonant of the
                     group coded '1'..'6' (state name = digit + 1)
        '2a'..'7a'   the FIRST letter was a consonant of that group

    The first letter is always copied to the output.  Afterwards a consonant
    emits its group digit unless the machine is already in that group's
    state (plain or 'a' variant), in which case it emits nothing.  Removed
    letters never emit anything after the first position.
    """

    # Let's define our first FST
    f1 = FST('soundex-generate')

    later_states = ['2', '3', '4', '5', '6', '7']
    first_states = [name + 'a' for name in later_states]
    accepting = ['1'] + later_states + first_states

    # Add all states; '0' is the initial state and the only non-final one
    f1.add_state('0')
    for name in accepting:
        f1.add_state(name)

    f1.initial_state = '0'

    for name in accepting:
        f1.set_final(name)

    # (state entered by the group, digit emitted, letters of the group)
    groups = (
        ('2', '1', 'bfpvBFPV'),
        ('3', '2', 'cgjkqsxzCGJKQSXZ'),
        ('4', '3', 'dtDT'),
        ('5', '4', 'lL'),
        ('6', '5', 'mnMN'),
        ('7', '6', 'rR'),
    )
    removed = 'aehiouwyAEHIOUWY'

    # Add the rest of the arcs
    for letter in string.ascii_letters:
        for target, digit, members in groups:
            if letter not in members:
                continue
            own_first = target + 'a'

            # Retain the first character
            f1.add_arc('0', own_first, (letter), (letter))

            # Arriving from a "first letter" state: silent if same group
            f1.add_arc(own_first, target, (letter), ())
            for source in first_states:
                if source != own_first:
                    f1.add_arc(source, target, (letter), (digit))

            # Arriving from the removed-letter state or a later-letter state
            f1.add_arc('1', target, (letter), (digit))
            f1.add_arc(target, target, (letter), ())
            for source in later_states:
                if source != target:
                    f1.add_arc(source, target, (letter), (digit))

        # Remove letters
        if letter in removed:
            f1.add_arc('0', '1', (letter), (letter))
            f1.add_arc('1', '1', (letter), ())
            for source in first_states:
                f1.add_arc(source, '1', (letter), ())
            for source in later_states:
                f1.add_arc(source, '1', (letter), ())

    return f1
Ejemplo n.º 41
0
def letters_to_numbers():
    """
    Returns an FST that converts letters to numbers as specified by
    the soundex algorithm

    Only lower-case input is accepted.  The first letter is copied to the
    output; later consonants are replaced by their group digit, and the
    letters a, e, h, i, o, u, w, y are dropped.

    States:
        'q1'         initial state, nothing read yet
        'q2'         no consonant group is "active"
        'n1'..'n6'   the consonant group with that digit is active, so a
                     further letter of the same group emits nothing

    Rules for the active group:
      * the first letter activates its own group, so "pfister" codes the
        'f' as part of the leading 'p' (p236, not p1236);
      * a, e, i, o, u deactivate the group, so equal codes separated by a
        vowel are both emitted ("tymczak" -> t522);
      * h, w and y leave the active group untouched.
    """

    # Let's define our first FST
    f1 = FST('soundex-generate')

    # Every state except the initial 'q1'; all of them are accepting
    running_states = ['q2', 'n1', 'n2', 'n3', 'n4', 'n5', 'n6']

    for state in ['q1'] + running_states:
        f1.add_state(state)

    # 'q1' is the initial state
    f1.initial_state = 'q1'

    # Set all the final states
    for state in running_states:
        f1.set_final(state)

    # (group state, digit emitted, letters of the group)
    groups = (
        ('n1', '1', 'bfpv'),
        ('n2', '2', 'cgjkqsxz'),
        ('n3', '3', 'dt'),
        ('n4', '4', 'l'),
        ('n5', '5', 'mn'),
        ('n6', '6', 'r'),
    )
    state_of = {}
    digit_of = {}
    for group_state, digit, members in groups:
        for member in members:
            state_of[member] = group_state
            digit_of[member] = digit

    # Add the rest of the arcs
    for letter in string.ascii_lowercase:
        if letter in state_of:
            target = state_of[letter]
            # First letter: keep it and activate its group
            f1.add_arc('q1', target, (letter), (letter))
            for state in running_states:
                if state == target:
                    # Group already active: emit nothing
                    f1.add_arc(state, state, (letter), ())
                else:
                    f1.add_arc(state, target, (letter), (digit_of[letter]))
        else:
            # One of a, e, h, i, o, u, w, y: kept only in first position
            f1.add_arc('q1', 'q2', (letter), (letter))
            if letter in 'aeiou':
                # A vowel ends the current run of same-group consonants
                for state in running_states:
                    f1.add_arc(state, 'q2', (letter), ())
            else:
                # h, w, y are dropped without ending the run
                for state in running_states:
                    f1.add_arc(state, state, (letter), ())
    return f1
def french_count():
    """
    Build and return the 'french' FST, which reads three digits (hundreds,
    tens, ones) and emits entries of kFRENCH_TRANS / kFRENCH_AND.

    States (names are the strings '1'..'10'):
        '1'   initial state, before the hundreds digit
        '2'   hundreds digit was 0
        '9'   hundreds digit was 1-9
        '3'   read "00"; the ones digit is emitted even when it is 0
        '10'  read a non-zero hundreds digit then 0; a ones digit 0 is silent
        '5'   tens digit was 1 or 9: ones are emitted as 10..19
        '6'   tens digit was 2-6
        '7'   tens digit was 7: the entry for 60 was emitted, ones as 10..19
        '8'   tens digit was 8
        '4'   the only final state, reached after the ones digit
    """
    fst = FST('french')

    for number in range(1, 11):
        fst.add_state(str(number))
    fst.initial_state = '1'
    fst.set_final('4')

    def add_tens_arcs(source, after_zero):
        # Arcs for the tens digit; only the target for '0' depends on
        # whether the hundreds digit was zero.
        fst.add_arc(source, after_zero, ['0'], ())
        fst.add_arc(source, '5', ['1'], ())
        for tens in range(2, 7):
            fst.add_arc(source, '6', [str(tens)], [kFRENCH_TRANS[tens * 10]])
        fst.add_arc(source, '7', ['7'], [kFRENCH_TRANS[60]])
        fst.add_arc(source, '8', ['8'],
                    [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
        fst.add_arc(source, '5', ['9'],
                    [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])

    def add_plain_ones_arcs(source):
        # Ones digit where 0 is silent and 1-9 emit their own entry.
        fst.add_arc(source, '4', ['0'], ())
        for unit in range(1, 10):
            fst.add_arc(source, '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # hundred's place
    fst.add_arc('1', '2', ['0'], ())
    fst.add_arc('1', '9', ['1'], [kFRENCH_TRANS[100]])
    for hundreds in range(2, 10):
        fst.add_arc('1', '9', [str(hundreds)],
                    [kFRENCH_TRANS[hundreds], kFRENCH_TRANS[100]])

    # ten's place when hundred's place was 0
    add_tens_arcs('2', '3')

    # ten's place when hundred's place was 1-9
    add_tens_arcs('9', '10')

    # one's place
    # state 10-->4
    add_plain_ones_arcs('10')

    # state 3-->4 (every digit, including 0, emits its entry)
    for unit in range(10):
        fst.add_arc('3', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # state 5-->4 (10-16 have their own entry, 17-19 are 10 + unit)
    for unit in range(7):
        fst.add_arc('5', '4', [str(unit)], [kFRENCH_TRANS[10 + unit]])
    for unit in range(7, 10):
        fst.add_arc('5', '4', [str(unit)],
                    [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    # state 6-->4 (a ones digit of 1 is preceded by kFRENCH_AND)
    fst.add_arc('6', '4', ['0'], ())
    fst.add_arc('6', '4', ['1'], [kFRENCH_AND, kFRENCH_TRANS[1]])
    for unit in range(2, 10):
        fst.add_arc('6', '4', [str(unit)], [kFRENCH_TRANS[unit]])

    # state 7-->4 (like state 5, but 1 is preceded by kFRENCH_AND)
    fst.add_arc('7', '4', ['0'], [kFRENCH_TRANS[10]])
    fst.add_arc('7', '4', ['1'], [kFRENCH_AND, kFRENCH_TRANS[11]])
    for unit in range(2, 7):
        fst.add_arc('7', '4', [str(unit)], [kFRENCH_TRANS[10 + unit]])
    for unit in range(7, 10):
        fst.add_arc('7', '4', [str(unit)],
                    [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    # state 8-->4
    add_plain_ones_arcs('8')

    return fst
Ejemplo n.º 43
0
def french_count():
    """
    Build and return an FST named 'french' whose arcs read digit strings
    and emit entries of kFRENCH_TRANS (indexed by integer) and kFRENCH_AND.

    State names encode what has been read so far: '0' is a literal zero,
    'n' a digit already read and '*' a position not yet read (e.g. '0n*'
    is "zero, one digit, one position left").  The 'Vega7/8/9' states
    handle a tens digit of 7, 8 or 9, for which the emitted entries are
    those of 60 (for 7) or of 4 and 20 (for 8 and 9).

    NOTE(review): presumably the input is always exactly three digits --
    confirm against the caller.  Several arcs are registered more than once
    and some states are never referenced by an arc; see the inline notes.
    """
    f = FST('french')

    f.add_state('start')
    # one number and two trailing unknowns
    f.add_state('n**')
    # intermediate state: a hundreds digit 2-9 has been read and emitted,
    # the entry for 100 is emitted on the way to 'n**'
    f.add_state('n**+')
    # two numbers and one trailing unknown
    # (declared here, but no arc in this function uses 'nn*')
    f.add_state('nn*')
    # zero and two unknown digits trailing and so on
    f.add_state('0**')
    f.add_state('00*')
    f.add_state('00n')
    f.add_state('0n*')
    f.add_state('0n*+')
    f.add_state('0nn')
    f.add_state('n00')
    # 'nnn' and 'nnn*' are declared, but no arc in this function uses them
    f.add_state('nnn')
    f.add_state('nnn*')
    f.add_state('*et*')
    # vigesimal counting for 7 in ((0/n)n*)
    f.add_state('0n*Vega7+')
    f.add_state('0n*Vega7')
    f.add_state('0nnVega7')
    # vigesimal counting for 8 in ((0/n)n*)
    f.add_state('0n*Vega8')
    f.add_state('0n*Vega8+')
    f.add_state('0nnVega8')
    # vigesimal counting for 9 in ((0/n)n*)
    f.add_state('0n*Vega9')
    f.add_state('0n*Vega9+')
    f.add_state('0n*Vega9++')
    f.add_state('0nnVega9')

    # set final states
    f.set_final('00n')
    f.set_final('0nn')
    f.set_final('nnn')
    f.set_final('n00')
    f.set_final('0nnVega7')
    f.set_final('0nnVega8')
    f.set_final('0nnVega9')

    # initial state
    f.initial_state = 'start'
    # remove initial zeroes
    f.add_arc('start', '0**', '0', ())
    f.add_arc('0**', '00*', '0', ())
    
    # One pass per digit 0-9; each `if` below adds the arcs that apply to
    # that digit.  Arcs whose arguments do not depend on ii are added again
    # on every iteration that reaches them.
    for ii in xrange(10):
        # from '0n*Vega8+' to '0nnVega8': 1-9 emit their entry, 0 is silent
        if ii != 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), [kFRENCH_TRANS[ii]])
        elif ii == 0:
            f.add_arc('0n*Vega8+', '0nnVega8', str(ii), ())
        # tens digit 7 or 9 followed by 0, 7, 8 or 9
        if ii == 0 or ii == 7 or ii ==8 or ii == 9:
            # NOTE(review): the two epsilon arcs below do not depend on ii,
            # so four identical copies of each are registered (unless
            # FST.add_arc de-duplicates) -- confirm this is harmless.
            f.add_arc('0n*Vega7', '0n*Vega7+', (), [kFRENCH_TRANS[10]])
            # self-loop on '0n*Vega7+', which is not a final state
            f.add_arc('0n*Vega7+', '0n*Vega7+', str(ii), [kFRENCH_TRANS[ii]])
            #
            f.add_arc('0n*Vega9+', '0n*Vega9++', (), [kFRENCH_TRANS[10]])
            f.add_arc('0n*Vega9++', '0nnVega9', str(ii), [kFRENCH_TRANS[ii]])
    
            if ii == 0:
                # NOTE(review): for '0' these silent arcs coexist with the
                # arcs added just above that emit kFRENCH_TRANS[0] on the
                # same input, so '0' has two competing arcs.
                f.add_arc('0n*Vega7+', '0nnVega7', '0', ())
                f.add_arc('0n*Vega9++', '0nnVega9', '0', ())
                
            elif ii == 7 or ii == 8 or ii == 9:
                f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii]])
        # tens digit 7 or 9 followed by 2-6: emit the entry for 12-16
        if ii == 2 or ii == 3 or ii ==4 or ii == 5 or ii == 6:
            f.add_arc('0n*Vega7', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
        if ii == 1:
            # tens digit 1: emit the entry for 10 and go to '0n*'
            f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[10]])
            f.add_arc('n**','0n*', str(ii), [kFRENCH_TRANS[10]])
            # NOTE(review): after 7, a '1' emits kFRENCH_AND and moves to
            # '0n*Vega7+'; the entry for 11 is only emitted by a further
            # '1' read from that state -- confirm this is intended.
            f.add_arc('0n*Vega7', '0n*Vega7+', str(ii), [kFRENCH_AND])
            f.add_arc('0n*Vega7+', '0nnVega7', str(ii), [kFRENCH_TRANS[ii+10]])
            f.add_arc('0n*Vega9+', '0nnVega9', str(ii), [kFRENCH_TRANS[ii+10]])
            
        #from '00*' to '00n'
        f.add_arc('00*', '00n', str(ii), [kFRENCH_TRANS[ii]])
        # from '0n*' / '0n*+' to '0nn'; ii runs over 1-8 here, so the first
        # arc covers input digits 2-9 and the second covers 1-8
        if ii != 0 and ii !=9:
            f.add_arc('0n*','0nn', str(ii+1), [kFRENCH_TRANS[ii+1]])
            f.add_arc('0n*+','0nn', str(ii), [kFRENCH_TRANS[ii]])
        # hundreds digit 2-9: emit the digit, then the entry for 100
        if ii != 0 and ii !=1:
            f.add_arc('start','n**+', str(ii), [kFRENCH_TRANS[ii]])
            # independent of ii, so this epsilon arc is added eight times
            f.add_arc('n**+', 'n**', (), [kFRENCH_TRANS[100]])
        # from 'n**' to 'n00' on the two-character input '00'
        if ii == 0:
            f.add_arc('n**', 'n00', '00', ())
        # hundreds digit 1: emit only the entry for 100
        if ii == 1:
            f.add_arc('start', 'n**', '1', [kFRENCH_TRANS[100]])

        
    #from '*n*' to '*et*' 1
    f.add_arc('0n*','*et*', '1', [kFRENCH_AND])
    #from '*et*' to '*nn' 1
    f.add_arc('*et*','0nn', (), [kFRENCH_TRANS[1]])
    # from '0**' / 'n**' to '0nn' for 10-16; the input label is the
    # two-character string (presumably consumed as two input symbols)
    for ii in xrange(10,17):
        f.add_arc('0**','0nn', str(ii), [kFRENCH_TRANS[ii]])
        f.add_arc('n**','0nn', str(ii), [kFRENCH_TRANS[ii]])
    # from '0**' / 'n**' to '0nn' for the round tens 20-60
    for ii in xrange(2,7):
        f.add_arc('0**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        f.add_arc('n**', '0nn', str(ii*10), [kFRENCH_TRANS[ii*10]])
        
        # from '0**' to '0n*': tens digit 2-6, ones digit still to come
        f.add_arc('0**','0n*', str(ii), [kFRENCH_TRANS[ii*10]])
        # from 'n**' to '0n*+': same, after a non-zero hundreds digit
        f.add_arc('n**', '0n*+', str(ii), [kFRENCH_TRANS[ii*10]])
    # tens digit 7, 8 or 9: 7 emits the entry for 60; 8 and 9 emit the
    # entry for 4, followed by the entry for 20 on an epsilon arc
    for ii in xrange(7,10):
        if ii == 7:
            f.add_arc('0**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
            f.add_arc('n**', '0n*Vega7', str(ii), [kFRENCH_TRANS[60]])
        elif ii == 8:
            f.add_arc('0**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega8', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega8', '0n*Vega8+', (), [kFRENCH_TRANS[20]])
        elif ii == 9:
            f.add_arc('0**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('n**', '0n*Vega9', str(ii), [kFRENCH_TRANS[4]])
            f.add_arc('0n*Vega9', '0n*Vega9+', (), [kFRENCH_TRANS[20]])
            
    # a zero tens digit after a non-zero hundreds digit is silent
    f.add_arc('n**', '0n*+', '0', ())

    return f
Ejemplo n.º 44
0
def french_count():
    """
    Build and return the 'french' FST, which reads digits and emits entries
    of kFRENCH_TRANS / kFRENCH_AND.

    After the hundreds digit the machine is in 'dig1zero' (digit was 0,
    nothing emitted) or 'dig1_nzero' (digit 1-9, hundreds emitted).  The
    tens digit then selects a 'dig2_*' state without emitting anything, and
    the ones digit emits the complete tens-and-ones phrase in one arc:
    a self-loop on the 'dig2_*' state, or an arc into 'state5' / 'state6'
    for a tens digit of 1.

    The states 'p1', 'p2' and 'p3' are declared and final but no arc uses
    them.
    """
    fst = FST('french')

    fst.add_state('start')

    # states
    for name in ('dig1zero', 'dig2zero', 'f_dig3', 'dig2_one', 'state5',
                 'state6', 'dig2_two', 'dig2_three', 'dig2_four',
                 'dig2_five', 'dig2_six', 'dig2_sev', 'dig2_eig',
                 'dig2_nine', 'dig1_nzero', 'sec_last', 'last',
                 'p1', 'p2', 'p3'):
        fst.add_state(name)

    fst.initial_state = 'start'
    for name in ('start', 'f_dig3', 'state5', 'state6', 'dig2_two',
                 'dig2_three', 'dig2_four', 'dig2_five', 'dig2_six',
                 'dig2_sev', 'dig2_eig', 'dig2_nine', 'last',
                 'p1', 'p2', 'p3', 'sec_last'):
        fst.set_final(name)

    def add_regular_decade(state, tens_digit):
        # Tens digit 2-6 after a zero hundreds digit: the ones digit emits
        # the tens entry, kFRENCH_AND before a 1, then the ones entry.
        tens_value = tens_digit * 10
        fst.add_arc('dig1zero', state, str(tens_digit), ())
        fst.add_arc(state, state, '0', [kFRENCH_TRANS[tens_value]])
        fst.add_arc(state, state, '1',
                    [kFRENCH_TRANS[tens_value], kFRENCH_AND,
                     kFRENCH_TRANS[1]])
        for unit in range(2, 10):
            fst.add_arc(state, state, str(unit),
                        [kFRENCH_TRANS[tens_value], kFRENCH_TRANS[unit]])

    # case for 09X: entries for 4 and 20, then the ones as 10..19
    fst.add_arc('dig1zero', 'dig2_nine', '9', ())
    fst.add_arc('dig2_nine', 'dig2_nine', '0',
                [kFRENCH_TRANS[4], kFRENCH_TRANS[20], kFRENCH_TRANS[10]])
    for unit in range(1, 7):
        fst.add_arc('dig2_nine', 'dig2_nine', str(unit),
                    [kFRENCH_TRANS[4], kFRENCH_TRANS[20],
                     kFRENCH_TRANS[unit + 10]])
    for unit in range(7, 10):
        fst.add_arc('dig2_nine', 'dig2_nine', str(unit),
                    [kFRENCH_TRANS[4], kFRENCH_TRANS[20],
                     kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    # case for 08X: entries for 4 and 20, then the plain ones digit
    fst.add_arc('dig1zero', 'dig2_eig', '8', ())
    fst.add_arc('dig2_eig', 'dig2_eig', '0',
                [kFRENCH_TRANS[4], kFRENCH_TRANS[20]])
    for unit in range(1, 10):
        fst.add_arc('dig2_eig', 'dig2_eig', str(unit),
                    [kFRENCH_TRANS[4], kFRENCH_TRANS[20],
                     kFRENCH_TRANS[unit]])

    # case for 07X: entry for 60, then the ones as 10..19
    fst.add_arc('dig1zero', 'dig2_sev', '7', ())
    fst.add_arc('dig2_sev', 'dig2_sev', '0',
                [kFRENCH_TRANS[60], kFRENCH_TRANS[10]])
    fst.add_arc('dig2_sev', 'dig2_sev', '1',
                [kFRENCH_TRANS[60], kFRENCH_AND, kFRENCH_TRANS[11]])
    for unit in range(2, 7):
        fst.add_arc('dig2_sev', 'dig2_sev', str(unit),
                    [kFRENCH_TRANS[60], kFRENCH_TRANS[unit + 10]])
    for unit in range(7, 10):
        fst.add_arc('dig2_sev', 'dig2_sev', str(unit),
                    [kFRENCH_TRANS[60], kFRENCH_TRANS[10],
                     kFRENCH_TRANS[unit]])

    # 00X case: both leading zeroes are silent, the last digit is emitted
    fst.add_arc('start', 'dig1zero', '0', ())
    fst.add_arc('dig1zero', 'dig2zero', '0', ())
    for unit in range(10):
        fst.add_arc('dig2zero', 'f_dig3', [str(unit)], [kFRENCH_TRANS[unit]])

    # case for 02X
    add_regular_decade('dig2_two', 2)

    # 01X case: 10-16 have their own entry, 17-19 are 10 + ones
    fst.add_arc('dig1zero', 'dig2_one', '1', ())
    for unit in range(7):
        fst.add_arc('dig2_one', 'state5', [str(unit)],
                    [kFRENCH_TRANS[unit + 10]])
    for unit in range(7, 10):
        fst.add_arc('dig2_one', 'state6', [str(unit)],
                    [kFRENCH_TRANS[10], kFRENCH_TRANS[unit]])

    # cases for 04X, 03X, 05X and 06X
    add_regular_decade('dig2_four', 4)
    add_regular_decade('dig2_three', 3)
    add_regular_decade('dig2_five', 5)
    add_regular_decade('dig2_six', 6)

    # non-zero hundreds digit: 1 emits only the entry for 100
    fst.add_arc('start', 'dig1_nzero', '1', [kFRENCH_TRANS[100]])
    for hundreds in range(2, 10):
        fst.add_arc('start', 'dig1_nzero', str(hundreds),
                    [kFRENCH_TRANS[hundreds], kFRENCH_TRANS[100]])

    # X0Y: after a zero tens digit the ones digit 1-9 is emitted as-is
    for unit in range(1, 10):
        fst.add_arc('sec_last', 'sec_last', str(unit), [kFRENCH_TRANS[unit]])

    # tens digit after a non-zero hundreds digit (silent; the ones digit
    # emits the whole phrase)
    for digit, target in (('6', 'dig2_six'), ('7', 'dig2_sev'),
                          ('8', 'dig2_eig'), ('9', 'dig2_nine'),
                          ('0', 'sec_last'), ('1', 'dig2_one'),
                          ('2', 'dig2_two'), ('3', 'dig2_three'),
                          ('4', 'dig2_four'), ('5', 'dig2_five')):
        fst.add_arc('dig1_nzero', target, digit, ())
    fst.add_arc('sec_last', 'last', '0', ())

    return fst