Ejemplos de FST en Python, ejemplos de fst.FST en Python

Ejemplo n.º 1

0

Mostrar archivo

 def __init__(self):
     """Load the old/new training and test files and create an empty FST.

     Each data file is read fully into a list of lines. The files are
     opened through a context manager so the handles are closed as soon
     as the lines have been read.
     """
     def _read_lines(path):
         # Read every line of `path`, closing the file afterwards.
         with open(path) as handle:
             return list(handle)

     self.old_train = _read_lines("data/train.old")
     self.new_train = _read_lines("data/train.new")
     self.old_test = _read_lines("data/test.old")
     self.new_test = _read_lines("data/test.new")
     self.num_lines = len(self.old_test)  # used for the status bar
     self.fst_m = fst.FST()  # composition of lm and tm

Ejemplo n.º 2

0

Mostrar archivo

Archivo: fst_wrapper.py Proyecto: annkeenan/nlp-homework

def get_fst_mtm(old_data, new_data, initialize=True):
    """Build the typo-model FST over the symbols seen in the training data.

    Every symbol of every line in ``old_data`` is added to
    ``output_alphabet`` and every symbol of every line in ``new_data`` to
    ``input_alphabet``.

    NOTE(review): ``output_alphabet`` and ``input_alphabet`` are not created
    in this function; they must already exist in the enclosing scope and
    support ``.add`` -- confirm where they are defined.

    The machine has states q0, q1 and the accepting state q2, with
    insertion, deletion and substitution transitions plus STOP transitions
    into q2. When ``initialize`` is true, transitions whose input and output
    symbols are equal are reweighted to 100 and all others to 1.
    ``normalize_cond()`` is called in either case before returning.
    """
    machine = fst.FST()
    machine.set_start("q0")

    # Old-side symbols feed the output alphabet, new-side symbols the input.
    for symbol in (s for line in old_data for s in line):
        output_alphabet.add(symbol)
    for symbol in (s for line in new_data for s in line):
        input_alphabet.add(symbol)

    # Insertions: emit an output symbol without consuming input.
    for out_sym in output_alphabet:
        machine.add_transition(
            fst.Transition("q0", (fst.EPSILON, out_sym), "q0"))

    for in_sym in input_alphabet:
        # Deletion: consume an input symbol, emit nothing, move to q1.
        machine.add_transition(
            fst.Transition("q0", (in_sym, fst.EPSILON), "q1"))
        # Substitutions back into q0 from either q1 or q0.
        for out_sym in output_alphabet:
            pair = (in_sym, out_sym)
            machine.add_transition(fst.Transition("q1", pair, "q0"))
            machine.add_transition(fst.Transition("q0", pair, "q0"))

    # Both working states may terminate on STOP.
    for source in ("q0", "q1"):
        machine.add_transition(
            fst.Transition(source, (fst.STOP, fst.STOP), "q2"))
    machine.set_accept("q2")

    if initialize:
        for state in machine.states:
            for transition in machine.transitions_from[state].keys():
                # Favour transitions that keep the symbol unchanged.
                same_symbol = transition.a[0] == transition.a[1]
                machine.reweight_transition(
                    transition, 100 if same_symbol else 1)

    machine.normalize_cond()
    return machine

Ejemplo n.º 3

0

Mostrar archivo

def make_tm(t, testfile):
    """Build a single-loop translation-model FST from a translation table.

    Args:
        t: mapping whose keys are indexable pairs ``(source, target)`` and
            whose values are the associated probabilities. A target of
            ``'∅'`` is replaced with ``fst.EPSILON``.
        testfile: path to a whitespace-tokenised text file; every token not
            present as a source word in ``t`` gets a transition to
            ``fst.EPSILON`` with weight ``10 ** -100``.

    Returns:
        The FST with start state ``'q0'`` and accept state ``'q1'``.

    For each source word only the 10 highest-probability translations are
    inserted. The previous ``i > 10`` check let an eleventh one through.
    """
    tm = fst.FST()
    tm.set_start('q0')
    tm.set_accept('q1')
    tm.add_transition(fst.Transition('q0', (fst.STOP, fst.STOP), 'q1'))

    # Regroup the flat table as source word -> {target word: probability}.
    known_words = set()
    top_trans = defaultdict(dict)
    for trans, prob in t.items():
        target = fst.EPSILON if trans[1] == '∅' else trans[1]
        top_trans[trans[0]][target] = prob
        known_words.add(trans[0])

    # Insert only the 10 most probable translations of each source word.
    for fw, trans in top_trans.items():
        ranked = sorted(trans.items(), key=operator.itemgetter(1),
                        reverse=True)
        for ew, prob in ranked[:10]:
            tm.add_transition(fst.Transition('q0', (fw, ew), 'q0'), prob)

    # Add unknown words from the test data so they can still be consumed.
    with open(testfile) as f:
        prob = math.pow(10, -100)
        for line in f:
            for w in line.rstrip().split():
                if w not in known_words:
                    tm.add_transition(
                        fst.Transition('q0', (w, fst.EPSILON), 'q0'), prob)
    return tm

Ejemplo n.º 4

0

Mostrar archivo

 def __init__(self):
     """Load the old/new training and test files and set up the models.

     Each data file is read fully into a list of lines. The files are
     opened through a context manager so the handles are closed as soon
     as the lines have been read.
     """
     def _read_lines(path):
         # Read every line of `path`, closing the file afterwards.
         with open(path) as handle:
             return list(handle)

     self.old_train = _read_lines("data/train.old")
     self.new_train = _read_lines("data/train.new")
     self.old_test = _read_lines("data/test.old")
     self.new_test = _read_lines("data/test.new")
     self.fst_mtm = fst.FST()  # typo model
     self.fst_mlm = fst.make_ngram(self.new_train, 2)  # language model
     self.num_lines = 0  # used for the status bar

Ejemplo n.º 5

0

Mostrar archivo

Archivo: morphology.py Proyecto: jiaeyan/coursework_114_foundamentals_of_compling

	def generate(self, analysis):
		"""Produce the inflected surface form for a stem followed by a tag.

		Example:
			p = Parser()
			analysis = ['p','a','n','i','c','+past form']
			p.generate(analysis)
			---> 'panicked'
		"""
		# One entry per verb: (stem, past-form output, participle output).
		lexicon = (
			('want', 'ed', 'ing'),
			('sync', 'ed', 'ing'),
			('panic', 'ked', 'king'),
			('havoc', 'ked', 'king'),
			('lick', 'ed', 'ing'),
		)

		f1 = fst.FST('word_generator')
		for number in range(1, 34):
			f1.add_state(str(number))
		f1.initial_state = '1'

		# State '1' is shared; every other state is numbered in the order
		# it is first reached while walking the lexicon.
		next_id = 2
		accepting = []
		for stem, past_suffix, participle_suffix in lexicon:
			here = '1'
			for letter in stem:
				there = str(next_id)
				next_id += 1
				f1.add_arc(here, there, letter, letter)
				here = there
			endings = (
				('+past form', past_suffix),
				('+present participle', participle_suffix),
			)
			for tag, suffix in endings:
				there = str(next_id)
				next_id += 1
				f1.add_arc(here, there, tag, suffix)
				accepting.append(there)

		for state in accepting:
			f1.set_final(state)

		outputs = f1.transduce(analysis)
		return ''.join(outputs[0])

Ejemplo n.º 6

0

Mostrar archivo

def make_fm(f):
    """Build a linear-chain FST that accepts exactly the sequence ``f``.

    State ``i`` moves to state ``i + 1`` on the i-th symbol of ``f``
    (input and output are the same symbol). A final STOP transition leads
    from state ``len(f)`` to the accepting state ``len(f) + 1``.

    Args:
        f: sized sequence of symbols (``len(f)`` is used).

    Returns:
        The constructed FST.
    """
    fm = fst.FST()
    fm.set_start(0)
    for i, fw in enumerate(f):
        fm.add_transition(fst.Transition(i, (fw, fw), i + 1))
    # The original referenced an undefined name `fs` here; the chain length
    # is len(f), matching the accept state set below.
    end = len(f)
    fm.add_transition(fst.Transition(end, (fst.STOP, fst.STOP), end + 1))
    fm.set_accept(end + 1)
    return fm

Ejemplo n.º 7

0

Mostrar archivo

Archivo: fst_wrapper.py Proyecto: annkeenan/nlp-homework

def get_fst_mw(word):
    """Build a linear-chain FST accepting exactly the symbols of ``word``.

    States are named "q0", "q1", ...; each symbol is mapped to itself, and
    a closing STOP transition leads into the accepting state.
    """
    machine = fst.FST()
    machine.set_start("q0")
    consumed = 0  # number of symbols chained so far
    for consumed, symbol in enumerate(word, start=1):
        source = "q" + str(consumed - 1)
        target = "q" + str(consumed)
        machine.add_transition(fst.Transition(source, (symbol, symbol), target))
    last = "q" + str(consumed)
    accept = "q" + str(consumed + 1)
    machine.add_transition(fst.Transition(last, (fst.STOP, fst.STOP), accept))
    machine.set_accept(accept)
    return machine

Ejemplo n.º 8

0

Mostrar archivo

Archivo: monotone.py Proyecto: amunch/Language-to-Latex

def make_f(f):
    """Build a linear-chain FST for the whitespace-separated tokens of ``f``.

    Integer state ``k`` moves to ``k + 1`` on the k-th token (mapped to
    itself); a STOP transition then leads into the accepting state.
    """
    # Adapted from Homework 2 Solutions
    tokens = f.split()
    end = len(tokens)
    machine = fst.FST()
    machine.set_start(0)
    for state, token in enumerate(tokens):
        machine.add_transition(fst.Transition(state, (token, token), state + 1))
    machine.add_transition(fst.Transition(end, (fst.STOP, fst.STOP), end + 1))
    machine.set_accept(end + 1)
    return machine

Ejemplo n.º 9

0

Mostrar archivo

def make_kneserney(data, n):
    """Create a Kneser-Ney smoothed language model of order `n`,
    trained on `data`, as a `FST`.

    Note that the returned FST has epsilon transitions. To iterate
    over states in topological order, sort them using `lambda q:
    -len(q)` as the key.

    Args:
        data: training data, passed unchanged to `KneserNey(data, i)`.
        n: model order; one `KneserNey` model is built for each order
            from 1 to `n`.

    Returns:
        The FST. States are tuples of words: the start state is `n - 1`
        copies of "<s>" and the accept state is `("</s>",)`.

    NOTE(review): relies on the private `_prob` and `_bow` attributes of
    `KneserNey`; `_prob[u][w]` is used as the weight of reading `w` in
    context `u`, and `_bow[u]` as the weight of backing off from `u` --
    confirm against the `KneserNey` implementation.
    """

    # Estimate KN-smoothed models for orders 1, ..., n
    kn = {}
    for i in range(1, n + 1):
        kn[i] = KneserNey(data, i)

    # Create the FST. It has a state for every possible k-gram for k = 0, ..., n-1.
    m = fst.FST()
    m.set_start(("<s>", ) * (n - 1))
    m.set_accept(("</s>", ))

    for i in range(1, n + 1):
        # Each key u of the order-i model's `_prob` is used as a state.
        for u in kn[i]._prob:
            if i > 1:
                # Add an epsilon transition that backs off from the i-gram model to the (i-1)-gram model
                m.add_transition(
                    fst.Transition(u, (fst.EPSILON, fst.EPSILON), u[1:]),
                    kn[i]._bow[u])
            else:
                # Smooth 1-gram model with uniform distribution
                # The extra +1 is the slot given to "<unk>" below.
                types = len(kn[i]._prob[u]) + 1
                for w in kn[i]._prob[u]:
                    m.add_transition(fst.Transition(u, (w, w), (w, )),
                                     1 / types)
                m.add_transition(fst.Transition(u, ("<unk>", "<unk>"), ()),
                                 1 / types)

            # Create transitions for word probabilities
            # (this runs for every order, including i == 1 after the
            # uniform transitions above have been added)
            for w in kn[i]._prob[u]:
                # If we are in state u and read w, then v is the new state.
                # This should be the longest suffix of uw that is observed
                # in the training data.
                if w == "</s>":
                    v = ("</s>", )
                else:
                    v = u + (w, )
                    # Drop the oldest word until v is shorter than n and is
                    # a known context of the next-higher-order model, or
                    # until v is empty.
                    while len(v) > 0 and (len(v) >= n
                                          or v not in kn[len(v) + 1]._prob):
                        v = v[1:]
                m.add_transition(fst.Transition(u, (w, w), v),
                                 kn[i]._prob[u][w])
    return m

Ejemplo n.º 10

0

Mostrar archivo

Archivo: true_model2.py Proyecto: annkeenan/nlp-homework

def make_tm(t, testfile):
    """Build a single-loop translation-model FST from a translation table.

    Every entry of ``t`` (key: symbol pair, value: weight) becomes a
    ``q0 -> q0`` transition, with a ``'∅'`` target replaced by
    ``fst.EPSILON``. Tokens of ``testfile`` whose source word never occurs
    in ``t`` get a transition to ``fst.EPSILON`` with weight ``10 ** -100``.
    """
    model = fst.FST()
    model.set_start('q0')
    model.set_accept('q1')
    model.add_transition(fst.Transition('q0', (fst.STOP, fst.STOP), 'q1'))

    seen_sources = set()
    for pair, weight in t.items():
        seen_sources.add(pair[0])
        # '∅' marks an empty target side.
        label = (pair[0], fst.EPSILON) if pair[1] == '∅' else pair
        model.add_transition(fst.Transition('q0', label, 'q0'), weight)

    # Add unknown words from the test data
    tiny = math.pow(10, -100)
    with open(testfile) as handle:
        unknown = (token
                   for line in handle
                   for token in line.rstrip().split()
                   if token not in seen_sources)
        for token in unknown:
            model.add_transition(
                fst.Transition('q0', (token, fst.EPSILON), 'q0'), tiny)
    return model

Ejemplo n.º 11

0

Mostrar archivo

def tAutomata():
    """Build the 'epsilon_test' FST.

    From 'start' there are two paths: an arc with empty input into
    'EPSILON_Intermediate' followed by an 'E' arc into 'ep_final_state',
    and a '1' arc into '1_state'. Both end states are final.
    """
    machine = fst.FST('epsilon_test')

    machine.add_state('start')
    machine.initial_state = 'start'
    for name in ('1_state', 'ep_final_state', 'EPSILON_Intermediate'):
        machine.add_state(name)

    for name in ('1_state', 'ep_final_state'):
        machine.set_final(name)

    arcs = (
        ('start', 'EPSILON_Intermediate', [], ['ep_path']),
        ('EPSILON_Intermediate', 'ep_final_state', ['E'], ['ep_to_Final']),
        ('start', '1_state', ['1'], ['1_path']),
    )
    for source, target, consumed, emitted in arcs:
        machine.add_arc(source, target, consumed, emitted)

    return machine

Ejemplo n.º 12

0

Mostrar archivo

Archivo: monotone.py Proyecto: amunch/Language-to-Latex

def make_TM():
    """Build the translation-model FST from ``read_translations()``.

    The machine has start state 0 and accept state 1, joined by a
    ``"</s>"`` transition of weight 1. Every translation entry becomes a
    ``0 -> 0`` transition, and every token of the test file additionally
    gets a ``0 -> 0`` transition to ``'ε'`` with weight ``1.0e-100``.

    NOTE(review): assumes ``translations[t]`` yields entries whose element
    0 is the target symbol and element 1 is a weight accepted by
    ``float()`` -- confirm against ``read_translations``.

    Returns:
        The constructed FST.
    """
    # Code adapted from Homework 2 Solution
    translations = read_translations()

    tm = fst.FST()
    tm.set_start(0)
    tm.set_accept(1)

    tm.add_transition(fst.Transition(0, ("</s>", "</s>"), 1), wt=1)

    for t in translations:
        for entry in translations[t]:
            tm.add_transition(fst.Transition(0, (t, entry[0]), 0),
                              wt=float(entry[1]))

    # Collect the distinct tokens of the test file. The file is opened in a
    # context manager so the handle is closed once it has been read.
    test = '../data/final_data/test.tr'
    test_set = set()
    with open(test) as test_file:
        for test_line in test_file:
            test_set.update(test_line.strip().split())

    for char in test_set:
        tm.add_transition(fst.Transition(0, (char, 'ε'), 0),
                          wt=float('1.0e-100'))

    return tm

Ejemplo n.º 13

0

Mostrar archivo

Archivo: morphology.py Proyecto: jpan0826/FST

    def generate(self, analysis):
        """Produce the inflected surface form for a stem followed by a tag.

        Example:
            p = Parser()
            analysis = ['p','a','n','i','c','+past form']
            p.generate(analysis)
            ---> 'panicked'
        """
        # Per-verb chains: a* = want, b* = sync, c* = panic, d* = havoc,
        # e* = lick.
        chain_lengths = (('a', 3), ('b', 3), ('c', 4), ('d', 4), ('e', 3))
        state_names = ['start']
        state_names += [prefix + str(k)
                        for prefix, count in chain_lengths
                        for k in range(1, count + 1)]
        state_names += ['insertion', 'progressive', 'past', 'end']

        # (source, target, input symbol, output symbol)
        arcs = (
            ('start', 'a1', 'w', 'w'),
            ('a1', 'a2', 'a', 'a'),
            ('a2', 'a3', 'n', 'n'),
            ('a3', 'past', 't', 't'),
            ('a3', 'progressive', 't', 't'),

            ('start', 'b1', 's', 's'),
            ('b1', 'b2', 'y', 'y'),
            ('b2', 'b3', 'n', 'n'),
            ('b3', 'past', 'c', 'c'),
            ('b3', 'progressive', 'c', 'c'),

            ('start', 'c1', 'p', 'p'),
            ('c1', 'c2', 'a', 'a'),
            ('c2', 'c3', 'n', 'n'),
            ('c3', 'c4', 'i', 'i'),
            ('c4', 'insertion', 'c', 'c'),
            # Stems ending in 'c' get a 'k' inserted before the suffix.
            ('insertion', 'past', '', 'k'),
            ('insertion', 'progressive', '', 'k'),

            ('start', 'd1', 'h', 'h'),
            ('d1', 'd2', 'a', 'a'),
            ('d2', 'd3', 'v', 'v'),
            ('d3', 'd4', 'o', 'o'),
            ('d4', 'insertion', 'c', 'c'),

            ('start', 'e1', 'l', 'l'),
            ('e1', 'e2', 'i', 'i'),
            ('e2', 'e3', 'c', 'c'),
            ('e3', 'past', 'k', 'k'),
            ('e3', 'progressive', 'k', 'k'),

            ('past', 'end', '+past form', 'ed'),
            ('progressive', 'end', '+present participle', 'ing'),
        )

        f1 = fst.FST('generator')
        for name in state_names:
            f1.add_state(name)
        f1.initial_state = 'start'
        f1.set_final('end')

        for source, target, consumed, emitted in arcs:
            f1.add_arc(source, target, consumed, emitted)

        outputs = f1.transduce(analysis)
        return ''.join(outputs[0])

Ejemplo n.º 14

0

Mostrar archivo

Archivo: fsm_tp.py Proyecto: pratcooper/NLP

# import the fst module
import fst
# import the string module
import string
# Define a list of all vowels for convenience
vowels = ['a', 'e', 'i', 'o', 'u']
# Instantiate an FST object with some name
f = fst.FST('devowelizer')
# All we need is a single state ...
f.add_state('1')
# and this same state is the initial and the final state
f.initial_state = '1'
f.set_final('1')
# Now, we need to add an arc for each letter; if the letter is a vowel
# then the transition outputs nothing but otherwise it outputs the same
# letter that it consumed.
for letter in string.ascii_lowercase:
    if letter in vowels:
        _ = f.add_arc('1', '1', (letter), ())
    else:
        _ = f.add_arc('1', '1', (letter), (letter))
# Evaluate it on some example words.
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print(''.join(f.transduce(['v', 'o', 'w', 'e', 'l'])))
print(''.join(f.transduce('e x c e p t i o n'.split())))
print(''.join(f.transduce('c o n s o n a n t'.split())))

print(f.transduce(['a','w']))

from fsmutils import composechars
S = "vowels"
# Run the characters of S through the devowelizer three times in a row.
output = composechars(S, f, f, f)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: morphology.py Proyecto: jiaeyan/coursework_114_foundamentals_of_compling

	def parse(self, word):
		"""Parse an inflected form of want, sync, panic, havoc or lick.

		Handles the -ed (past form) and -ing (present participle) endings.

		Example:
			p = Parser()
			word = ['p', 'a', 'n', 'i', 'c', 'k','e','d']
			p.parse(word)
			---> 'panic+past form'

		Two transducers are composed through fsmutils.compose:
		'lexicon' rewrites the surface form into stem + '^' + the remaining
		suffix letters + '#', and 'rule' rewrites that intermediate form
		into stem + tag.
		"""
		f2=fst.FST('lexicon')
		for i in range(1,56):
			f2.add_state(str(i))
		f2.initial_state = '1'
		# want: the first suffix letter ('e' or 'i') becomes the '^' marker
		f2.add_arc('1','2','w','w')
		f2.add_arc('2','3','a','a')
		f2.add_arc('3','4','n','n')
		f2.add_arc('4','5','t','t')
		f2.add_arc('5','6','e','^')
		f2.add_arc('6','7','d','d')
		f2.add_arc('7','8','','#')
		f2.add_arc('5','9','i','^')
		f2.add_arc('9','10','n','n')
		f2.add_arc('10','11','g','g')
		f2.add_arc('11','8','','#')
		# sync
		f2.add_arc('1','12','s','s')
		f2.add_arc('12','13','y','y')
		f2.add_arc('13','14','n','n')
		f2.add_arc('14','15','c','c')
		f2.add_arc('15','16','e','^')
		f2.add_arc('16','17','d','d')
		f2.add_arc('17','18','','#')
		f2.add_arc('15','19','i','^')
		f2.add_arc('19','20','n','n')
		f2.add_arc('20','21','g','g')
		f2.add_arc('21','18','','#')
		# panic: the inserted 'k' becomes the '^' marker, suffix kept whole
		f2.add_arc('1','22','p','p')
		f2.add_arc('22','23','a','a')
		f2.add_arc('23','24','n','n')
		f2.add_arc('24','25','i','i')
		f2.add_arc('25','26','c','c')
		f2.add_arc('26','27','k','^')
		f2.add_arc('27','28','e','e')
		f2.add_arc('28','29','d','d')
		f2.add_arc('29','30','','#')
		f2.add_arc('27','31','i','i')
		f2.add_arc('31','32','n','n')
		f2.add_arc('32','33','g','g')
		f2.add_arc('33','30','','#')
		# havoc: same shape as panic
		f2.add_arc('1','34','h','h')
		f2.add_arc('34','35','a','a')
		f2.add_arc('35','36','v','v')
		f2.add_arc('36','37','o','o')
		f2.add_arc('37','38','c','c')
		f2.add_arc('38','39','k','^')
		f2.add_arc('39','40','e','e')
		f2.add_arc('40','41','d','d')
		f2.add_arc('41','42','','#')
		f2.add_arc('39','43','i','i')
		f2.add_arc('43','44','n','n')
		f2.add_arc('44','45','g','g')
		f2.add_arc('45','42','','#')
		# lick: same shape as want
		f2.add_arc('1','46','l','l')
		f2.add_arc('46','47','i','i')
		f2.add_arc('47','48','c','c')
		f2.add_arc('48','49','k','k')
		f2.add_arc('49','50','e','^')
		f2.add_arc('50','51','d','d')
		f2.add_arc('51','52','','#')
		f2.add_arc('49','53','i','^')
		f2.add_arc('53','54','n','n')
		f2.add_arc('54','55','g','g')
		f2.add_arc('55','52','','#')
		f2.set_final('8')
		f2.set_final('18')
		f2.set_final('30')
		f2.set_final('42')
		f2.set_final('52')
		
		f3=fst.FST('rule')
		for i in range(1,53):
			f3.add_state(str(i))
		f3.initial_state = '1'
		# panic: drop '^', then map "ed" / "ing" to the tag
		f3.add_arc('1','2','p','p')
		f3.add_arc('2','3','a','a')
		f3.add_arc('3','4','n','n')
		f3.add_arc('4','5','i','i')
		f3.add_arc('5','6','c','c')
		f3.add_arc('6','7','^','')
		f3.add_arc('7','8','e','')
		f3.add_arc('8','9','d','+past form')
		f3.add_arc('9','10','#','')
		f3.add_arc('7','11','i','')
		f3.add_arc('11','12','n','')
		f3.add_arc('12','13','g','+present participle')
		f3.add_arc('13','10','#','')
		# havoc
		f3.add_arc('1','14','h','h')
		f3.add_arc('14','15','a','a')
		f3.add_arc('15','16','v','v')
		f3.add_arc('16','17','o','o')
		f3.add_arc('17','18','c','c')
		f3.add_arc('18','19','^','')
		f3.add_arc('19','20','e','')
		f3.add_arc('20','21','d','+past form')
		f3.add_arc('21','22','#','')
		f3.add_arc('19','23','i','')
		f3.add_arc('23','24','n','')
		f3.add_arc('24','25','g','+present participle')
		f3.add_arc('25','22','#','')
		# lick: after '^' only "d" or "ng" remain (lexicon consumed e / i)
		f3.add_arc('1','26','l','l')
		f3.add_arc('26','27','i','i')
		f3.add_arc('27','28','c','c')
		f3.add_arc('28','29','k','k')
		f3.add_arc('29','30','^','')
		f3.add_arc('30','31','d','+past form')
		f3.add_arc('31','32','#','')
		f3.add_arc('30','33','n','')
		f3.add_arc('33','34','g','+present participle')
		f3.add_arc('34','32','#','')
		# sync
		f3.add_arc('1','35','s','s')
		f3.add_arc('35','36','y','y')
		f3.add_arc('36','37','n','n')
		f3.add_arc('37','38','c','c')
		f3.add_arc('38','39','^','')
		f3.add_arc('39','40','d','+past form')
		f3.add_arc('40','41','#','')
		# Fixed: the "ng" branch must leave state 39 (after '^'), like the
		# "d" branch; it used to leave state 38, which made "syncing"
		# unparseable.
		f3.add_arc('39','42','n','')
		f3.add_arc('42','43','g','+present participle')
		f3.add_arc('43','41','#','')
		# want
		f3.add_arc('1','44','w','w')
		f3.add_arc('44','45','a','a')
		f3.add_arc('45','46','n','n')
		f3.add_arc('46','47','t','t')
		f3.add_arc('47','48','^','')
		f3.add_arc('48','49','d','+past form')
		f3.add_arc('49','50','#','')
		# Fixed: the "ng" branch must leave state 48 (after '^'); it used
		# to leave state 47, which made "wanting" unparseable.
		f3.add_arc('48','51','n','')
		f3.add_arc('51','52','g','+present participle')
		f3.add_arc('52','50','#','')
		f3.set_final('10')
		f3.set_final('22')
		f3.set_final('32')
		f3.set_final('41')
		f3.set_final('50')
		
		output=''.join(fsmutils.compose(word,f2,f3)[0])
		return output
		'''output = ['p','a','n','i','c','+past form']

Ejemplo n.º 16

0

Mostrar archivo

Archivo: morphology.py Proyecto: jpan0826/FST

    def parse(self, word):
        """Analyse an inflected word into its stem plus a morphological tag.

        Example:
            p = Parser()
            word = ['p', 'a', 'n', 'i', 'c', 'k','e','d']
            p.parse(word)
            ---> 'panic+past form'
        """
        # Per-verb chains: a* = want, b* = sync, c* = panic, d* = havoc,
        # e* = lick.
        chain_lengths = (('a', 3), ('b', 3), ('c', 4), ('d', 4), ('e', 3))
        state_names = ['start']
        state_names += [prefix + str(k)
                        for prefix, count in chain_lengths
                        for k in range(1, count + 1)]
        state_names += ['deletion', 'progressive1', 'progressive2',
                        'progressive3', 'past1', 'past2', 'end']

        # (source, target, input symbol, output symbol)
        arcs = (
            ('start', 'a1', 'w', 'w'),
            ('a1', 'a2', 'a', 'a'),
            ('a2', 'a3', 'n', 'n'),
            ('a3', 'past1', 't', 't'),
            ('a3', 'progressive1', 't', 't'),

            ('start', 'b1', 's', 's'),
            ('b1', 'b2', 'y', 'y'),
            ('b2', 'b3', 'n', 'n'),
            ('b3', 'past1', 'c', 'c'),
            ('b3', 'progressive1', 'c', 'c'),

            ('start', 'c1', 'p', 'p'),
            ('c1', 'c2', 'a', 'a'),
            ('c2', 'c3', 'n', 'n'),
            ('c3', 'c4', 'i', 'i'),
            ('c4', 'deletion', 'c', 'c'),
            # The 'k' inserted after a stem-final 'c' is dropped again.
            ('deletion', 'past1', 'k', ''),
            ('deletion', 'progressive1', 'k', ''),

            ('start', 'd1', 'h', 'h'),
            ('d1', 'd2', 'a', 'a'),
            ('d2', 'd3', 'v', 'v'),
            ('d3', 'd4', 'o', 'o'),
            ('d4', 'deletion', 'c', 'c'),

            ('start', 'e1', 'l', 'l'),
            ('e1', 'e2', 'i', 'i'),
            ('e2', 'e3', 'c', 'c'),
            ('e3', 'past1', 'k', 'k'),
            ('e3', 'progressive1', 'k', 'k'),

            # "ed" -> "+past form"
            ('past1', 'past2', 'e', '+'),
            ('past2', 'end', 'd', 'past form'),

            # "ing" -> "+present participle"
            ('progressive1', 'progressive2', 'i', '+'),
            ('progressive2', 'progressive3', 'n', ''),
            ('progressive3', 'end', 'g', 'present participle'),
        )

        f2 = fst.FST('parser')
        for name in state_names:
            f2.add_state(name)
        f2.initial_state = 'start'
        f2.set_final('end')

        for source, target, consumed, emitted in arcs:
            f2.add_arc(source, target, consumed, emitted)

        outputs = f2.transduce(word)
        return ''.join(outputs[0])

Ejemplo n.º 17

0

Mostrar archivo

Archivo: fst_example.py Proyecto: aforwardz/algorithm

# import the fst module
import fst
# import the string module
import string
# The letters the transducer should drop.
vowels = ['a', 'e', 'i', 'o', 'u']
# A named FST with one state that is both initial and final.
f = fst.FST('devowelizer')
f.add_state('1')
f.initial_state = '1'
f.set_final('1')
# One self-loop per lowercase letter: vowels emit nothing, every other
# letter is copied through unchanged.
for letter in string.ascii_lowercase:
    emitted = () if letter in vowels else letter
    _ = f.add_arc('1', '1', letter, emitted)
# Run the transducer on a few sample words and print the results.
for sample in (['v', 'o', 'w', 'e', 'l'],
               'e x c e p t i o n'.split(),
               'c o n s o n a n t'.split()):
    print(''.join(f.transduce(sample)))