def parse(self, word):
    """Return the morphological analysis of ``word`` as a single string.

    ``word`` is given as a list of symbols, e.g.

        p = Parser()
        word = ['p', 'a', 'n', 'i', 'c', 'k', 'e', 'd']
        p.parse(word) ---> 'panic+past form'

    A new FST is assembled on every call: each lexicon entry gets its own
    letter-by-letter path from the start state, plus a side branch that
    reads an extra 'k'.  ``self._add_ending_states`` (defined elsewhere)
    is then invoked for both the 'k' branch and the plain stem.
    """
    initial = 'start'
    k_tag = 'k_insertion'
    stems = {'panic', 'havoc', 'sync', 'lick', 'want'}

    fst = FST('parser')
    fst.add_state(initial)
    fst.initial_state = initial

    for stem in stems:
        # Every stem starts again from the shared start state.
        tail = initial
        for letter in stem:
            node = stem + '-' + letter  # state name is "<stem>-<letter>"
            fst.add_state(node)
            fst.add_arc(tail, node, letter, letter)
            tail = node

        # Side branch: consume a 'k' after the stem and emit nothing for it.
        k_node = k_tag + '-' + stem
        fst.add_state(k_node)
        fst.add_arc(tail, k_node, 'k', '')

        # Same call order as before: the 'k' branch first, then the stem.
        self._add_ending_states(fst, stem, k_node, k=k_tag)
        self._add_ending_states(fst, stem, tail)

    analyses = fst.transduce(word)
    return ''.join(analyses[0])
def generate(self, analysis):
    """Produce the surface word for a morphological analysis.

    ``analysis`` is a list of letters followed by a form tag, e.g.

        p = Parser()
        analysis = ['p', 'a', 'n', 'i', 'c', '+past form']
        p.generate(analysis) ---> 'panicked'

    The transducer tracks whether the last letter read was a vowel, a
    consonant, or a 'c' that directly follows a vowel.  Only in the last
    case is a 'k' emitted in front of the suffix.
    """
    fst = FST('morphology-generate')

    for name in ('start', 'vowel', 'consonant', 'c', 'form_1', 'form_2'):
        fst.add_state(name)
    fst.initial_state = 'start'
    fst.set_final('form_1')
    fst.set_final('form_2')

    vowels = 'aeiou'

    # A vowel leads to the 'vowel' state from every letter-reading state.
    for v in vowels:
        for origin in ('start', 'vowel', 'consonant', 'c'):
            fst.add_arc(origin, 'vowel', v, v)

    for ch in string.ascii_lowercase:
        if ch in vowels:
            continue
        # 'c' right after a vowel has its own arc below, so skip it here.
        if ch != 'c':
            fst.add_arc('vowel', 'consonant', ch, ch)
        for origin in ('start', 'consonant', 'c'):
            fst.add_arc(origin, 'consonant', ch, ch)

    fst.add_arc('vowel', 'c', 'c', 'c')

    # (source, destination, tag read, suffix written)
    suffix_arcs = (
        ('c', 'form_1', '+past form', 'ked'),
        ('c', 'form_1', '+present participle form', 'king'),
        ('consonant', 'form_2', '+past form', 'ed'),
        ('consonant', 'form_2', '+present participle form', 'ing'),
    )
    for origin, target, tag, ending in suffix_arcs:
        fst.add_arc(origin, target, tag, ending)

    results = fst.transduce(analysis)
    return "".join(results[0])
def generate(self, analysis):
    """Produce the surface word for a morphological analysis.

    Example:

        p = Parser()
        analysis = ['p', 'a', 'n', 'i', 'c', '+past form']
        p.generate(analysis) ---> 'panicked'

    A new FST containing only a start state is created here; the rest of
    the machine is filled in by ``self._build_generator_fst`` (defined
    elsewhere), which receives the FST, the analysis and the start state.
    """
    initial = 'start'

    generator = FST('generator')
    generator.add_state(initial)
    generator.initial_state = initial

    self._build_generator_fst(generator, analysis, initial)

    results = generator.transduce(analysis)
    return ''.join(results[0])
def parse(self, word):
    """Return the morphological analysis of ``word`` as a single string.

    ``word`` is given as a list of letters, e.g.

        p = Parser()
        word = ['p', 'a', 'n', 'i', 'c', 'k', 'i', 'n', 'g']
        p.parse(word) ---> 'panic+present participle form'

    The FST has one letter-by-letter path per known stem.  'panic' and
    'havoc' must be followed by a 'k' (which is dropped from the output)
    before the suffix; 'lick', 'want' and 'sync' go to the suffix directly.
    """
    fst = FST('morphology-parse')
    fst.add_state('start')
    fst.initial_state = 'start'

    # One chain of "<stem>-<letter>" states per stem, built in this order:
    # all states, the arc out of 'start', then the letter-to-letter arcs.
    for stem in ('lick', 'want', 'sync', 'panic', 'havoc'):
        letters = list(stem)
        for letter in letters:
            fst.add_state(stem + '-' + letter)
        fst.add_arc('start', stem + '-' + letters[0], letters[0], letters[0])
        for here, following in zip(letters, letters[1:]):
            fst.add_arc(stem + '-' + here, stem + '-' + following,
                        following, following)

    # 'intermediate2' is created but no arc refers to it.
    for name in ('intermediate1', 'intermediate2', 'pres1', 'past1'):
        fst.add_state(name)

    # (last state of the stem, symbol consumed before the suffix)
    stem_exits = (
        ('lick-k', ''),
        ('want-t', ''),
        ('sync-c', ''),
        ('panic-c', 'k'),
        ('havoc-c', 'k'),
    )
    for last_state, consumed in stem_exits:
        fst.add_arc(last_state, 'intermediate1', consumed, '')

    fst.add_arc('intermediate1', 'pres1', 'ing', '+present participle form')
    fst.add_arc('intermediate1', 'past1', 'ed', '+past form')
    fst.set_final('pres1')
    fst.set_final('past1')

    # The suffix arcs are labelled with 'ing' / 'ed' as one symbol, so the
    # trailing letters of the input are merged into that single symbol.
    if ''.join(word[-3:]) == 'ing':
        symbols = word[:-3]
        symbols.append('ing')
    elif ''.join(word[-2:]) == 'ed':
        symbols = word[:-2]
        symbols.append('ed')
    else:
        symbols = word

    results = fst.transduce(symbols)
    return ''.join(results[0])
def generate(self, analysis):
    """Produce the surface word for a morphological analysis.

    ``analysis`` is a list of letters followed by a form tag, e.g.

        p = Parser()
        analysis = ['p', 'a', 'n', 'i', 'c', '+past form']
        p.generate(analysis) ---> 'panicked'

    States are numbered '1' to '10':
      '1'-'3'  read the first two letters without inspecting them
      '4'      the last letter read was a vowel
      '5'      the last letter read was a consonant
      '6'      declared, but no arc uses it
      '7'      a lowercase 'c' was read right after a vowel
      '8'      ready to read the form tag
      '9'/'10' final states for the participle / past endings
    """
    fst = FST('morphology-generate')

    for number in range(1, 11):
        fst.add_state(str(number))
    fst.initial_state = '1'
    fst.set_final('9')
    fst.set_final('10')

    letters = string.ascii_letters
    vowels = ['a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U']
    consonants = [ch for ch in letters if ch not in vowels]
    non_c_consonants = [ch for ch in consonants if ch not in ('c', 'C')]

    # The first two letters are copied through whatever they are.
    for ch in letters:
        fst.add_arc('1', '2', ch, ch)
        fst.add_arc('2', '3', ch, ch)

    # Any consonant (including c) moves to, or stays in, state '5'.
    for ch in consonants:
        fst.add_arc('3', '5', ch, ch)
        fst.add_arc('5', '5', ch, ch)

    # A vowel moves to, or stays in, state '4'.
    for ch in vowels:
        fst.add_arc('3', '4', ch, ch)
        fst.add_arc('4', '4', ch, ch)

    # After a vowel, only consonants other than c/C go to state '5'.
    for ch in non_c_consonants:
        fst.add_arc('4', '5', ch, ch)

    # A vowel after a consonant returns to state '4'.
    for ch in vowels:
        fst.add_arc('5', '4', ch, ch)

    # Lowercase 'c' straight after a vowel: emit an extra 'k' before the suffix.
    fst.add_arc('4', '7', 'c', 'c')
    fst.add_arc('7', '8', '', 'k')

    # After any other consonant nothing is inserted.
    fst.add_arc('5', '8', '', '')

    fst.add_arc('8', '9', '+present participle form', 'ing')
    fst.add_arc('8', '10', '+past form', 'ed')

    results = fst.transduce(analysis)
    return ''.join(results[0])
"""
Created on Fri Sep 08 11:21:21 2017

@author: SHILPASHREE RAO
"""

import string
import sys

from fst import FST
from fsmutils import trace

# Demo script: build a one-state transducer that drops vowels and copies
# every other lowercase letter, then run it on a few sample words.
vowels = ['a', 'e', 'i', 'o', 'u']

f = FST('devowelizer')
f.add_state('1')
f.initial_state = '1'
f.set_final('1')

for letter in string.ascii_lowercase:
    if letter in vowels:
        # Vowel: consume the letter, emit nothing.
        _ = f.add_arc('1', '1', (letter), ())
    else:
        # Any other letter is copied to the output unchanged.
        _ = f.add_arc('1', '1', (letter), (letter))

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the former print statements were a SyntaxError on Python 3.
print(''.join(f.transduce(['v', 'o', 'w', 'e', 'l'])))
print(''.join(f.transduce('e x c e p t i o n'.split())))
print(''.join(f.transduce('c o n s o n a n t'.split())))

t = trace(f, ['v', 'o', 'w', 'e', 'l'])
print(t)