def do_stem(self, word):
    """Strip trailing suffixes from ``word`` and return the remaining stem.

    A fresh Fysom machine is created in state 'start' from ``self.events``.
    The word is scanned from its end towards its beginning: the slice
    between the scan position and the current cut point is treated as a
    candidate suffix and used as an event name. Whenever the machine can
    fire that event it is triggered and the cut point moves left to the
    scan position. Scanning stops once the scan position reaches index 0.

    Returns ``word[:cut]`` for the final cut point; the word is returned
    unchanged when no candidate ever fires.
    """
    machine = Fysom(initial='start', events=self.events)
    end = len(word)
    start = end - 1

    while start > 0:
        candidate = word[start:end]

        if not machine.can(candidate):
            # Not a known suffix in this state: widen the candidate leftwards.
            start -= 1
            continue

        if candidate == 'i' and machine.can(word[start - 1:end]):
            # A lone 'i' is not consumed when the candidate that is one
            # character longer is also accepted; try that one instead.
            start -= 1
            continue

        machine.trigger(candidate)
        state = machine.current

        if state == 'b':
            machine.current = 'start'
        elif state == 'h' and word[start - 1:start] == 'i':
            start -= 1  # absorb the preceding 'i' into the suffix
            if word[start - 1:start] == 'n':
                # The 'ning' suffix: undo the transition and keep widening
                # the candidate without moving the cut point.
                machine.current = 'start'
                continue

        end = start
        start -= 1

    return word[:end]
def do_stem(self, word):
    """Strip trailing suffixes from ``word`` and return the remaining stem.

    A fresh Fysom machine is created in state 'start' from ``self.events``.
    The word is scanned from its end towards its beginning: the slice
    between the scan position and the current cut point is treated as a
    candidate suffix and used as an event name. Whenever the machine can
    fire that event it is triggered and the cut point moves left to the
    scan position. Scanning stops once the scan position reaches index 0.

    Returns ``word[:cut]`` for the final cut point; the word is returned
    unchanged when no candidate ever fires.
    """
    machine = Fysom(initial='start', events=self.events)

    # FIXME: uncomment below and make sanitize functions support
    # both Python 2 and 3 versions
    # word = WordProcessor.sanitize(word)

    end = len(word)
    start = end - 1

    while start > 0:
        candidate = word[start:end]

        if not machine.can(candidate):
            # Not a known suffix in this state: widen the candidate leftwards.
            start -= 1
            continue

        if candidate == 'i' and machine.can(word[start - 1:end]):
            # A lone 'i' is not consumed when the candidate that is one
            # character longer is also accepted; try that one instead.
            start -= 1
            continue

        machine.trigger(candidate)
        state = machine.current

        if state == 'b':
            machine.current = 'start'
        elif state == 'h' and word[start - 1:start] == 'i':
            start -= 1  # absorb the preceding 'i' into the suffix
            if word[start - 1:start] == 'n':
                # The 'ning' suffix: undo the transition and keep widening
                # the candidate without moving the cut point.
                machine.current = 'start'
                continue

        end = start
        start -= 1

    return word[:end]
def do_stem(self, word):
    """Strip trailing suffixes from ``word`` and return the remaining stem.

    A fresh Fysom machine is created in state 'start' from ``self.events``.
    The word is scanned from its end towards its beginning: the slice
    between the scan position and the current cut point is treated as a
    candidate suffix and used as an event name. Whenever the machine can
    fire that event it is triggered and the cut point moves left to the
    scan position. Scanning stops once the scan position reaches index 0.

    Returns ``word[:cut]`` for the final cut point; the word is returned
    unchanged when no candidate ever fires.
    """
    machine = Fysom(initial='start', events=self.events)

    # FIXME: uncomment below and make sanitize functions support
    # both Python 2 and 3 versions
    # word = WordProcessor.sanitize(word)

    end = len(word)
    start = end - 1

    while start > 0:
        candidate = word[start:end]

        if not machine.can(candidate):
            # Not a known suffix in this state: widen the candidate leftwards.
            start -= 1
            continue

        if candidate == 'i' and machine.can(word[start - 1:end]):
            # A lone 'i' is not consumed when the candidate that is one
            # character longer is also accepted; try that one instead.
            start -= 1
            continue

        machine.trigger(candidate)
        state = machine.current

        if state == 'b':
            machine.current = 'start'
        elif state == 'h' and word[start - 1:start] == 'i':
            start -= 1  # absorb the preceding 'i' into the suffix
            if word[start - 1:start] == 'n':
                # The 'ning' suffix: undo the transition and keep widening
                # the candidate without moving the cut point.
                machine.current = 'start'
                continue

        end = start
        start -= 1

    return word[:end]
# SenToPhrase(tagged_sentence): group the tokens of a POS-tagged sentence into
# phrases and bucket them by grammatical rule.
#
# What the code below does:
#   * Builds a Fysom state machine whose event names are POS tags ('IN', 'NN',
#     'VBD', 'RB', ...) and whose states are the strings '0'..'22'.
#   * Reads tagged_sentence[j][1] as the tag fired on the machine and keeps the
#     whole tagged_sentence[j] item as the token; tagged_sentence[j][0] is
#     treated as the word (its last character is compared with ",").
#   * Starting at index k, fires successive tags, collecting matched tokens in
#     `t`; for states '3', '9', '14', '19' and '21' it also tries the tag of
#     the following token (j+1).
#   * When the machine is in one of `high_final_states`, `phrase_count` is
#     incremented and `t` is appended to a bucket in `phras_rules` chosen by
#     the state number:
#       R1  states 3/4  (preposition)           R7  state 15 (past perfect continuous)
#       R2  state 18    (present continuous)    R8  state 9  (simple future)
#       R3  state 19    (present perfect)       R9  state 10 (future continuous)
#       R4  state 20    (present perfect cont.) R10 state 21 (future perfect)
#       R5  state 13    (past continuous)       R11 state 22 (future perfect continuous)
#       R6  state 14    (past perfect)
#   * If the last successfully reached state is in `to_rb`, a following 'RB'
#     tag that leads to state '11' fills `s`, which is appended to
#     phras_rules['R12'].
#   * Finally appends `phrase_count` and `phras_rules` to
#     english_sentence_structure[sentence]. Neither `english_sentence_structure`
#     nor `sentence` is defined inside this function, and there is no return
#     statement, so the result is delivered only through that side effect.
#
# NOTE(review): the line breaks and indentation of this function have been
#   lost (the body is flattened onto a few long lines, and inline `#` comments
#   now swallow the code that follows them). The nesting of the
#   try/except/finally blocks, the extent of the `for` loop and the target of
#   the final `break` cannot be recovered from this text alone -- restore the
#   layout from the original revision before running or refactoring.
# NOTE(review): state '16' is listed in high_final_states but has no matching
#   branch in the R1..R11 chain, so such a phrase is counted but not stored --
#   confirm whether that is intended.
# NOTE(review): every `except:` here is bare, so all exceptions (including
#   KeyboardInterrupt) are swallowed; the `oops = N` assignments are unused
#   placeholders.
def SenToPhrase (tagged_sentence): fsm = Fysom({'initial': '0', 'events': [ {'name': 'IN', 'src': '0', 'dst': '1'},{'name': 'NN', 'src': '1', 'dst': '3'},{'name': 'NNS', 'src': '1', 'dst': '3'}, {'name': 'NNP', 'src': '1', 'dst': '3'},{'name': 'NNPS', 'src': '1', 'dst': '3'},{'name': 'DT', 'src': '1', 'dst': '2'}, {'name': 'NN', 'src': '2', 'dst': '3'},{'name': 'NNS', 'src': '2', 'dst': '3'},{'name': 'NNP', 'src': '2', 'dst': '3'}, {'name': 'NNPS', 'src': '2', 'dst': '3'},{'name': 'PRP$', 'src': '1', 'dst': '4'},{'name': 'PRP$', 'src': '2', 'dst': '4'}, {'name': 'JJ', 'src': '1', 'dst': '5'},{'name': 'JJ', 'src': '2', 'dst': '5'},{'name': 'JJR', 'src': '1', 'dst': '6'}, {'name': 'JJR', 'src': '2', 'dst': '6'},{'name': 'JJS', 'src': '1', 'dst': '7'},{'name': 'JJS', 'src': '2', 'dst': '7'}, {'name': 'NN', 'src': '5', 'dst': '3'},{'name': 'NN', 'src': '6', 'dst': '3'},{'name': 'NN', 'src': '7', 'dst': '3'}, {'name': 'NNS', 'src': '5', 'dst': '3'},{'name': 'NNS', 'src': '6', 'dst': '3'},{'name': 'NNS', 'src': '7', 'dst': '3'}, {'name': 'NNP', 'src': '5', 'dst': '3'},{'name': 'NNP', 'src': '6', 'dst': '3'},{'name': 'NNP', 'src': '7', 'dst': '3'}, {'name': 'NNPS', 'src': '5', 'dst': '3'},{'name': 'NNPS', 'src': '6', 'dst': '3'},{'name': 'NNPS', 'src': '7', 'dst': '3'}, {'name': 'PRP', 'src': '1', 'dst': '4'},{'name': 'PRP', 'src': '2', 'dst': '4'},{'name': 'NN', 'src': '4', 'dst': '3'}, {'name': 'NNS', 'src': '4', 'dst': '3'},{'name': 'NNP', 'src': '4', 'dst': '3'},{'name': 'NNPS', 'src': '4', 'dst': '3'}, {'name': 'TO', 'src': '0', 'dst': '1'},{'name': 'NN', 'src': '3', 'dst': '4'},{'name': 'NNS', 'src': '3', 'dst': '4'}, {'name': 'NNP', 'src': '3', 'dst': '4'},{'name': 'NNPS', 'src': '3', 'dst': '4'}, #######VERB################ {'name': 'MD', 'src': '0', 'dst': '8'},{'name': 'VB', 'src': '8', 'dst': '9'},{'name': 'VBN', 'src': '9', 'dst': '21'}, {'name': 'VBG', 'src': '9', 'dst': '10'},{'name': 'JJ', 'src': '9', 'dst': '10'},{'name': 'RB', 'src': '9', 'dst': '11'}, 
{'name': 'VBD', 'src': '0', 'dst': '12'},{'name': 'VBG', 'src': '12', 'dst': '13'},{'name': 'RB', 'src': '13', 'dst': '11'}, {'name': 'RB', 'src': '12', 'dst': '11'},{'name': 'VBN', 'src': '12', 'dst': '14'},{'name': 'VBG', 'src': '14', 'dst': '15'}, {'name': 'JJ', 'src': '14', 'dst': '16'},{'name': 'VBZ', 'src': '0', 'dst': '17'},{'name': 'VBP', 'src': '0', 'dst': '17'}, {'name': 'RB', 'src': '17', 'dst': '11'},{'name': 'VBG', 'src': '17', 'dst': '18'},{'name': 'VBN', 'src': '17', 'dst': '19'}, {'name': 'RB', 'src': '18', 'dst': '11'},{'name': 'VBG', 'src': '19', 'dst': '20'},{'name': 'RB', 'src': '20', 'dst': '11'}, {'name': 'VBG', 'src': '21', 'dst': '22'},{'name': 'RB', 'src': '14', 'dst': '11'} ]}) high_final_states = ['3','4','9','10','13','14','15','16','18','19','20','21','22'] phras_rules={'R1':[],'R2':[],'R3':[],'R4':[],'R5':[],'R6':[],'R7':[],'R8':[],'R9':[],'R10':[],'R11':[],'R12':[]} to_rb = ['9','12','13','14','17','18','20'] fsm.current = '0' # new_temp = "" t=[] k = 0 phrase_count=0 while(k<len(tagged_sentence)): flag = 0 rbflag = 0 fsm.current = '0' temp_current='0' count = 0 # new_temp = "" t=[] j = k s=[] for j in range(k,len(tagged_sentence)): # print("-----For loop j----") # print(tagged_sentence[j]) try: fsm.trigger(tagged_sentence[j][1]) # print("\n",fsm.current) temp_current=fsm.current # new_temp += tagged_sentence[j][0] + " " t.append(tagged_sentence[j]) count += 1 except: break finally: if(fsm.current=='3' and j!=len(tagged_sentence)-1 and tagged_sentence[j][0][-1]!=","): try: fsm.trigger(tagged_sentence[j+1][1]) # print(tagged_sentence[j+1]) # print("\n",fsm.current) # new_temp += tagged_sentence[j+1][0] + " " t.append(tagged_sentence[j+1]) count += 1 except: oops = 2 if(fsm.current=='9' and j!=len(tagged_sentence)-1): try: fsm.trigger(tagged_sentence[j+1][1]) # print("\n",fsm.current) # print(tagged_sentence[j+1]) if(fsm.current=='11'): fsm.current = '9' else: # new_temp += i[j+1][0] + " " t.append(tagged_sentence[j+1]) count += 1 
except: oops = 3 if(fsm.current=='14' and j!=len(tagged_sentence)-1): try: fsm.trigger(tagged_sentence[j+1][1]) # print("\n",fsm.current) # print(tagged_sentence[j+1]) if(fsm.current=='11'): fsm.current = '14' else: # new_temp += i[j+1][0] + " " t.append(tagged_sentence[j+1]) count += 1 except: oops = 3 if(fsm.current=='19' and j!=len(tagged_sentence)-1): try: fsm.trigger(tagged_sentence[j+1][1]) # print("\n",fsm.current) # print(tagged_sentence[j+1]) # new_temp += i[j+1][0] + " " temp_current=fsm.current t.append(tagged_sentence[j+1]) count += 1 except: oops = 1 if(fsm.current=='21' and j!=len(tagged_sentence)-1): try: fsm.trigger(tagged_sentence[j+1][1]) # print("\n",fsm.current) # print(tagged_sentence[j+1]) # new_temp += i[j+1][0] + " " t.append(tagged_sentence[j+1]) count += 1 except: oops = 1 c=int(fsm.current) if(fsm.current in high_final_states): phrase_count+=1 # new_temp = new_temp[:-1] # if(new_temp[-1]==','): # new_temp = new_temp[:-1] if(c==3 or c==4): phras_rules['R1'].append(t) #PREPOSITION elif(c==18): phras_rules['R2'].append(t) #PRESENT CONTINUOUS elif(c==19): phras_rules['R3'].append(t) #PRESENT PERFECT elif(c==20): phras_rules['R4'].append(t) #PRESENT PERFECT CONTINUOUS elif(c==13): phras_rules['R5'].append(t) #PAST CONTINUOUS elif(c==14): phras_rules['R6'].append(t) #PAST PERFECT elif(c==15): phras_rules['R7'].append(t) #PAST PERFECT CONTINUOUS elif(c==9): phras_rules['R8'].append(t) #SIMPLE FUTURE elif(c==10): phras_rules['R9'].append(t) #FUTURE CONTINUOUS elif(c==21): phras_rules['R10'].append(t) #FUTURE PERFECT elif(c==22): phras_rules['R11'].append(t) #FUTURE PERFECT CONTINUOUS t = [] fsm.current = '0' # backup_k = k k = count + k flag = 1 # break if(temp_current in to_rb and j!=len(tagged_sentence)-1): # print("----temp-----") # print("\n",temp_current) fsm.current = temp_current try: if(fsm.current=='20'): check = fsm.current s.append(tagged_sentence[j+1]) fsm.trigger(tagged_sentence[j+2][1]) else: check = fsm.current 
s.append(tagged_sentence[j]) fsm.trigger(tagged_sentence[j+1][1]) # print("-------ENTERED TRYYYYY-------" # print(tagged_sentence[j+1]) # print("\n",fsm.current) if(fsm.current=='11'): # print("----ENTERED IF----") if(check=='20'): s.append(tagged_sentence[j+2]) else: s.append(tagged_sentence[j+1]) # count += 1 else: # print("-----OOPS ELSEEE---") s=[] fsm.current =temp_current rbflag = 1 except: # print("----Uhohhhh------") oops = 1 t = [] fsm.current = '0' if(fsm.current=='11'): phrase_count+=1 phras_rules['R12'].append(s) fsm.current = '0' s = [] if(flag==0): k += 1 if(rbflag==1 and flag==1): break english_sentence_structure[sentence].append(phrase_count) english_sentence_structure[sentence].append(phras_rules)
def SenToPhrase(tagged_sentence):
    """Split a POS-tagged sentence into phrases with a tag-driven FSM.

    The tag of each token is fired as an event on a Fysom state machine
    that starts in state '0'. Words are accumulated while transitions
    succeed; as soon as the machine reaches a final state the accumulated
    words are re-tagged with the module-level ``english_postagger`` and
    stored, and the machine goes back to '0'. A token whose tag has no
    transition from the current state discards the partial phrase (that
    token included) and restarts from '0'.

    Args:
        tagged_sentence: iterable of items where ``item[0]`` is the word
            (a string) and ``item[1]`` is its POS tag.

    Returns:
        A list with one ``english_postagger.tag(words)`` result per
        completed phrase, in sentence order.
    """
    noun_tags = ('NN', 'NNS', 'NNP', 'NNPS')
    verb_tags = ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
    # States '9'..'14': one per verb tag, reached from '0' or '8'.
    first_verb_states = [str(state) for state in range(9, 15)]

    # (tag, source state, destination state) triples.
    transitions = [
        ('IN', '0', '1'),
        ('TO', '0', '1'),
        ('MD', '0', '8'),
        ('DT', '1', '2'),
    ]
    for src in ('1', '2'):
        transitions += [
            ('PRP$', src, '4'),
            ('PRP', src, '4'),
            ('JJ', src, '5'),
            ('JJR', src, '6'),
            ('JJS', src, '7'),
        ]
    # Any noun tag leads to state '3' from each of these states.
    for src in ('1', '2', '4', '5', '6', '7'):
        transitions += [(tag, src, '3') for tag in noun_tags]
    for offset, tag in enumerate(verb_tags):
        # First verb: VB -> '9', VBD -> '10', ..., VBZ -> '14'.
        for src in ('0', '8'):
            transitions.append((tag, src, str(9 + offset)))
        # Following verb: VB -> '17', VBD -> '18', ..., VBZ -> '22'.
        for src in first_verb_states + ['15']:
            transitions.append((tag, src, str(17 + offset)))
    for src in first_verb_states:
        transitions.append(('RB', src, '15'))
        transitions.append(('JJ', src, '16'))

    # Constructing with 'initial': '0' already leaves the machine in '0'.
    fsm = Fysom({
        'initial': '0',
        'events': [
            {'name': name, 'src': src, 'dst': dst}
            for name, src, dst in transitions
        ],
    })

    # The original kept "high" ('3', '15'..'22') and "low" ('4') final
    # states apart but handled both identically, so they are merged here.
    final_states = frozenset(
        ['3', '4', '15', '16', '17', '18', '19', '20', '21', '22']
    )

    phrases = []
    phrase_text = ""
    for token in tagged_sentence:
        try:
            fsm.trigger(token[1])
            phrase_text += token[0] + " "
        except Exception:
            # Fysom raises when the tag is unknown or not allowed in the
            # current state; a malformed token raises too. Either way the
            # partial phrase is dropped and matching restarts. Unlike a
            # bare ``except:``, KeyboardInterrupt/SystemExit still propagate.
            fsm.current = '0'
            phrase_text = ""
            continue

        if fsm.current in final_states:
            fsm.current = '0'
            # Drop the trailing separator before splitting back into words.
            phrases.append(english_postagger.tag(phrase_text[:-1].split()))
            phrase_text = ""

    return phrases