def test1():
    nt1 = Nonterminal('NP')
    nt2 = Nonterminal('VP')
    print(nt1.symbol())
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, DT = nonterminals('N, V, P, DT')
    prod1 = Production(S, [NP, VP])
    prod2 = Production(NP, [DT, NP])
    print(prod1.lhs())
    print(prod1.rhs())
    print(prod1 == Production(S, [NP, VP]))
    print(prod1 == prod2)
from copy import copy

import nltk


def convert2_nltk_CFG(G):
    terminals, NTs, P, S = G
    Prod = copy(P)
    # This is here to ensure full coverage of terminals
    # when parsing the grammar for testing.
    Prod["DUMMY"] = [list(map(lambda x: (x,), terminals))]
    assert len(S) > 0  # need a start symbol
    if len(S) > 1:
        if "NT0" not in Prod.keys():
            Prod["NT0"] = []
        for Si in S:
            Prod["NT0"].append([(Si,)])
        # NT0 is expected to be registered as a start symbol.
        assert "NT0" in S
    start = nltk.Nonterminal("NT0")
    nltk_nts = nltk.nonterminals(" ".join(list(NTs)))
    productions = []
    # Only look at nonterminals with productions.
    for NT in Prod.keys():
        for rule in Prod[NT]:
            rhs = rule_to_tuple(rule, NTs)
            # print("convert", NT, rhs)
            prod = nltk.Production(nltk.Nonterminal(NT), rhs)
            productions.append(prod)
    return nltk.grammar.CFG(start, productions)
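# A minimal usage sketch, assuming G is a 4-tuple (terminals, NTs, P, S)
# where P maps a nonterminal name to rules whose right-hand sides are
# lists of one-element tuples, and rule_to_tuple() (defined elsewhere in
# this project) converts one rule into an nltk-style RHS.  For comparison,
# the same toy grammar built directly with the NLTK API:
A = nltk.Nonterminal("A")
toy = nltk.grammar.CFG(
    nltk.Nonterminal("NT0"),
    [nltk.Production(nltk.Nonterminal("NT0"), [A]),
     nltk.Production(A, ["a"]),
     nltk.Production(A, ["b"])],
)
print(toy)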
def update_grammar(productions, unknown):
    lis = pos_tagger.tag(unknown)
    for i in range(len(lis)):
        pos = nonterminals(lis[i][1])[0]
        production_ = Production(pos, [unknown[i]])
        productions.append(production_)
        print(production_, "added to productions")
    S = Nonterminal('SENT')
    grammar = induce_pcfg(S, productions)
    return grammar
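# A minimal usage sketch.  `pos_tagger` is assumed to be any object with a
# .tag() method; here it is a hypothetical wrapper around nltk.pos_tag, and
# the seed productions come from the Penn Treebank sample corpus:
import nltk
from nltk.corpus import treebank


class _PosTagger:
    def tag(self, words):
        return nltk.pos_tag(words)


pos_tagger = _PosTagger()
seed_productions = []
for tree in treebank.parsed_sents()[:10]:
    seed_productions += tree.productions()

new_grammar = update_grammar(seed_productions, ['blorple'])
print(len(new_grammar.productions()), 'weighted productions')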
def cfg_demo():
    """
    A demonstration showing how ``CFG``s can be created and used.
    """
    from nltk import nonterminals, Production, CFG

    # Create some nonterminals.
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, Det = nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP])
    print('    S.symbol() =>', repr(S.symbol()))
    print()

    print(Production(S, [NP]))

    # Create some grammar productions.
    grammar = CFG.fromstring("""
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """)

    print('A Grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # str.replace(...) is used to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 25))
    print()

    print('Coverage of input words by the grammar:')
    # The old ContextFreeGrammar.covers() API is gone; check_coverage()
    # raises ValueError on unknown words instead of returning a boolean.
    for words in (['a', 'dog'], ['a', 'toy']):
        try:
            grammar.check_coverage(words)
            print(words, True)
        except ValueError:
            print(words, False)
def CFG_grammar():
    GOAL_FIND, ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP, NP, O = nonterminals('VP,NP,O')

    # Build a CFG based on the symbols generated above.
    grammar = CFG.fromstring("""
    VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
    NP -> P ENTITY_PLACE | ENTITY_PLACE
    GOAL_FIND -> 'find'
    GOAL_FIND -> 'show'
    GOAL_FIND -> 'tell'
    O -> 'me'
    P -> 'in'
    ENTITY_PLACE -> 'starbucks'
    ENTITY_PLACE -> 'the starbucks'
    ENTITY_PLACE -> 'a starbucks'
    ENTITY_PLACE -> 'coffee bean'
    ENTITY_PLACE -> 'the coffee bean'
    ENTITY_PLACE -> 'a coffee bean'
    """)
    return grammar
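# A quick, illustrative check of CFG_grammar() (a minimal sketch; the
# parser import mirrors the one used in Theona() below):
from nltk.parse import RecursiveDescentParser

demo_grammar = CFG_grammar()
demo_parser = RecursiveDescentParser(demo_grammar)
# 'find me starbucks' matches VP -> GOAL_FIND O ENTITY_PLACE.  Note that
# multi-word places such as 'the starbucks' are single terminals in this
# grammar, so they must arrive as one token to match.
for tree in demo_parser.parse(['find', 'me', 'starbucks']):
    print(tree)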
def Theona():
    intro1, intro2, intro3 = sentence_generation('open')
    audio_play('boost.wav')
    os.system(intro1)

    train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
    print('Training data... It will take 2-4 minutes.')
    chunker = ConsecutiveNPChunker(train_sents)

    os.system(intro2)  # Theona introduction
    audio_play('start_up.wav')
    os.system(intro3)

    # Step 1. ASR
    # Use a recognizer to record the speech.
    recorder = sr.Recognizer()
    starting = sentence_generation('hello')
    with sr.Microphone() as mike:
        print('Hello. Please speak.')
        audio_play('pong.wav')
        os.system(starting)
        my_sound = recorder.listen(mike)
        print('Processing...')

    # Speech signal to text, via the Google Speech API
    # (requires an internet connection).
    tmp_words = recorder.recognize_google(my_sound)
    words = str(tmp_words)
    print(words)  # test printing...

    # Step 2. SLU
    # 1. Find the specific places for users.
    # words = 'show me starbucks'

    # Tokenize the sentence.
    tokenized = word_tokenize(words)

    # Parse the sentence to find the goal and entity.
    pos_tagged = nltk.pos_tag(tokenized)
    chunk_words = chunker.parse(pos_tagged)
    reorder_words = tree_reconstruct(chunk_words)

    # Build the grammar for parsing.
    GOAL_FIND, ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP, NP, O = nonterminals('VP,NP,O')
    grammar = CFG_grammar()
    rd_parser = RecursiveDescentParser(grammar)

    # Parse the sentence.
    parsed_words = []
    for parsing in rd_parser.parse(reorder_words):
        print(parsing)
        # Find GOAL and ENTITY.
        for detect in parsing:
            if detect.label() == 'GOAL_FIND':
                usr_goal = detect.leaves()[0]
            if detect.label() == 'ENTITY_PLACE':
                usr_place = detect.leaves()[0]

    finding = sentence_generation('finding')
    finding = re.sub('<place>', usr_place, finding)
    audio_play('tone.wav')
    os.system(finding)

    # 2. Provide weather information to users.

    # Step 3. DM
    # Collect information from the internet.
    # Location
    google_url = "https://www.google.co.kr/?gfe_rd=cr&ei=8YoTV-OdF8WL8AWGp5DgDg&gws_rd=ssl#newwindow=1&q="
    daum_url = 'http://search.daum.net/search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&sugo=&sq=&o=&q='

    # Connect to the internet to handle the user's request: goal and entity.
    if usr_goal == 'find':
        # Search in Daum.
        usr_request_url = daum_url + usr_place + '&tltm=1'
        request = requests.get(usr_request_url)
        soup = BeautifulSoup(request.content, 'html.parser')

        # Search in Google.
        # usr_request_url = google_url + usr_place
        # request = requests.get(usr_request_url)
        # soup = BeautifulSoup(request)

        # Collect information.
        # Find the closest 5 places around the requested location.
        all_data = soup.find_all('div', {'class': 'cont_place'})
        first_data = all_data[0]
        # Address
        address_info = all_data[0].find_all('a', {'class': 'more_address'})[0].text
        # Phone number
        phone_info = all_data[0].find_all('span', {'class': 'f_url'})[0].text
        # Location (map)
        map_info = all_data[0].find('a').get('href')

    # Weather

    # Step 4. NLG
    # Generate an appropriate sentence.
    answer_text = NLG_transoformation('find')

    # Adjust the words if the text is Korean.
    address_info = lang_adjust(address_info)

    # Substitute the markers with proper words.
    answer_text = re.sub('<place>', usr_place, answer_text)
    answer_text = re.sub('<address>', address_info, answer_text)
    answer_text = re.sub('<phone>', phone_info, answer_text)

    # Step 5. TTS
    audio_play('tone.wav')
    os.system('say ' + answer_text)
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  9 13:04:57 2020

@author: Rahul Kothuri, Isak Nyberg
"""
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG

w1 = Nonterminal("NP")
w2 = Nonterminal("VP")
S, NP, VP = nonterminals('S,NP,VP')
NLN, LN, V, LNP, DT, VBP, Adj, VBZ, RB = nonterminals(
    'NLN,LN,V,LNP,DT,VBP,Adj,VBZ,RB')
prod1 = Production(S, [NP, VP])
prod2 = Production(NP, [DT, NP])

# The original grammar string is truncated here (no productions for Adj
# and RB survive); it is closed as-is so the snippet parses.
grammar = CFG.fromstring("""
S -> NP VP
NP -> Det LN | Det NLN | Det LNP
VP -> V NP | VBP Adj | VBZ Adj | V RB | V | VBZ NP
Det -> 'The'
Det -> 'A'
Det -> 'the'
Det -> 'that'
Det -> 'Those'
LN -> 'girl' | 'boy' | 'dog'
LNP -> 'boys'
NLN -> 'house' | 'crackers'
V -> 'eats'
V -> 'run' | 'runs'
VBP -> 'are'
VBZ -> 'is'
""")
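# A quick sanity parse with the grammar above (a sketch; 'The boy runs'
# uses only rules that are fully specified in the truncated grammar):
parser = nltk.ChartParser(grammar)
for tree in parser.parse(['The', 'boy', 'runs']):
    print(tree)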
"""
A context-free grammar is called "context-free" because the symbol A can
always be rewritten as the string α regardless of the context in which A
appears.

A CFG consists of:
  - a finite set of nonterminals (N)
  - a finite set of terminals (T)
  - a start symbol (S)
  - a finite set of productions (P) of the form A -> α
"""
# Nonterminals
nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
print((nonterminal1 == nonterminal2))
print((nonterminal2 == nonterminal3))
print((nonterminal1 == nonterminal3))

S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')

# Productions
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
print(production1.lhs(), production1.rhs())
print(production2.lhs(), production2.rhs())
print(production3.lhs(), production3.rhs())

# Grammar parsing: load the large ATIS grammar and its test sentences.
gram1 = nltk.data.load('grammars/large_grammars/atis.cfg')
# print(gram1)
sent = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sent = nltk.parse.util.extract_test_sentences(sent)
testingsent = sent[25]
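# A short follow-up sketch: parse the selected ATIS test sentence with a
# chart parser.  extract_test_sentences() yields (tokens, parse-count)
# pairs; this assumes nltk.download('large_grammars') has been run.
parser = nltk.parse.BottomUpLeftCornerChartParser(gram1)
tokens, expected = testingsent
trees = list(parser.parse(tokens))
print(len(trees), 'parses found; the test file expects', expected)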
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG

nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')

print(nonterminal1.symbol())
print(nonterminal2.symbol())
print(nonterminal3.symbol())
print(nonterminal1 == nonterminal2)
print(nonterminal2 == nonterminal3)
print(nonterminal1 == nonterminal3)

S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')

production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])

print(production1.lhs())
print(production1.rhs())
print(production3.lhs())
print(production3.rhs())
print(production3 == Production(VP, [V, NP, NP, PP]))
print(production2 == production3)
# Speech signal to text, via the Google Speech API
# (requires an internet connection).
tmp_words = recorder.recognize_google(my_sound)
words = str(tmp_words)
print(words)  # test printing...

# Step 2. SLU
# 1. Find the specific places for users.
# words = 'show me starbucks'

# Tokenize the sentence.
tokenized = word_tokenize(words)

# Build the grammar for parsing.  (The grammar string is truncated in the
# original; it is closed as-is so the snippet parses.)
GOAL_FIND, ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
usr_goal = ENTITY_PLACE
usr_find = GOAL_FIND
VP, NP, O = nonterminals('VP,NP,O')
grammar = CFG.fromstring("""
VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
NP -> P ENTITY_PLACE | ENTITY_PLACE
GOAL_FIND -> 'find'
GOAL_FIND -> 'show'
GOAL_FIND -> 'tell'
O -> 'me'
P -> 'in'
ENTITY_PLACE -> 'starbucks'
ENTITY_PLACE -> 'Starbucks'
ENTITY_PLACE -> 'Coffee Bean'
""")
from nltk import nonterminals, Production, CFG
import generate

# Create some nonterminals.
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, Det = nonterminals('N, V, P, Det')
VP_slash_NP = VP / NP

# Create some grammar productions.  parse_cfg() was removed from NLTK;
# CFG.fromstring() is the current equivalent.
grammar = CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | NP PP
VP -> V NP | VP PP
Det -> 'a' | 'the'
N -> 'boy' | 'girl'
V -> 'chased' | 'sat'
P -> 'on' | 'in' | 'to'
""")
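# The local `generate` module is project-specific; NLTK ships its own
# depth-limited enumerator, shown here as an alternative sketch:
from nltk.parse.generate import generate as nltk_generate

# Enumerate a few sentences licensed by the grammar (depth-limited so the
# recursive NP -> NP PP rule terminates).
for tokens in nltk_generate(grammar, depth=5, n=10):
    print(' '.join(tokens))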
from nltk import PCFG, Tree
from nltk import nonterminals, Nonterminal, Production
import random
from generator import generate

# Create some nonterminals.
S, NP, VP, AdjP, NP_pron, N, V, P, Det, Adj, Pron, PLex, NP_PP, NPSg, NPPl, \
    NSg, NPl, Vsg, Vpl, VPSg, VPPl, PronSg, PronPl, DetSg, DetPl, NPobj, \
    PronObj, PPSg, PPPl, NPObjSg, NPObjPl = nonterminals(
        'S, NP, VP, AdjP, NP_pron, N, V, P, Det, Adj, Pron, PLex, NP_PP, '
        'NPSg, NPPl, NSg, NPl, Vsg, Vpl, VPSg, VPPl, PronSg, PronPl, '
        'DetSg, DetPl, NPobj, PronObj, PPSg, PPPl, NPObjSg, NPObjPl')

pcfg_agreement_pp = PCFG.fromstring("""
S -> PP NPSg VSg [0.1] | PP NPPl VPl [0.1]
S -> NPSg VSg [0.4]
S -> NPPl VPl [0.4]
VSg -> 'laughs' [0.4] | 'dances' [0.2] | 'hopes' [0.15] | 'burps' [0.1] | 'coughs' [0.1] | 'dies' [0.05]
VPl -> 'laugh' [0.4] | 'dance' [0.2] | 'hope' [0.15] | 'burp' [0.1] | 'cough' [0.1] | 'die' [0.05]
P -> 'near' [0.7] | 'with' [0.3]
PP -> P NPObj [1.0]
NPObj -> PronObj [0.2] | DetSg NSg [0.2] | DetSg AdjP NSg [0.1] | DetSg NSg PP [0.1] | DetPl NPl [0.2] | DetPl AdjP NPl [0.1] | DetPl NPl PP [0.1]
NPSg -> PronSg [0.2] | DetSg NSg [0.4] | DetSg AdjP NSg [0.2] | DetSg NSg PP [0.2]
NPPl -> PronPl [0.2] | DetPl NPl [0.4] | DetPl AdjP NPl [0.2] | DetPl NPl PP [0.2]
DetSg -> 'the' [0.5] | 'a' [0.5]
DetPl -> 'the' [0.8] | 'most' [0.2]
NSg -> 'zebra' [0.4] | 'badger' [0.2] | 'chicken' [0.15] | 'dog' [0.1] | 'robin' [0.1] | 'frog' [0.05]
NPl -> 'zebras' [0.4] | 'badgers' [0.2] | 'chickens' [0.15] | 'dogs' [0.1] | 'robins' [0.1] | 'frogs' [0.05]
AdjP -> Adj [0.7] | Adj AdjP [0.3]
Adj -> 'gentle' [0.4] | 'humble' [0.2] | 'clever' [0.15] | 'jocular' [0.1] | 'kindly' [0.1] | 'lovely' [0.05]
PronSg -> 'he' [0.5] | 'she' [0.5]
PronPl -> 'they' [1.0]
PronObj -> 'him' [.33] | 'her' [.33] | 'them' [.34]
""")
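# `generator.generate` is a project-local module; as a stand-in, a minimal
# weighted sampler can be written directly against NLTK's API
# (productions(lhs=...) and prob() are real NLTK methods; `sample` itself
# is a hypothetical helper, a sketch rather than the project's generator):
def sample(grammar, symbol=None):
    """Sample one sentence from a PCFG by weighted rule expansion."""
    if symbol is None:
        symbol = grammar.start()
    if not isinstance(symbol, Nonterminal):
        return [symbol]  # terminal: emit as-is
    prods = grammar.productions(lhs=symbol)
    prod = random.choices(prods, weights=[p.prob() for p in prods])[0]
    words = []
    for sym in prod.rhs():
        words += sample(grammar, sym)
    return words


print(' '.join(sample(pcfg_agreement_pp)))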
print("Probabilistic Tree:") print(pt) pt.draw() # In[ ]: ## Grammar tools import nltk from nltk import Nonterminal, nonterminals, Production, CFG nonterminal1 = Nonterminal('NP') nonterminal2 = Nonterminal('VP') nonterminal3 = Nonterminal('PP') print(nonterminal1.symbol()) print(nonterminal2.symbol()) print(nonterminal1 == nonterminal2) S, NP, VP, PP = nonterminals( 'S, NP, VP, PP') ## use nonterminals to generate a list N, V, P, DT = nonterminals('N, V, P, DT') production1 = Production(S, [NP, VP]) production2 = Production(NP, [DT, NP]) production3 = Production(VP, [V, NP, NP, PP]) print(production1.lhs()) print(production1.rhs()) print(production3 == Production(VP, [V, NP, NP, PP])) # In[ ]: nltk.download('large_grammars') # In[ ]: ### ATIS grammer