Example #1
from nltk import Nonterminal, nonterminals, Production

def test1():
    nt1 = Nonterminal('NP')
    nt2 = Nonterminal('VP')

    print(nt1.symbol())

    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, DT = nonterminals('N, V, P, DT')

    prod1 = Production(S, [NP, VP])
    prod2 = Production(NP, [DT, NP])

    print(prod1.lhs())
    print(prod1.rhs())
    print(prod1 == Production(S, [NP, VP]))  # True: same LHS and RHS
    print(prod1 == prod2)                    # False
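
# Follow-on sketch: the productions above can be wrapped into a CFG
# object directly; CFG(start, productions) is the standard constructor.
from nltk import CFG, Production, nonterminals

S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')
toy = CFG(S, [Production(S, [NP, VP]), Production(NP, [DT, NP])])
print(toy.start())        # S
print(toy.productions())  # [S -> NP VP, NP -> DT NP]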
Example #2
import nltk
from copy import copy

def convert2_nltk_CFG(G):
    terminals, NTs, P, S = G
    Prod = copy(P)
    # The DUMMY production ensures full coverage of terminals
    # when parsing the grammar for testing.
    Prod["DUMMY"] = [list(map(lambda x: (x,), terminals))]
    assert len(S) > 0  # need a start symbol
    if len(S) > 1:
        # Several start symbols: introduce a fresh start symbol NT0
        # with one unit production per original start symbol.
        if "NT0" not in Prod:
            Prod["NT0"] = []
        for Si in S:
            Prod["NT0"].append([(Si,)])
        start = nltk.Nonterminal("NT0")
    else:
        start = nltk.Nonterminal(next(iter(S)))
    productions = []
    # Only nonterminals that actually have productions are visited.
    for NT in Prod:
        for rule in Prod[NT]:
            # rule_to_tuple (defined elsewhere) converts a rule into the
            # RHS tuple of terminals and nltk.Nonterminals.
            rhs = rule_to_tuple(rule, NTs)
            productions.append(nltk.Production(nltk.Nonterminal(NT), rhs))
    return nltk.grammar.CFG(start, productions)
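
# Hedged usage sketch. rule_to_tuple is defined elsewhere in this
# project; the stand-in below assumes a rule is a list of (symbol,)
# tuples, as the NT0/DUMMY code above constructs them.
def rule_to_tuple(rule, NTs):
    return tuple(nltk.Nonterminal(s) if s in NTs else s for (s,) in rule)

# Toy grammar tuple in the (terminals, NTs, P, S) shape unpacked above.
G = ({'a', 'b'},
     {'S'},
     {'S': [[('a',), ('S',), ('b',)], [('a',), ('b',)]]},
     {'S'})
print(convert2_nltk_CFG(G))  # grammar with 3 productions (incl. DUMMY)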
Example #3
def update_grammar(productions, unknown):
    # Tag each unknown word and add a unary production POS -> word.
    tagged = pos_tagger.tag(unknown)
    for word, tag in tagged:
        pos = nonterminals(tag)[0]
        production_ = Production(pos, [word])
        productions.append(production_)
        print(production_, "added to productions")

    S = Nonterminal('SENT')
    grammar = induce_pcfg(S, productions)

    return grammar
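
# Hedged usage sketch: pos_tagger is a module-level tagger defined
# elsewhere in this project; nltk.pos_tag stands in for it here, and
# the seed production list is a toy one.
import nltk
from nltk import Nonterminal, Production, nonterminals, induce_pcfg

class _PosTagger:
    def tag(self, tokens):
        # Requires the averaged_perceptron_tagger data package.
        return nltk.pos_tag(tokens)

pos_tagger = _PosTagger()

seed = [Production(Nonterminal('SENT'), [Nonterminal('NN')])]
grammar = update_grammar(seed, ['blockchain'])
print(grammar)  # PCFG with start SENT and NN -> 'blockchain' added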
Example #4
def cfg_demo():
    """
    A demonstration showing how CFGs can be created and used.
    """

    from nltk import nonterminals, Production, CFG

    # Create some nonterminals
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, Det = nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP])
    print('    S.symbol() =>', repr(S.symbol()))
    print()

    print(Production(S, [NP]))

    # Create some Grammar Productions
    grammar = CFG.fromstring("""
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """)

    print('A Grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # str.replace(...) is used to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 25))
    print()

    # The old covers() method is gone; check_coverage raises ValueError
    # when a token is missing from the grammar.
    print('Coverage of input words by a grammar:')
    for tokens in (['a', 'dog'], ['a', 'toy']):
        try:
            grammar.check_coverage(tokens)
            print(tokens, '=> covered')
        except ValueError:
            print(tokens, '=> not covered')
def CFG_grammar():
    GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP,NP,O = nonterminals('VP,NP,O')

    # Build a CFG based on the symbols generated above.
    grammar = CFG.fromstring("""
    VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
    NP -> P ENTITY_PLACE | ENTITY_PLACE
    GOAL_FIND -> 'find'
    GOAL_FIND  -> 'show'
    GOAL_FIND  -> 'tell'
    O -> 'me'
    P -> 'in'
    ENTITY_PLACE -> 'starbucks'
    ENTITY_PLACE -> 'the starbucks'
    ENTITY_PLACE -> 'a starbucks'
    ENTITY_PLACE -> 'coffee bean'
    ENTITY_PLACE -> 'the coffee bean'
    ENTITY_PLACE -> 'a coffee bean'

    """)
    return grammar
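
# Usage sketch: CFG.fromstring takes the first rule's left-hand side
# (here VP) as the start symbol, so a tokenized request parses directly.
from nltk.parse import RecursiveDescentParser

rd = RecursiveDescentParser(CFG_grammar())
for tree in rd.parse(['show', 'me', 'starbucks']):
    print(tree)  # (VP (GOAL_FIND show) (O me) (ENTITY_PLACE starbucks))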
def Theona():

    intro1, intro2, intro3 = sentence_generation('open')
    audio_play('boost.wav')
    os.system(intro1)

    train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
    print('Training the chunker... This will take 2-4 minutes.')
    chunker = ConsecutiveNPChunker(train_sents)
    os.system(intro2)

    # Theona Introduction
    audio_play('start_up.wav')
    os.system(intro3)

    # Step1. ASR
    # Use recognizer to record the speech.
    recorder = sr.Recognizer()
    starting = sentence_generation('hello')
    with sr.Microphone() as mike:
        print('Hello. Please speak.')
        audio_play('pong.wav')
        os.system(starting)
        my_sound = recorder.listen(mike)

    print('Processing...')

    # Speech signal to text, via the Google Speech API (an Internet connection is required).
    tmp_words = recorder.recognize_google(my_sound)
    words = str(tmp_words)

    # test printing...
    print(words)

    # Step2. SLU
    # 1. Find specific places for the user.
    #words = 'show me starbucks'

    # Tokenize the sentence.
    tokenized = word_tokenize(words)

    # Parse the sentence to identify the goal and entity.
    pos_tagged = nltk.pos_tag(tokenized)
    chunk_words = chunker.parse(pos_tagged)
    reorder_words = tree_reconstruct(chunk_words)

    # Build the grammar for parsing.
    GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP,NP,O = nonterminals('VP,NP,O')

    grammar = CFG_grammar()
    rd_parser = RecursiveDescentParser(grammar)

    # Parse the sentence; keep the last parse found.
    parsing = None
    for parsing in rd_parser.parse(reorder_words):
        print(parsing)

    # Find GOAL and ENTITY in the last parse.
    for detect in parsing:
        if detect.label() == 'GOAL_FIND':
            usr_goal = detect.leaves()[0]
        if detect.label() == 'ENTITY_PLACE':
            usr_place = detect.leaves()[0]

    finding = sentence_generation('finding')
    finding = re.sub('<place>',usr_place,finding)
    audio_play('tone.wav')
    os.system(finding)

    # 2. Provide weather information to users.

    # Step3. DM
    # Collect information from the internet.
    # Location
    google_url = "https://www.google.co.kr/?gfe_rd=cr&ei=8YoTV-OdF8WL8AWGp5DgDg&gws_rd=ssl#newwindow=1&q="
    daum_url = 'http://search.daum.net/search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&sugo=&sq=&o=&q='

    # Query the web to process the user's request (goal and entity).
    if usr_goal == 'find':
        # Searching in Daum.
        usr_request_url = daum_url + usr_place + '&tltm=1'
        request = requests.get(usr_request_url)
        soup = BeautifulSoup(request.content,'html.parser')

        # Searching in Google.
        #usr_request_url = google_url + usr_place
        #request = requests.get(usr_request_url)
        #soup = BeautifulSoup(request)

    # Collect information.
    # Find the closest 5 places around the location from which the request was made.
    all_data = soup.find_all('div', {'class': 'cont_place'})

    first_data = all_data[0]

    # Address
    address_info = first_data.find_all('a', {'class': 'more_address'})[0].text
    # Phone number
    phone_info = first_data.find_all('span', {'class': 'f_url'})[0].text
    # Location (map)
    map_info = first_data.find('a').get('href')

    # Weather



    # Step4. NLG
    # Generate an appropriate sentence.
    answer_text = NLG_transoformation('find')

    # Adjust the words if the text is Korean.
    address_info = lang_adjust(address_info)

    # Substitute the markers with the proper words.
    answer_text = re.sub('<place>',usr_place,answer_text)
    answer_text = re.sub('<address>',address_info,answer_text)
    answer_text = re.sub('<phone>',phone_info,answer_text)

    # Step5. TTS
    audio_play('tone.wav')
    os.system('say ' + answer_text)
# -*- coding: utf-8 -*-
"""
Created on Wed Dec  9 13:04:57 2020

@author: Rahul Kothuri, Isak Nyberg
"""
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG
w1 = Nonterminal("NP")
w2 = Nonterminal("VP")
S, NP, VP = nonterminals('S,NP,VP')
NLN, LN, V, LNP, DT, VBP, Adj, VBZ, RB = nonterminals(
    'NLN,LN,V,LNP, DT, VBP, Adj,VBZ,RB')
prod1 = Production(S, [NP, VP])
prod2 = Production(NP, [DT, NP])
grammar = CFG.fromstring("""
    S ->  NP VP
    NP -> Det LN | Det NLN | Det LNP
    VP -> V NP | VBP Adj | VBZ Adj | V RB | V | VBZ NP
    Det -> 'The'
    Det -> 'A'
    Det -> 'the'
    Det -> 'that'
    Det -> 'Those'
    LN -> 'girl' | 'boy' | 'dog'
    LNP -> 'boys'
    NLN -> 'house' | 'crackers'
    V -> 'eats'
    V -> 'run' | 'runs'
    VBP -> 'are'
    VBZ -> 'is'
""")

"""
A context-free grammar is called "context-free" because a symbol A can
always be freely replaced by a string α, regardless of the context in
which A appears.
A CFG consists of:
    a finite set of nonterminals (N)
    a finite set of terminals (T)
    a start symbol (S)
    a finite set of productions (P), of the form A -> α
"""
# Nonterminals
nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
print((nonterminal1 == nonterminal2))
print((nonterminal2 == nonterminal3))
print((nonterminal1 == nonterminal3))

S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')
# Productions
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
print(production1.lhs(), production1.rhs())
print(production2.lhs(), production2.rhs())
print(production3.lhs(), production3.rhs())

# Grammar parsing
gram1 = nltk.data.load('grammars/large_grammars/atis.cfg')
# print(gram1)
sent = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sent = nltk.parse.util.extract_test_sentences(sent)
testingsent = sent[25]
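
# Follow-on sketch: extract_test_sentences returns (token list, parse
# count) pairs, so the selected sentence can be handed to a chart parser.
tokens, expected_parses = testingsent
parser = nltk.parse.BottomUpChartParser(gram1)
chart = parser.chart_parse(tokens)  # full chart for the sentence
print(len(tokens), expected_parses, chart.num_edges())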
Example #9
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG
nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
print(nonterminal1.symbol())
print(nonterminal2.symbol())
print(nonterminal3.symbol())
print(nonterminal1 == nonterminal2)
print(nonterminal2 == nonterminal3)
print(nonterminal1 == nonterminal3)
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
print(production1.lhs())
print(production1.rhs())
print(production3.lhs())
print(production3.rhs())
print(production3 == Production(VP, [V, NP, NP, PP]))
print(production2 == production3)

Example #10
# Speech signal to text, via the Google Speech API (an Internet connection is required).
tmp_words = recorder.recognize_google(my_sound)
words = str(tmp_words)

# test printing...
print(words)

# Step2. SLU
# 1. Find specific places for the user.
#words = 'show me starbucks'

# Tokenize the sentence.
tokenized = word_tokenize(words)

# Build the grammar for parsing.
GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
usr_goal = ENTITY_PLACE
usr_find = GOAL_FIND
VP,NP,O = nonterminals('VP,NP,O')

grammar = CFG.fromstring("""
VP -> GOAL_FIND O ENTITY_PLACE | GOAL_FIND ENTITY_PLACE
NP -> P ENTITY_PLACE | ENTITY_PLACE
GOAL_FIND -> 'find'
GOAL_FIND  -> 'show'
GOAL_FIND  -> 'tell'
O -> 'me'
P -> 'in'
ENTITY_PLACE -> 'starbucks'
ENTITY_PLACE -> 'Starbucks'
ENTITY_PLACE -> 'Coffee Bean'
""")

from nltk import nonterminals, Production, CFG
import generate
# Create some nonterminals
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, Det = nonterminals('N, V, P, Det')
VP_slash_NP = VP/NP
# Create some Grammar Productions
grammar = CFG.fromstring(
    """
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'boy' | 'girl'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in' | 'to'
    """)

Example #12
from nltk import PCFG, Tree
from nltk import nonterminals, Nonterminal, Production

import random
from generator import generate

# Create some nonterminals
S, NP, VP, AdjP, NP_pron, N, V, P, Det, Adj, Pron, PLex, NP_PP, NPSg, NPPl, NSg, NPl, Vsg, Vpl, VPSg, VPPl, PronSg, PronPl, DetSg, DetPl, NPobj, PronObj, PPSg, PPPl, NPObjSg, NPObjPl = nonterminals('S, NP, VP, AdjP, NP_pron, N, V, P, Det, Adj, Pron, PLex, NP_PP, NPSg, NPPl, NSg, NPl, Vsg, Vpl, VPSg, VPPl, PronSg, PronPl, DetSg, DetPl, NPobj, PronObj, PPSg, PPPl, NPObjSg, NPObjPl')

pcfg_agreement_pp = PCFG.fromstring("""
    S -> PP NPSg VSg [0.1] | PP NPPl VPl [0.1]
    S -> NPSg VSg [0.4]
    S -> NPPl VPl [0.4]    
    VSg -> 'laughs' [0.4] | 'dances' [0.2] | 'hopes' [0.15] | 'burps' [0.1] | 'coughs' [0.1] | 'dies' [0.05]
    VPl -> 'laugh' [0.4] | 'dance' [0.2] | 'hope' [0.15] | 'burp' [0.1] | 'cough' [0.1] | 'die' [0.05]
    P -> 'near' [0.7] | 'with' [0.3]
    PP -> P NPObj [1.0]
    NPObj -> PronObj [0.2] | DetSg NSg [0.2] | DetSg AdjP NSg [0.1] | DetSg NSg PP [0.1] | DetPl NPl [0.2] | DetPl AdjP NPl [0.1] | DetPl NPl PP [0.1] 
    NPSg -> PronSg [0.2] | DetSg NSg [0.4] | DetSg AdjP NSg [0.2] | DetSg NSg PP [0.2]
    NPPl -> PronPl [0.2] | DetPl NPl [0.4] | DetPl AdjP NPl [0.2] | DetPl NPl PP [0.2]
    DetSg -> 'the' [0.5] | 'a' [0.5]
    DetPl -> 'the' [0.8] | 'most' [0.2]
    NSg -> 'zebra' [0.4] | 'badger' [0.2] | 'chicken' [0.15] | 'dog' [0.1] | 'robin' [0.1] | 'frog' [0.05]
    NPl -> 'zebras' [0.4] | 'badgers' [0.2] | 'chickens' [0.15] | 'dogs' [0.1] | 'robins' [0.1] | 'frogs' [0.05]
    AdjP -> Adj [0.7] | Adj AdjP [0.3]
    Adj -> 'gentle' [0.4] | 'humble' [0.2] | 'clever' [0.15]  | 'jocular' [0.1] | 'kindly' [0.1] | 'lovely' [0.05]
    PronSg -> 'he' [0.5] |  'she' [0.5] 
    PronPl -> 'they' [1.0] 
    PronObj -> 'him' [.33] | 'her' [.33] | 'them' [.34]
""")
Example #13
print("Probabilistic Tree:")
print(pt)
pt.draw()

# In[ ]:

## Grammar tools
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG
nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
print(nonterminal1.symbol())
print(nonterminal2.symbol())
print(nonterminal1 == nonterminal2)
S, NP, VP, PP = nonterminals(
    'S, NP, VP, PP')  ## use nonterminals to generate a list
N, V, P, DT = nonterminals('N, V, P, DT')
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
print(production1.lhs())
print(production1.rhs())
print(production3 == Production(VP, [V, NP, NP, PP]))

# In[ ]:

nltk.download('large_grammars')

# In[ ]:

### ATIS grammar
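
# The section ends here; presumably the next cell would load the
# grammar just downloaded, mirroring the earlier ATIS example.
atis_grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
print(atis_grammar)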