Code example #1
File: try_parse1.py Project: folagit/resumatcher
def test2():
    from nltk.parse import RecursiveDescentParser
    # `grammar` is assumed to be a CFG defined elsewhere in try_parse1.py.
    rd = RecursiveDescentParser(grammar)
    sentence1 = 'the cat chased the dog'.split()
    sentence2 = 'the cat chased the dog on the rug'.split()

    # parse() returns an iterator of trees; print each one.
    for tree in rd.parse(sentence2):
        print(tree)
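Example #1 never shows `grammar`; a minimal runnable sketch, assuming a toy grammar of our own rather than the one in resumatcher:

import nltk
from nltk.parse import RecursiveDescentParser

# Toy grammar (an assumption). Rules are kept non-left-recursive because
# RecursiveDescentParser loops forever on left recursion such as NP -> NP PP.
grammar = nltk.CFG.fromstring("""
S -> NP VP
NP -> Det N
VP -> V NP | V NP PP
PP -> P NP
Det -> 'the'
N -> 'cat' | 'dog' | 'rug'
V -> 'chased'
P -> 'on'
""")

rd = RecursiveDescentParser(grammar)
for tree in rd.parse('the cat chased the dog on the rug'.split()):
    print(tree)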
Code example #2
import re
from nltk import CFG
from nltk.parse import RecursiveDescentParser
from nltk.tree import ParentedTree


class GridGen:
    def __init__(self):
        PrWd_rules = []
        PrWd_rules_old = ['']
        for i in range(7):  # set maximum level-1 length
            PrWd_rules_new = [x + ' ' + y for x in PrWd_rules_old
                              for y in ['x', 'o']]  # level-1 grid
            PrWd_rules += PrWd_rules_new
            PrWd_rules_old = PrWd_rules_new[:]

        PrWd_rules = ['PrWd -> ' + x for x in PrWd_rules]
        # Culminativity (at least one level-1 grid mark)
        PrWd_rules = [x for x in PrWd_rules if re.search('x', x)]

        # Expansions of syllable preterminals
        Term_rules = ['x -> "σ"', 'o -> "σ"']

        grammar_rules = PrWd_rules + Term_rules
        grammar_rulestr = '\n'.join(grammar_rules)
        grammar = CFG.fromstring(grammar_rulestr)
        print(f'# of productions in grammar: {len(grammar.productions())}')

        self.grammar = grammar
        self.parser = RecursiveDescentParser(grammar)

    def parses(self, inpt):
        T = [t for t in self.parser.parse(inpt.split())]
        T = [ParentedTree.convert(t) for t in T]
        return T
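A usage sketch for the class above: enumerate every culminative level-1 grid over a three-syllable input (the σ tokens match the grammar's terminals).

gen = GridGen()
for tree in gen.parses('σ σ σ'):
    print(tree)  # 7 grids: every x/o string of length 3 with at least one x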
Code example #3
File: cfg.py Project: WangGitHubWei/learn_nlp
    def cfg_en(self):
        print("test_nltk_cfg_en")  # 定义英文语法规则
        grammar = nltk.CFG.fromstring("""
         S -> NP VP
         VP -> V NP | V NP PP
         V -> "saw" | "ate"
         NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
         Det -> "a" | "an" | "the" | "my"
         N -> "dog" | "cat" | "cookie" | "park"
         PP -> P NP
         P -> "in" | "on" | "by" | "with"
         """)

        sent = "Mary saw Bob".split()

        rd_parser = RecursiveDescentParser(grammar)

        result = []

        for i, tree in enumerate(rd_parser.parse(sent)):
            result.append(tree)

        assert len(result) > 0, "CFG tree parse failed."

        print(result)
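The grammar above licenses two attachments for a trailing PP; a quick sketch of the ambiguity, continuing inside cfg_en with a sentence of our own:

sent2 = "John saw a dog in the park".split()
for tree in rd_parser.parse(sent2):
    print(tree)  # two trees: PP attached inside the NP vs. to the VP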
Code example #4
File: cfg.py Project: WangGitHubWei/learn_nlp
    def test_nltk_cfg_qtype(self):
        print("test_nltk_cfg_qtype")
        gfile = os.path.join(curdir, os.path.pardir, "config",
                             "grammar.question-type.cfg")
        question_grammar = nltk.data.load('file:%s' % gfile)

        def get_missing_words(grammar, tokens):
            """
            Find list of missing tokens not covered by grammar
            """
            missing = [
                tok for tok in tokens if not grammar._lexical_index.get(tok)
            ]
            return missing

        sentence = "what is your name"

        sent = sentence.split()
        missing = get_missing_words(question_grammar, sent)
        target = []
        for x in sent:
            if x in missing:
                continue
            target.append(x)

        rd_parser = RecursiveDescentParser(question_grammar)
        result = []
        print("target: ", target)
        for tree in rd_parser.parse(target):
            result.append(tree)
            print("Question Type\n", tree)

        if len(result) == 0:
            print("Not Question Type")
Code example #5
File: cfg.py Project: WangGitHubWei/learn_nlp
    def cfg_zh(self):
        # A tiny Chinese grammar: "我们 尊敬 老师" = "we respect the teacher".
        grammar = nltk.CFG.fromstring("""
             S -> N VP
             VP -> V N
             V -> "尊敬"
             N -> "我们" | "老师"
             """)

        sent = "我们 尊敬 老师".split()
        rd_parser = RecursiveDescentParser(grammar)

        result = []

        for i, tree in enumerate(rd_parser.parse(sent)):
            result.append(tree)
            print("Tree [%s]: %s" % (i + 1, tree))

        assert len(result) > 0, "Cannot recognize CFG tree."
        if len(result) == 1:
            print("Draw tree with Display ...")
            result[0].draw()
        else:
            print("WARN: Got more than one tree.")

        print(result)
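draw() needs a Tk display; a text-only sketch for headless runs, assuming the same grammar as above:

trees = list(RecursiveDescentParser(grammar).parse("我们 尊敬 老师".split()))
trees[0].pretty_print()  # ASCII rendering of the tree, no display required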
Code example #6
def Process(text, file):
    # `pgrammar` is assumed to be a CFG defined elsewhere in the module.
    sr = RecursiveDescentParser(pgrammar)
    r = list(sr.parse(text.split()))
    if len(r) > 0:
        cadResult = GenerateCadFile(ParentedTree.convert(r[0]))
        cadResult.write(file)
    else:
        print("************* " + text)
Code example #7
    def check_syntax(text):
        # List all languages that can be used in the grammar.
        lang_pos = {}
        for l in pycountry.languages:
            p = pycountry.languages.get(name=l.name)
            try:
                alpha_kind = p.alpha_2
                lang_pos[p.name] = alpha_kind
            except AttributeError:
                pass  # not every language has an alpha_2 code

        lang_command = '''LANG ->'''
        for lg in lang_pos:
            if list(lang_pos.keys()).index(lg) == 0:
                lang_command += ''' '{x}' '''.format(x=lang_pos[lg])
            else:
                lang_command += '''| '{x}' '''.format(x=lang_pos[lg])

        # List all possible functions, given that the system was already
        # rewritten to accommodate the changes.
        func_command = '''FUNC ->'''
        for attr in dir(node_func):
            if dir(node_func).index(attr) == 0:
                func_command += ''' '{x}' '''.format(x=attr)
            else:
                func_command += '''| '{x}' '''.format(x=attr)

        # Substitute the pre-made rules into the grammar itself.
        grammar = CFG.fromstring(('''
            S -> 'plug' '<' FUNC '>' 'as' LANG | 'unplug' '<' LANG '>'
            command1
            command2
            '''.replace('command1', lang_command)).replace('command2', func_command))

        grammar_rd = RecursiveDescentParser(grammar)
        # Check the syntax and the lexicon using the CFG described above.
        for t in text.split('\n'):
            parsed = []
            try:
                for tree in grammar_rd.parse(t.split()):
                    parsed.append(tree)
            except ValueError:
                # parse() raises ValueError on tokens the grammar does not cover.
                return 'syntax/lexical error'
            if parsed:
                print(parsed)
            else:
                return 'syntax error'
        return 'parsed'
Code example #8
def sensibility_test(transcribeText, backdoor):
    if backdoor:
        print('Sentence is sensible')
        return 1
    else:
        grammar = nltk.data.load('grammars/book_grammars/drt.cfg')
        # sr = ShiftReduceParser(grammar=grammar)
        rd = RecursiveDescentParser(grammar)
        try:
            # transcribeText is assumed to already be a list of tokens.
            for t in rd.parse(transcribeText):
                print(t)
            print('Sentence is sensible')
        except ValueError:
            print('Sentence is not sensible')
Code example #9
class FootGen:
    # Assumes the same imports as GridGen above: re, CFG,
    # RecursiveDescentParser, ParentedTree.
    def __init__(self):
        # Expansions of PrWd
        PrWd_rules = []
        PrWd_rules_old = ['']
        for i in range(5):
            PrWd_rules_new = [x + ' ' + y for x in PrWd_rules_old
                              for y in ['MainFt', 'Ft', 'Syll']]
            PrWd_rules += PrWd_rules_new
            PrWd_rules_old = PrWd_rules_new[:]

        PrWd_rules = ['PrWd -> ' + x for x in PrWd_rules]
        # Culminativity (exactly one main-stress foot)
        PrWd_rules = [x for x in PrWd_rules if re.search('Main', x) \
                                and not re.search('Main.*Main', x)]
        #print(len(PrWd_rules))

        # Expansions of (Main)Ft
        MainFt_rules = ['MainFt -> '+y for y in \
            ['MainStressSyll', 'MainStressSyll Syll', 'Syll MainStressSyll']]

        Ft_rules = ['Ft -> '+y for y in \
            ['StressSyll', 'StressSyll Syll', 'Syll StressSyll']]

        # Expansions of (Main)(Stress)Syll
        Syll_rules = ['MainStressSyll -> s1', 'StressSyll -> s2', 'Syll -> s0']

        # Expansions of syllable preterminals
        Term_rules = ['s1 -> "σ"', 's2 -> "σ"', 's0 -> "σ"']

        grammar_rules = PrWd_rules + MainFt_rules \
                        + Ft_rules + Syll_rules + Term_rules
        grammar_rulestr = '\n'.join(grammar_rules)
        grammar = CFG.fromstring(grammar_rulestr)
        print(f'# of productions in grammar: {len(grammar.productions())}')

        self.grammar = grammar
        self.parser = RecursiveDescentParser(grammar)

    def parses(self, inpt):
        T = [t for t in self.parser.parse(inpt.split())]
        T = [ParentedTree.convert(t) for t in T]
        return T
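Usage sketch, mirroring GridGen: count the footings the grammar licenses for a three-syllable word, each containing exactly one main-stress foot.

gen = FootGen()
print(len(gen.parses('σ σ σ')))  # one parse per licensed footing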
Code example #10
File: cfg.py Project: WangGitHubWei/learn_nlp
    def test_sample(self):
        print("test_sample")
        # This is a CFG grammar, where:
        # Start Symbol : S
        # Nonterminal : NP,VP,DT,NN,VB
        # Terminal : "I", "a" ,"saw" ,"dog"
        grammar = nltk.grammar.CFG.fromstring("""
            S -> NP VP
            NP -> DT NN | NN
            VP -> VB NP
            DT -> "a"
            NN -> "I" | "dog"
            VB -> "saw"
        """)
        sentence = "I saw a dog".split()
        parser = RecursiveDescentParser(grammar)
        # parse() returns an iterator over parse trees.
        final_tree = parser.parse(sentence)

        for i in final_tree:
            print(i)
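Because parse() yields a one-shot iterator, materialize it when the trees are needed more than once; a small sketch continuing the test:

trees = list(parser.parse(sentence))
print(len(trees))  # safe to inspect repeatedly
for t in trees:
    print(t)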
Code example #11
def Theona():

    intro1, intro2, intro3 = sentence_generation('open')
    audio_play('boost.wav')
    os.system(intro1)

    train_sents = conll2000.chunked_sents('train.txt', chunk_types=['NP'])
    print('Training data... It will take 2-4 minutes.')
    chunker = ConsecutiveNPChunker(train_sents)
    os.system(intro2)

    # Theona Introduction
    audio_play('start_up.wav')
    os.system(intro3)

    # Step1. ASR
    # Use recognizer to record the speech.
    recorder = sr.Recognizer()
    starting = sentence_generation('hello')
    with sr.Microphone() as mike:
        print('Hello. Please speak.')
        audio_play('pong.wav')
        os.system(starting)
        my_sound = recorder.listen(mike)

    print('Processing...')

    # Speech signal to text via the Google Speech API (requires an internet connection).
    tmp_words = recorder.recognize_google(my_sound)
    words = str(tmp_words)

    # test printing...
    print(words)

    # Step2. SLU
    # 1. find the specific places to users.
    #words = 'show me starbucks'

    # Tokenize the sentence.
    tokenized = word_tokenize(words)

    # Parse the sentence to identify the goal and entity.
    pos_tagged = nltk.pos_tag(tokenized)
    chunk_words = chunker.parse(pos_tagged)
    reorder_words = tree_reconstruct(chunk_words)

    # Build the grammar for parsing.
    GOAL_FIND,ENTITY_PLACE = nonterminals('GOAL_FIND,ENTITY_PLACE')
    usr_goal = ENTITY_PLACE
    usr_find = GOAL_FIND
    VP,NP,O = nonterminals('VP,NP,O')

    grammar = CFG_grammar()
    rd_parser = RecursiveDescentParser(grammar)

    # Parse the sentence.
    parsed_words = []
    for parsing in rd_parser.parse(reorder_words):
        print(parsing)

    # Find GOAL and ENTITY in the last parse found above.
    for detect in parsing:
        if detect.label() == 'GOAL_FIND':
            usr_goal = detect.leaves()[0]
        if detect.label() == 'ENTITY_PLACE':
            usr_place = detect.leaves()[0]

    finding = sentence_generation('finding')
    finding = re.sub('<place>',usr_place,finding)
    audio_play('tone.wav')
    os.system(finding)

    # 2. Provide weather information to users.

    # Step3. DM
    # Collect information from the internet.
    # Location
    google_url = "https://www.google.co.kr/?gfe_rd=cr&ei=8YoTV-OdF8WL8AWGp5DgDg&gws_rd=ssl#newwindow=1&q="
    daum_url = 'http://search.daum.net/search?w=tot&DA=YZR&t__nil_searchbox=btn&sug=&sugo=&sq=&o=&q='

    # Connect to the internet to process the user's request: goal and entity.
    if usr_goal == 'find':
        # Searching in Daum.
        usr_request_url = daum_url + usr_place + '&tltm=1'
        request = requests.get(usr_request_url)
        soup = BeautifulSoup(request.content,'html.parser')

        # Searching in Google.
        #usr_request_url = google_url + usr_place
        #request = requests.get(usr_request_url)
        #soup = BeautifulSoup(request)

    # Collect information.
    # Find the closest 5 places around the location from which the request starts.
    all_data = soup.find_all('div', {'class': 'cont_place'})

    first_data = all_data[0]

    # Address
    address_info = all_data[0].find_all('a', {'class': 'more_address'})[0].text
    # Phone Number
    phone_info = all_data[0].find_all('span', {'class': 'f_url'})[0].text
    # Location (map)
    map_info = all_data[0].find('a').get('href')

    # Weather



    # Step4. NLG
    # Generate an appropriate sentence.
    answer_text = NLG_transoformation('find')

    # Adjust the words if it is Korean.
    address_info = lang_adjust(address_info)

    # Substitute the markers with the proper words
    answer_text = re.sub('<place>',usr_place,answer_text)
    answer_text = re.sub('<address>',address_info,answer_text)
    answer_text = re.sub('<phone>',phone_info,answer_text)

    # Step5. TTS
    audio_play('tone.wav')
    os.system('say ' + answer_text)
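CFG_grammar() is defined elsewhere in the project; a hypothetical sketch of its shape, inferred from the GOAL_FIND/ENTITY_PLACE labels used above and the grammar fragment in example #12:

def CFG_grammar():
    # Hypothetical reconstruction; the real helper lives elsewhere.
    return nltk.CFG.fromstring("""
        VP -> GOAL_FIND O ENTITY_PLACE
        GOAL_FIND -> 'find' | 'show' | 'tell'
        O -> 'me'
        ENTITY_PLACE -> 'starbucks'
    """)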
Code example #12
File: simple_sds.py Project: jaekookang/Programs
GOAL_FIND -> 'find'
GOAL_FIND -> 'show'
GOAL_FIND -> 'tell'
O -> 'me'
P -> 'in'
ENTITY_PLACE -> 'starbucks'
ENTITY_PLACE -> 'Starbucks'
ENTITY_PLACE -> 'Coffee Bean'
ENTITY_PLACE -> 'Coffeebean'

""")
rd_parser = RecursiveDescentParser(grammar)

# Parsing the sentence.
parsed_words = []
for parsing in rd_parser.parse(tokenized):
    print(parsing)

# Find GOAL and ENTITY in the last parse from the loop above.
for detect in parsing:
    if detect.label() == 'GOAL_FIND':
        usr_goal = detect.leaves()[0]
    if detect.label() == 'ENTITY_PLACE':
        usr_place = detect.leaves()[0]

finding = sentence_generation('finding')
finding = re.sub('<place>',usr_place,finding)
os.system(finding)

# 2. Provide weather information to users.
Code example #13
File: parse tree.py Project: RekhaSundar/Parse-Trees
N -> 'tree'
N -> 'fish'
Adj -> 'angry'
Adj -> 'frightened'
Adj -> 'little'
Adj -> 'tall'
V -> 'chased'
V -> 'said'
V -> 'thought'
V -> 'was'
V -> 'put'
P -> 'on'
""")

# In[4]:

# grammar1 and grammar2 are defined earlier in the file (truncated above).
rd = RecursiveDescentParser(grammar1)
sentence1 = 'mary saw a telescope in the park'.split()
for t in rd.parse(sentence1):
    print(t)
t.draw()  # draws the last tree from the loop

# In[ ]:

# Before executing this cell, restart the kernel and clear all outputs.
rd = RecursiveDescentParser(grammar2)
sentence2 = 'the bear chased the frightened squirrel'.split()
for s in rd.parse(sentence2):
    print(s)
s.draw()  # draws the last tree from the loop
Code example #14
File: fuzzy_script.py Project: synth-me/FuzzyScript
    def parser(plain_text,
               set_name={
                   'name': ['x', 'y', 'z'],
                   'iten': ['a', 'b', 'c'],
                   'def': ['m', 'n', 'o']
               }):
        # First we define the CFG that will generate all possible sentences of
        # the language, based on the set, item, and membership-function names
        # the user has chosen. Each name list is formatted as a row of quoted
        # CFG alternatives.
        def alternatives(lhs, names):
            return lhs + ' -> ' + ' | '.join("'{}'".format(n) for n in names)

        line_grammar = alternatives('MEM_FUNC', set_name['def'])   # membership functions
        line_grammar_0 = alternatives('NAME', set_name['name'])    # set names
        line_grammar_1 = alternatives('NAME_I', set_name['iten'])  # item names

        prime_cloudy_grammar = ((("""

            T ->  COM_D END | INIT_A COM_A END | 'start_cloud{' | '}end_cloud'  
            
            COM_D -> 'name::=' NAME '{' ITEN ';' MEM ';' 
            lacune_1
            lacune_2
            ATTR -> ITEN ';' MEM ';'
            ITEN -> 'iten::=' NAME_I  


            MEM -> 'membership::=' '(' MEM_FUNC ')' 
            lacune_3


            INIT_A -> 'active=>' '{'
            COM_A -> NAME Q NAME | NAME_I O NAME |'plot=>' CONJ 'using:' PLOT_S
            PLOT_S -> 'line' | 'venn'

            CONJ -> NAME Q NAME | NAME 
            Q -> '-u' | '-i' | '-c'
            O -> 'in' | 'out' | '<m>'


            END -> '}end'
            """.replace('lacune_1', line_grammar_0)).replace(
            'lacune_2', line_grammar_1)).replace('lacune_3', line_grammar))

        # Build the formal CFG from the string using NLTK.
        _cloudy_grammar = CFG.fromstring(prime_cloudy_grammar)
        #for sentences_test in generate(_cloudy_grammar,n=200):
        #print(' '.join(sentences_test))
        # Create the parser for this grammar.
        cloudy_rd = RecursiveDescentParser(_cloudy_grammar)
        # split the input text into lines
        code_total = plain_text.split('\n')

        counter = 0
        while counter < len(code_total):
            test = code_total[counter].split()
            # All code must start and end with specific markers, as follows.
            if counter == 0 and 'start_cloud{' in test:
                print("starting parsing")
            elif counter != 0:
                pass
            else:
                return 'start_cloud statement not found'
            # The }end_cloud statement determines where the parser stops parsing.
            if "}end_cloud" in test:
                print('end of parsing')
                return 'end of parsing'

            try:
                parsed_check = []
                for parsed in cloudy_rd.parse(test):
                    parsed_check.append(parsed)
                # If no parse tree was produced, the line is not well-formed,
                # i.e. there is a syntax error.
                if len(parsed_check) != 0:
                    pass
                else:
                    return 'Syntax error on: (' + str(
                        code_total[counter]) + ' ) at line : ' + str(counter)

            except ValueError:
                # A token outside the grammar's lexicon is caught here, so
                # lexical errors are recognized faster.
                return 'Lexical error on : (' + str(
                    code_total[counter]) + ') at line : ' + str(counter)

            counter += 1
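A usage sketch with the default set names; the three-line program below is hypothetical but respects the grammar's start and end markers:

src = "\n".join([
    "start_cloud{",
    "name::= x { iten::= a ; membership::= ( m ) ; }end",
    "}end_cloud",
])
print(parser(src))  # expected: 'end of parsing'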
Code example #15
    def createTree2(self):
        grammar1 = self.makeGrammar()
        rd = RecursiveDescentParser(grammar1)
        for tree in rd.parse(self.myWords):
            print(tree)
Code example #16
File: p1.py Project: iamchanthu/code
from nltk.corpus import treebank
from nltk import PCFG, CFG
cfg_grammar = CFG.fromstring("""
 S -> NP VP
 NP -> ART N | N N | N | NP PP
 VP -> V | V NP | V NP PP
 PP -> P NP
 ART -> 'a'
 N -> 'flower' | 'a' | 'blooms'
 V -> 'blooms' | 'flower'
 """)
pcfg_grammar = PCFG.fromstring("""
 S -> NP VP [1.0]
 NP -> ART N [0.53] | N N [0.09] | N [0.14] | NP PP [0.24]
 VP -> V [0.386] | V NP [0.393] | V NP PP [0.22]
 PP -> P NP [1.0]
 ART -> 'a' [1.0]
 N -> 'flower' [0.8] | 'a' [0.1] | 'blooms' [0.1]
 V -> 'blooms' [0.8] | 'flower' [0.2]
 """)

from nltk.parse import RecursiveDescentParser

print(cfg_grammar)
# RecursiveDescentParser accepts the PCFG but ignores its probabilities,
# treating it as a plain CFG.
rd = RecursiveDescentParser(pcfg_grammar)
text = "a flower blooms".split()
for t in rd.parse(text):
    print(t)

#rd.draw()
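To actually use the probabilities, a minimal sketch with NLTK's ViterbiParser, which returns the most probable parse:

from nltk.parse import ViterbiParser

viterbi = ViterbiParser(pcfg_grammar)
for t in viterbi.parse(text):
    print(t)  # the single most likely tree, annotated with its probability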