コード例 #1
0
    def test_simple(self):
        """Check CNF conversion in both flexible and strict modes."""
        # Unary (VP -> Det) and ternary (NP -> NP PP P) rules mean this
        # grammar is in neither flavor of Chomsky normal form.
        g1 = CFG.fromstring(
            """
          S -> NP VP
          PP -> P NP
          NP -> Det N | NP PP P
          VP -> V NP | VP PP
          VP -> Det
          Det -> 'a' | 'the'
          N -> 'dog' | 'cat'
          V -> 'chased' | 'sat'
          P -> 'on' | 'in'
        """
        )
        self.assertFalse(g1.is_flexible_chomsky_normal_form())
        self.assertFalse(g1.is_chomsky_normal_form())
        # Flexible conversion yields flexible CNF but not strict CNF.
        g1 = g1.chomsky_normal_form(flexible=True)
        self.assertTrue(g1.is_flexible_chomsky_normal_form())
        self.assertFalse(g1.is_chomsky_normal_form())

        # Default (strict) conversion should satisfy both predicates.
        g2 = CFG.fromstring(
            """
          S -> NP VP
          NP -> VP N P
          VP -> P
          N -> 'dog' | 'cat'
          P -> 'on' | 'in'
        """
        )
        self.assertFalse(g2.is_flexible_chomsky_normal_form())
        self.assertFalse(g2.is_chomsky_normal_form())
        g2 = g2.chomsky_normal_form()
        self.assertTrue(g2.is_flexible_chomsky_normal_form())
        self.assertTrue(g2.is_chomsky_normal_form())
コード例 #2
0
ファイル: test_cfg2chomsky.py プロジェクト: rmalouf/nltk
    def test_simple(self):
        """Conversion to (flexible) Chomsky normal form round-trips the checks."""
        ambiguous = CFG.fromstring(
            """
          S -> NP VP
          PP -> P NP
          NP -> Det N | NP PP P
          VP -> V NP | VP PP
          VP -> Det
          Det -> 'a' | 'the'
          N -> 'dog' | 'cat'
          V -> 'chased' | 'sat'
          P -> 'on' | 'in'
        """
        )
        # Not in any normal form to begin with.
        self.assertFalse(ambiguous.is_flexible_chomsky_normal_form())
        self.assertFalse(ambiguous.is_chomsky_normal_form())
        ambiguous = ambiguous.chomsky_normal_form(flexible=True)
        # Flexible conversion only guarantees the flexible property.
        self.assertTrue(ambiguous.is_flexible_chomsky_normal_form())
        self.assertFalse(ambiguous.is_chomsky_normal_form())

        chain = CFG.fromstring(
            """
          S -> NP VP
          NP -> VP N P
          VP -> P
          N -> 'dog' | 'cat'
          P -> 'on' | 'in'
        """
        )
        self.assertFalse(chain.is_flexible_chomsky_normal_form())
        self.assertFalse(chain.is_chomsky_normal_form())
        chain = chain.chomsky_normal_form()
        # Strict conversion satisfies both predicates.
        self.assertTrue(chain.is_flexible_chomsky_normal_form())
        self.assertTrue(chain.is_chomsky_normal_form())
コード例 #3
0
ファイル: cfg.py プロジェクト: thzll2001/grammar-checker
class Grammar(object):
    """Judge POS-tag sequences as grammatical or not using NLTK CFGs.

    ``grammar1``/``grammar2`` are hand-written grammars whose terminals are
    Penn-Treebank POS tags (not words); ``grammar2`` is a looser variant.
    """

    def __init__(self, dev=False):
        super(Grammar, self).__init__()
        # Development-mode flag; only stored here.
        self.dev = dev

    grammar1 = CFG.fromstring("""
        S  -> NP VP
        NP -> "DT" Nom | "NNP" | "PRP"
        Nom -> "JJ" Nom | N
        VP -> V "JJ" | V NP | V S | V NP PP | V "RB"
        V -> "VBD" | "VB" | "VBG" | "VBN" | "VBP" | "VBZ"
        N -> "NN" | "NNP" | "NNS" | "NNPS"
        PP -> "IN" NP
    """)

    grammar2 = CFG.fromstring("""
        S  -> NP VP
        NP -> "DT" Nom | "NNP" | "PRP"
        Nom -> "JJ" Nom | N | Nom N
        VP -> V "JJ" | V NP | V S | V NP PP | V "RB" | V PP | V
        V -> "VBD" | "VB" | "VBG" | "VBN" | "VBP" | "VBZ"
        N -> "NN" | "NNP" | "NNS" | "NNPS"
        PP -> "IN" NP | "TO" NP
    """)

    def buildFromTreebank(self):
        """ Build a Context-Free-Grammar based on UPenn treebank """
        tbank_productions = set()
        for sent in treebank.parsed_sents():
            for production in sent.productions():
                if production.is_lexical():
                    # Replace each lexical RHS (a word) with its POS tag so
                    # the grammar operates over tag sequences, not words.
                    new_rhs = [str(production._lhs)]
                    production = Production(production._lhs, new_rhs)
                tbank_productions.add(production)

        tbank_grammar = CFG(Nonterminal('S'), list(tbank_productions))

        return tbank_grammar

    def verify(self, grammar, tags):
        """ Verify tag sequence as grammatically correct or not.

        Returns True/False for parseable/unparseable input, or None when the
        parser rejects the tags outright (ValueError).
        """
        # ChartParser chosen over RecursiveDescentParser for speed.
        rd_parser = ChartParser(grammar)
        valid = False

        try:
            # A single parse tree is enough to call the sequence valid.
            for tree in rd_parser.parse(tags):
                valid = True
                break
        except ValueError:
            # print() with one argument is valid on Python 2 AND 3; the
            # original bare print statements were Python-2-only syntax.
            print("This is a grammatical structure I don't understand yet.")
            return

        if valid:
            print("Valid")
            return True
        else:
            print("Invalid")
            return False
コード例 #4
0
def chartParser():
    """
    Chart-parsing demo using the bottom-up left-corner strategy.
    """
    from nltk.grammar import CFG
    from nltk.parse.chart import ChartParser, BU_LC_STRATEGY

    # BNF-style grammar; start symbol S, terminals are words.
    grammar = CFG.fromstring("""
    S -> T1 T4
    T1 -> NNP VBZ
    T2 -> DT NN
    T3 ->IN NNP
    T4 -> T3 | T2 T3
    NNP -> 'Tajmahal' | 'Agra' | 'Bangalore' | 'Karnataka'
    VBZ -> 'is'
    IN -> 'in' | 'of'
    DT -> 'the'
    NN -> 'capital'
    """)

    # trace=True prints each parsing step; BU_LC_STRATEGY is also the
    # default strategy, so naming it is purely documentary.
    parser = ChartParser(grammar, BU_LC_STRATEGY, trace=True)

    tokens = 'Bangalore is the capital of Karnataka'.split()
    # Build the chart for the token list, then pull every complete parse.
    chart = parser.chart_parse(tokens)
    trees = list(chart.parses(grammar.start()))
    print('Total Edges:', len(chart.edges()))  # how many edges the chart holds
    for tree in trees:
        print(tree)
        tree.draw()
コード例 #5
0
ファイル: cfg.py プロジェクト: CaptainAL/Spyder
def demo():
    """Open a small Tk window demonstrating the CFG editor widget."""
    from nltk import CFG

    # NOTE: the original built nine Nonterminal objects from a string and
    # never used them; that dead code (and the unused `editor` local) is gone.
    grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar):
        # Invoked by the editor when the grammar is saved.
        print(grammar)

    top = Tk()
    CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
コード例 #6
0
def app():
    """
    Create a recursive descent parser demo, using a simple grammar and
    text.
    """
    from nltk.grammar import CFG

    # Toy English grammar: a few syntactic rules plus a small lexicon.
    demo_grammar = CFG.fromstring(
        """
    # Grammatical productions.
        S -> NP VP
        NP -> Det N PP | Det N
        VP -> V NP PP | V NP | V
        PP -> P NP
    # Lexical productions.
        NP -> 'I'
        Det -> 'the' | 'a'
        N -> 'man' | 'park' | 'dog' | 'telescope'
        V -> 'ate' | 'saw'
        P -> 'in' | 'under' | 'with'
    """
    )

    tokens = 'the dog saw a man in the park'.split()
    RecursiveDescentApp(demo_grammar, tokens).mainloop()
コード例 #7
0
ファイル: interact.py プロジェクト: encs-humanoid/ai
def demo(N=23):
    """Print the first N sentences generated from ``demo_grammar``."""
    from nltk.grammar import CFG

    print('Generating the first %d sentences for demo grammar:' % (N,))
    print(demo_grammar)
    parsed = CFG.fromstring(demo_grammar)
    # Numbered from 1 for human-friendly output.
    for index, tokens in enumerate(generate(parsed, n=N), 1):
        print('%3d. %s' % (index, ' '.join(tokens)))
コード例 #8
0
ファイル: cfg.py プロジェクト: EricChh20/EZ-Mail
def demo():
    """Open a small Tk window demonstrating the CFG editor widget."""
    from nltk import CFG

    # NOTE: the original unpacked nine Nonterminal objects that were never
    # used, and bound the editor to an unused local; both removed.
    grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar):
        # Invoked by the editor when the grammar is saved.
        print(grammar)

    top = Tk()
    CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
コード例 #9
0
ファイル: interact.py プロジェクト: encs-humanoid/ai
def demo(N=23):
    """Generate and print the first N sentences of ``demo_grammar``."""
    from nltk.grammar import CFG

    print('Generating the first %d sentences for demo grammar:' % (N, ))
    print(demo_grammar)
    compiled = CFG.fromstring(demo_grammar)
    for idx, words in enumerate(generate(compiled, n=N), 1):
        print('%3d. %s' % (idx, ' '.join(words)))
コード例 #10
0
def demo(N=26):
    """Print the first N sentences generated from ``demo_grammar``.

    The original bound an unused (and misspelled) ``senteneces`` list and an
    unused enumerate index, and carried commented-out code; all removed.
    """
    from nltk.grammar import CFG
    grammar = CFG.fromstring(demo_grammar)
    for sent in generate(grammar, n=N):
        print(' '.join(sent))
コード例 #11
0
def banjoify(rules, song):
    """Arrange *song* (ABC notation) by expanding each note through *rules*.

    For every (pitch, duration) pair a CFG is specialized to the pitch and a
    random expansion of the duration nonterminal becomes the phrase.
    """
    def _phrase(pitch, duration):
        specialized = CFG.fromstring(rules.format(pitch=pitch))
        choices = list(generate(specialized, start=Nonterminal(duration)))
        return ''.join(random.choice(choices))

    return ' '.join(_phrase(p, d) for p, d in parse_abc(song))
コード例 #12
0
def gensentence(N=20):
    """Print the first N sentences of ``gensentence_grammar``.

    Side effect: leaves the last generated sentence in the module-global
    ``abc`` (preserved from the original).  The original also assigned an
    empty dict to ``abc`` that was overwritten on the next line; removed.
    """
    global abc
    print('Generating the first %d sentences for gensentence grammar:' % (N, ))
    grammar = CFG.fromstring(gensentence_grammar)
    for n, sent in enumerate(generate(grammar, n=N), 1):
        abc = ' '.join(sent)
        print('%3d. %s' % (n, abc))
コード例 #13
0
ファイル: generator.py プロジェクト: jbgage/Portfolio
 def generate_context_free_grammar_novel_text(
     self, number_of_words_in_sentence=0, number_of_sentences_per_record=0, number_of_records=0
 ):
     """
     This method utilizes NLTK's Context Free Grammar parser objects to parse an available .*cfg file and generate
     novel text from it.

     @param number_of_words_in_sentence: An indicator as to the number of words to generate in each novel sentence.
     @type number_of_words_in_sentence: int
     @param number_of_sentences_per_record: An indicator as to the number of sentences per record to generate.
     @type number_of_sentences_per_record: int
     @param number_of_records: An indicator as to the total number of records to generate.
     @type number_of_records: int
     @return: str
     """
     words = []
     punct_selector = [". ", "! ", "? "]
     # Translation table that strips all ASCII punctuation (for str.translate).
     punctuation_stop_symbols = dict((ord(char), None) for char in string.punctuation)
     parser = None
     grammar = None
     try:
         # Pick the parser matching the corpus type; a plain string is
         # compiled straight into a CFG.
         if isinstance(self._corpus, CFG):
             _grammar = self._corpus
             if _grammar is not None:
                 parser = ChartParser(_grammar)
                 grammar = parser.grammar
         elif isinstance(self._corpus, FeatureGrammar):
             _grammar = self._corpus
             if _grammar is not None:
                 parser = FeatureChartParser(_grammar)
                 grammar = parser.grammar()
         elif isinstance(self._corpus, PCFG):
             _grammar = self._corpus
             if _grammar is not None:
                 parser = InsideChartParser(_grammar)
                 grammar = parser.grammar()
         else:
             grammar = CFG.fromstring(self._corpus)
         if grammar is not None:
             for _ in range(number_of_records):
                 novel_sentence = []
                 for _ in range(number_of_sentences_per_record):
                     sentence = " ".join(
                         [
                             sent
                             for _, sent in enumerate(generate_text(grammar, depth=2, n=number_of_words_in_sentence))
                         ]
                     )
                     # Strip punctuation, then end with a random sentence mark.
                     sentence = sentence.translate(punctuation_stop_symbols) + random.choice(punct_selector)
                     sentence = sentence.capitalize()  # original sliced [0:] — a pointless copy
                     novel_sentence.append(sentence)
                 words.append("".join(novel_sentence))
     # 'except Exception as error' replaces the Python-2-only
     # 'except Exception, error' (a SyntaxError on Python 3).
     except Exception as error:
         self.logger.error(
             "TextGenerator.generate_context_free_grammar_novel_text: Error occurred - {0}".format(str(error))
         )
コード例 #14
0
def construct_cfg_from_string():
    '''
    Reads CFG rules from cfg.txt
    Uses nltk to make a grammar from the given rules
    :return: CFG (nltk.grammar.CFG)
    '''
    # 'with' closes the file even if CFG.fromstring raises; the original
    # leaked the handle on any exception between open() and close().
    with open("cfg.txt", "r") as f:
        grammar_string = f.readlines()
    return CFG.fromstring(grammar_string)
コード例 #15
0
ファイル: util.py プロジェクト: BelkacemB/nltk
def generate_text(grammar, N):
    """Compile *grammar* (a CFG source string) and print its first N sentences."""
    from nltk.grammar import CFG
    import nltk.parse.generate as gen

    print('Generating the first %d sentences for demo grammar:' % (N,))
    print(grammar)
    compiled = CFG.fromstring(grammar)

    for idx, sentence in enumerate(gen.generate(compiled, n=N)):
        print('%3d. %s' % (idx, ' '.join(sentence)))
コード例 #16
0
def generateRawTemplates(depth):
    """Expand ``grammarstring`` up to *depth*, writing each generated template
    to ./templates/templateN; return the list of file paths."""
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []

    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates", "template" + str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
        templatefiles.append(filename)

    # print(...) is valid on Python 2 and 3; the original bare print
    # statement was Python-2-only syntax.
    print(str(len(templatefiles)) + " template files generated")

    return templatefiles
コード例 #17
0
def generateRawTemplates(depth):
    """Generate raw sentence templates from ``grammarstring`` (bounded by
    *depth*), persist each under ./templates/, and return the file paths."""
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []

    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates", "template" + str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
        templatefiles.append(filename)

    # Parenthesized print works under both Python 2 and 3; the original
    # bare print statement breaks on Python 3.
    print(str(len(templatefiles)) + " template files generated")

    return templatefiles
コード例 #18
0
ファイル: util.py プロジェクト: BelkacemB/nltk
def generate_tweet(grammar):
    """Compile *grammar* and print one randomly chosen generated sentence."""
    from nltk.grammar import CFG
    import nltk.parse.generate as gen
    from random import randint

    print(grammar)
    compiled = CFG.fromstring(grammar)
    # TODO: determine the real maximum size — can it be recovered from the grammar?
    sentences = gen.generate(compiled, n=SIZE)
    target = randint(0, SIZE)
    # The original kept a separate counter that always equaled the
    # enumerate index, so the index is compared directly here.
    for n, sent in enumerate(sentences):
        if n == target:
            print ("Your tweet : ")
            print('%3d. %s' % (n, ' '.join(sent)))
コード例 #19
0
def generate(filename, start=None, depth=None, n=None):
    """
    Generates all sentences from a CFG, joined into display strings.

    :param filename: path to file containing grammar.
    :param start: The Nonterminal from which to start generate sentences.
    :param depth: The maximal depth of the generated tree.
    :param n: The maximum number of sentences to return.
    :return: list of sentence strings (comma spacing normalized).
    """
    grammar = CFG.fromstring(_read_grammar(filename))
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    # 'sentences' replaces the original local named 'iter', which shadowed
    # the builtin; 'tokens' likewise replaces 'string' (stdlib module name).
    sentences = _generate_all(grammar, [start], depth)

    if n:
        sentences = itertools.islice(sentences, n)

    # Iterate the (possibly sliced) iterator directly — no need to
    # materialize it with list() first.
    return [' '.join(tokens).replace(' , ', ', ') for tokens in sentences]
コード例 #20
0
def create_learner():
    """Return a cached PrimalLearner from imp_learner.p if present, otherwise
    build one from a small IMP-language CFG."""
    if isfile("imp_learner.p"):
        # 'with' closes the handle even if unpickling raises; the original
        # leaked the file object in that case.
        # NOTE(review): unpickling is only safe for locally produced files —
        # never load untrusted pickle data.
        with open("imp_learner.p", "rb") as learner_file:
            return pickle.load(learner_file)
    grammar = CFG.fromstring("""
                Pgm -> Id ',' Pgm | Stmt
                Stmt -> Block | Id '=' Aexp ';' | Stmt Stmt
                Stmt -> 'if(' Bexp ')' Block 'else' Block
                Stmt -> 'while(' Bexp ')' Block
                Block -> '{}' | '{' Stmt '}'
                Bexp -> 'true' | Aexp '<=' Aexp | '!' Bexp
                Bexp -> Bexp '&&' Bexp | Bexp '||' Bexp | '(' Bexp ')'
                Aexp -> Int | Id | Aexp '+' Aexp | Aexp '-' Aexp
                Aexp -> Aexp '*' Aexp | Aexp '/' Aexp | '(' Aexp ')'
                Id -> 'a' | 'b' 
                Bool -> 'true' | 'false'
                Int -> '0' | '1' 
            """)
    return PrimalLearner.from_grammar(grammar, k=1)
コード例 #21
0
    def generate_context_free_grammar_novel_text(
            self, corpus, number_of_words_in_sentence,
            number_of_sentences_per_record, number_of_records):
        '''
        This method utilizes NLTK's Context Free Grammar parser objects to
        parse an available .*cfg file and generate novel text from it.

        Params:
        -------
        - number_of_words_in_sentence (int): An indicator as to the number of
        words to generate in each novel sentence.
        - number_of_sentences_per_record (int): An indicator as to the number
        of sentences per record to generate.
        - number_of_records (int): An indicator as to the total number of
        records to generate.

        Returns: str
        '''
        words = []
        punct_selector = ['. ', '! ', '? ']
        # Translation table removing all ASCII punctuation (str.translate).
        punctuation_stop_symbols = dict(
            (ord(char), None) for char in string.punctuation)
        parser = None
        grammar = None
        try:
            # Select the parser that matches the corpus type; a plain string
            # is compiled directly into a CFG.
            if isinstance(corpus, CFG):
                _grammar = corpus
                if _grammar is not None:
                    parser = ChartParser(_grammar)
                    grammar = parser.grammar
            elif isinstance(corpus, FeatureGrammar):
                _grammar = corpus
                if _grammar is not None:
                    parser = FeatureChartParser(_grammar)
                    grammar = parser.grammar()
            elif isinstance(corpus, PCFG):
                _grammar = corpus
                if _grammar is not None:
                    parser = InsideChartParser(_grammar)
                    grammar = parser.grammar()
            else:
                grammar = CFG.fromstring(corpus)
            if grammar is not None:
                for _ in range(number_of_records):
                    novel_sentence = []
                    for _ in range(number_of_sentences_per_record):
                        sentence = ' '.join([
                            sent for _, sent in enumerate(
                                generate_text(grammar,
                                              depth=2,
                                              n=number_of_words_in_sentence))
                        ])
                        # Strip punctuation, append a random sentence mark.
                        sentence = sentence.translate(
                            punctuation_stop_symbols) + random.choice(
                                punct_selector)
                        # Original used sentence[0:] — a pointless full copy.
                        sentence = sentence.capitalize()
                        novel_sentence.append(sentence)
                    words.append(''.join(novel_sentence))
        # 'as error' replaces the Python-2-only 'except Exception, error'
        # form, which is a SyntaxError on Python 3.
        except Exception as error:
            logging.error('TextGenerator: Error occurred - {0}'.format(
                str(error)))
コード例 #22
0
# -*- coding: utf-8 -*-
import pytest
from nltk.grammar import CFG
from nltk.parse.chart import BottomUpChartParser

# Load the French grammar shared by every parametrized test below.
with open("subject-verb.grammar") as f:
    grammar = CFG.fromstring(f.read(), encoding="utf-8")

# Maps a test name to the sentences exercised under that name; each
# (name, sentence) pair becomes one parametrized test case.
# NOTE(review): accents are deliberately absent ("television", "derniere") —
# presumably the grammar file is written without them; confirm.
tests = {
    "subject_verb_agreement": [
        "Je regarde la television", "Tu regardes la television",
        "Il regarde la television", "Nous regardons la television",
        "Vous regardez la television", "Ils regardent la television"
    ],
    "test_noun_phrases_and_proper_names": [
        "le chat", "la television", "les chats", "les televisions", "Jackie",
        "Montreal"
    ],
    "test_direct_object_pronouns": ["il la regarde"],
    "test_attribute_adjectives": [
        "le chat noir", "le chat heureux", "le beau chat", "le joli chat",
        "la derniere semaine", "la semaine derniere", "les chats noirs",
        "la television noire", "les televisions noires"
    ]
}


@pytest.mark.parametrize("test", ((test_name, sentence)
                                  for test_name, sentences in tests.items()
                                  for sentence in sentences))
def test(test):
コード例 #23
0
# Error-detection grammar over Penn-Treebank POS tags (plus a small lexicon
# of determiners): S matches a tag stream containing at most one Err region
# (determiner-agreement, unknown-determiner, or a/an form errors) embedded in
# arbitrary Fallback context.
# NOTE(review): ErrAGV is referenced in the Err production but never defined,
# and NotNPHead/CDList appear partially unused here — possibly defined for
# use elsewhere; confirm against the rest of the project.
grammar = CFG.fromstring("""
S -> Fallback Err Fallback
S -> Fallback
Fallback -> AllTags Fallback
Fallback ->
S -> AllTags
AllTags -> 'END' | 'QUOT' | '(' | ')' | ',' | '--' | '.' | 'CC' | 'CD' | 'DT' | 'EX' | 'FW' | 'IN' | 'JJ' | 'JJR' | 'JJS' | 'LS' | 'MD' | 'NN' | 'NNP' | 'NNPS' | 'NNS' | 'PDT' | 'POS' | 'PRP' | 'PRP$' | 'RB' | 'RBR' | 'RBS' | 'RP' | 'SYM' | 'TO' | 'UH' | 'VB' | 'VBD' | 'VBG' | 'VBN' | 'VBP' | 'VBZ' | 'WDT' | 'WP' | 'WP$' | 'WRB' | '``' | Det | ':'
Det -> DetPl | DetSg | DetNeut
DetNeut -> 'the' | 'some' | 'another' | 'no' | 'his' | 'her' | 'his/her' | 'any'
DetSg -> 'a' | 'an' | 'this' | 'every' | 'another' | 'that' | 'each' | 'neither'
DetPl -> 'all' | 'both' | 'these' | 'those'
Err -> ErrUD | ErrAGD | ErrFD | ErrAGV

NotNPHead -> 'END' | 'QUOT' | '(' | ')' | ',' | '--' | '.' | 'CC' | 'DT' | 'EX' | 'FW' | 'IN' | 'LS' | 'MD' | 'NN' | 'NNP' | 'NNPS' | 'NNS' | 'PDT' | 'POS' | 'PRP' | 'PRP$' | 'RB' | 'RBR' | 'RBS' | 'RP' | 'SYM' | 'TO' | 'UH' | 'VB' | 'VBD' | 'VBG' | 'VBN' | 'VBP' | 'VBZ' | 'WDT' | 'WP' | 'WP$' | 'WRB' | '``' | ':'

CDList -> 'CD' CDList
CDList ->

JJList -> 'JJ' JJList
JJList -> 'JJR' JJList
JJList -> 'JJS' JJList
JJList ->


ErrAGD -> DetPl JJList 'NN'
ErrAGD -> DetSg JJList CDList JJList 'NNS'

ErrFD -> 'a' AllTags
ErrFD -> 'an' AllTags

ErrUD -> Det JJList 'NNP'
ErrUD -> Det JJList CDList JJList 'NNPS'

""")
コード例 #24
0
# S -> NP VP        # Start state S
# A -> B | C        # Arrow and vbar
# C -> "a" | "b"    # non-terminals in quotes
#
##################################################

# Replace with your file name here
filename = "a2q2.txt"

with open(filename) as f:
    content = f.read()
    # Accented characters were mostly fixed by hand in the grammar file;
    # normalize any remaining ones in a single translate() pass (equivalent
    # to the original chain of .replace() calls).
    content = content.lower().translate(
        str.maketrans("éèêáàâóòô", "eeeaaaooo"))
    grammar = CFG.fromstring(content, encoding="utf-8")

parser = BottomUpChartParser(grammar)


def parse(sentence, nonempty):
    """Parse *sentence*; assert at least one tree when *nonempty*, else none."""
    # Lowercase to match the normalized grammar, then tokenize on whitespace.
    forest = list(parser.parse(sentence.lower().split()))
    if nonempty:
        print(forest)
        assert forest
    else:
        assert not forest


validSentences = [
コード例 #25
0
ファイル: main.py プロジェクト: rezardes/NLIDBSpasial
# Indonesian spatial-query grammar: COMMAND (show/view/count) followed by a
# QUERY of relations/conditions joined by conjunctions.
# FIX: the fourth OPERATOR alternative had mismatched quotes
# ("'lebih' 'dari 'sama 'dengan'"), which produced a bogus "dari " terminal
# and an undefined lowercase nonterminal instead of the intended
# 'lebih' 'dari' 'sama' 'dengan' (greater-or-equal).
# NOTE(review): POINT references RU and LB, which have no productions here —
# confirm whether those corner-point rules were meant to be added.
grammar = CFG.fromstring("""
S -> COMMAND QUERY
COMMAND -> COMMAND1 | COMMAND2 | COMMAND3
COMMAND1 -> 'tampil'
COMMAND2 -> 'tunjuk' | 'lihat'
COMMAND3 -> 'hitung' | 'kalkulasi'
QUERY -> RELATION | CONDITION | CONDITION CONDITION | CONDITION CONJ CONDITION | CONDITION QUERY | CONDITION CONJ QUERY
CONJ -> AND | OR
AND -> 'dan' | 'serta'
OR -> 'atau'
CONDITION -> FIELDS OPERATOR NUMBER | FIELDS RELATION | FIELDS RELATION SPATIALOP RELCOND | FIELDS RELATION NOT SPATIALOP RELCOND | FIELDS RELCOND | PART RELATION SPATIALOP GEOCOND | RELCOND | RELATION SPATIALOP GEOCOND | RELATION NOT SPATIALOP GEOCOND | RELATION SPATIALOP RELCOND | RELATION NOT SPATIALOP RELCOND | SPATIALOP RELATION SPATIALOP RELCOND | SPATIALOP RELATION NOT SPATIALOP RELCOND | SPATIALOP RELCOND |  SPATIALOP RELCOND RELCOND | SPATIALOP OPERATOR NUMBER | VALUES
PART -> 'daerah' | 'bagian' | 'potong'
GEOCOND -> GEOMETRY POINT COOR CONJ POINT COOR | GEOMETRY COOR SIZE NUMBER
GEOMETRY -> SQUARE | RECTANGLE
SQUARE -> 'persegi'
RECTANGLE -> 'segiempat' | 'persegi' 'panjang'
POINT -> LU | RU | LB | RB
LU -> 'titik' 'kiri' 'atas'
RB -> 'titik' 'kanan' 'bawah'
RELCOND -> RELATION VALUES | RELATION FIELDS VALUE | RELATION FIELDS NUMBER | RELATION
OPERATOR -> 'lebih' 'dari' | 'kurang' 'dari' | 'sama' 'dengan' | 'lebih' 'dari' 'sama' 'dengan' | 'kurang' 'dari' 'sama' 'dengan'
NOT -> 'tidak' | 'bukan'
SPATIALOP -> PANJANG | LUAS | KELILING | INSIDE | OUTSIDE | JARAK
JARAK -> 'jarak'
INSIDE -> 'dalam' | 'pada'
OUTSIDE -> 'luar'
PANJANG -> 'panjang'
LUAS -> 'luas'
KELILING -> 'keliling'
FIELDS -> FIELD FIELD | FIELD | FIELD FIELDS | FIELD CONJ FIELDS
VALUES -> VALUE VALUE | VALUE | VALUE VALUES
""")
コード例 #26
0
ファイル: ParsingChart.py プロジェクト: babasahebpinjar/NLP
from nltk.grammar import CFG
from nltk.parse.chart import ChartParser, BU_LC_STRATEGY

# Toy grammar: start symbol S, terminals are English words.
grammar = CFG.fromstring("""
S -> T1 T4
T1 -> NNP VBZ
T2 -> DT NN
T3 -> IN NNP
T4 -> T3 | T2 T3
NNP -> 'Tajmahal' | 'Agra' | 'Bangalore' | 'Karnataka'
VBZ -> 'is'
IN -> 'in' | 'of'
DT -> 'the'
NN -> 'capital'
""")

# trace=True prints each parsing step.
cp = ChartParser(grammar, BU_LC_STRATEGY, trace=True)

sentence = "Bangalore is the capital of Karnataka"
tokens = sentence.split()
chart = cp.chart_parse(tokens)
parses = list(chart.parses(grammar.start()))
print("Total Edges :", len(chart.edges()))
for tree in parses:
    print(tree)
    # FIX: draw() was outside the loop, so only the last parse was drawn —
    # and it raised NameError when there were no parses at all.
    tree.draw()
コード例 #27
0
from nltk.grammar import CFG
from nltk.parse import EarleyChartParser
# Small English grammar for the Earley-parser demo below.
cfg = CFG.fromstring("""
S -> NP VP
NP -> DET NN
NP -> DET NP
NP -> JJ NN
VP -> VB NP
DET -> 'a' | 'the'
JJ -> 'lucky'
NN -> 'man' | 'woman'
VB -> 'loves' | 'shoots'
""")

cfgparser = EarleyChartParser(cfg)


def _show_parses(sentence):
    """Parse *sentence*, printing and drawing every resulting tree."""
    for tree in cfgparser.parse(sentence.split()):
        print(tree.pformat())
        tree.draw()


_show_parses('a man loves a woman')
_show_parses('the man shoots a woman')
_show_parses('a lucky woman loves a man')
コード例 #28
0
def load_grammar(grammar_path):
    """Read the file at *grammar_path* and return it parsed as an NLTK CFG."""
    logger.info('Loading grammar in %s' % grammar_path)
    with open(grammar_path) as fin:
        source = fin.read()
    return CFG.fromstring(source)