Example #1
File: logic.py Project: stas/-junk
    def buildgrammar(self):
        g = Grammar()
        g.nonterminals = self.states
        g.terminals = self.symbols
        g.startsymbol = str(self.initialstate)
        
        # Each transition is a (state, symbol, nextstate) triple.
        for t in self.transitions:
            if len(t) == 3:
                g.add_production(t[0], t[1] + t[2])
                if t[2] in self.finalstates:
                    # A transition into a final state also yields a
                    # terminating production without the next state.
                    g.add_production(t[0], t[1])
        
        if g.startsymbol in self.finalstates:
            # The start state is final, so the empty word is derivable.
            g.add_production(g.startsymbol, 'e')
        
        self.grammar = g

        print('Nonterminals:', self.grammar.nonterminals)
        print('Terminals:', self.grammar.terminals)
        print('Start symbol:', self.grammar.startsymbol)
        print('Productions:', self.grammar.productions)
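
The method implements the standard conversion from a finite automaton to a right-linear grammar: each transition (state, symbol, nextstate) becomes state -> symbol nextstate, a transition into a final state additionally becomes state -> symbol, and a final start state contributes an epsilon production ('e'). A minimal sketch of the same mapping applied to a hypothetical automaton for the language a*b (only the triple layout is taken from the method above; the data is illustrative):

# Hypothetical automaton for a*b, laid out as buildgrammar expects.
transitions = [('S', 'a', 'S'), ('S', 'b', 'F')]
finalstates = {'F'}
initialstate = 'S'

productions = []
for src, sym, dst in transitions:
    productions.append((src, sym + dst))       # S -> aS, S -> bF
    if dst in finalstates:
        productions.append((src, sym))         # S -> b, since F is final
if initialstate in finalstates:
    productions.append((initialstate, 'e'))    # not taken: S is not final

print(productions)  # [('S', 'aS'), ('S', 'bF'), ('S', 'b')]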
Example #2

import unittest


class TestGrammar(unittest.TestCase):
    def setUp(self):
        self.grammar = Grammar()

    def test_create_empty_grammar(self):
        grammar = Grammar()
        self.assertSetEqual(set(), grammar.productions())

    def test_create_production(self):
        production = Production('S', 'aS')
        self.assertEqual('S', production.left())
        self.assertEqual('aS', production.right())

    def test_add_production(self):
        self.grammar.add_production(Production("S", "aS"))
        self.assertEqual(1, self.grammar.productions_quantity())

    def test_grammar_conversion_ndfa_fa_aaab(self):
        # S -> aS | b
        self.grammar.add_production(Production('S', 'aS'))
        self.grammar.add_production(Production('S', 'b'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('b'))
        self.assertTrue(fa.recognize_sentence('ab'))
        self.assertTrue(fa.recognize_sentence('aab'))
        self.assertTrue(fa.recognize_sentence('aaaaaaaaaaaaab'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('a'))
        self.assertFalse(fa.recognize_sentence('aa'))
        self.assertFalse(fa.recognize_sentence('aaaaaaaaaaaaa'))
        self.assertFalse(fa.recognize_sentence('ba'))
        self.assertFalse(fa.recognize_sentence('abb'))
        self.assertFalse(fa.recognize_sentence('abaaaaaaab'))

    def test_grammar_conversion_ndfa_fa_aabbccd(self):
        # S -> aS | bB
        # B -> bB | cC
        # C -> cC | d
        self.grammar.add_production(Production('S', 'aS'))
        self.grammar.add_production(Production('S', 'bB'))
        self.grammar.add_production(Production('B', 'bB'))
        self.grammar.add_production(Production('B', 'cC'))
        self.grammar.add_production(Production('C', 'cC'))
        self.grammar.add_production(Production('C', 'd'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('abcd'))
        self.assertTrue(fa.recognize_sentence('bcd'))
        self.assertTrue(fa.recognize_sentence('bbbcccd'))
        self.assertTrue(fa.recognize_sentence('aaabbbcccd'))
        self.assertTrue(fa.recognize_sentence('aaaabccccd'))
        self.assertTrue(fa.recognize_sentence('aaaabcd'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('abc'))
        self.assertFalse(fa.recognize_sentence('acd'))
        self.assertFalse(fa.recognize_sentence('abd'))
        self.assertFalse(fa.recognize_sentence('aaaaabbbbbcccc'))
        self.assertFalse(fa.recognize_sentence('dabc'))
        self.assertFalse(fa.recognize_sentence('abdc'))
        self.assertFalse(fa.recognize_sentence('adbc'))
        self.assertFalse(fa.recognize_sentence('aadbbccd'))
        self.assertFalse(fa.recognize_sentence('dabcd'))
        self.assertFalse(fa.recognize_sentence('abcdd'))

    def test_grammar_conversion_ndfa_fa_ccababba(self):
        # S -> cS | cA
        # A -> aA | bA | a | b
        self.grammar.add_production(Production('S', 'cS'))
        self.grammar.add_production(Production('S', 'cA'))
        self.grammar.add_production(Production('A', 'aA'))
        self.grammar.add_production(Production('A', 'bA'))
        self.grammar.add_production(Production('A', 'a'))
        self.grammar.add_production(Production('A', 'b'))
        fa = self.grammar.to_finite_automaton()
        # Should accept
        self.assertTrue(fa.recognize_sentence('ca'))
        self.assertTrue(fa.recognize_sentence('cb'))
        self.assertTrue(fa.recognize_sentence('ccccca'))
        self.assertTrue(fa.recognize_sentence('cccccb'))
        self.assertTrue(fa.recognize_sentence('cab'))
        self.assertTrue(fa.recognize_sentence('cba'))
        self.assertTrue(fa.recognize_sentence('cbababba'))
        self.assertTrue(fa.recognize_sentence('ccccababaaaabbbbbbabaabaabbb'))
        # Shouldn't accept
        self.assertFalse(fa.recognize_sentence(''))
        self.assertFalse(fa.recognize_sentence('c'))
        self.assertFalse(fa.recognize_sentence('cccccc'))
        self.assertFalse(fa.recognize_sentence('a'))
        self.assertFalse(fa.recognize_sentence('b'))
        self.assertFalse(fa.recognize_sentence('babaaab'))
        self.assertFalse(fa.recognize_sentence('babababaabc'))
        self.assertFalse(fa.recognize_sentence('bababcabaab'))

    def test_text_to_grammar(self):
        text = "S -> aA | a | bS\nA -> aS | bA | b"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()

        self.assertTrue(fa.recognize_sentence("babababbbbaa"))
        self.assertFalse(fa.recognize_sentence("abbbbaabaabbba"))

    def test_text_to_grammar_epsilon(self):
        text = "S -> aA\nA -> aS | bB\nB->bB | &"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()

        self.assertTrue(fa.recognize_sentence("aaaaabbb"))
        self.assertFalse(fa.recognize_sentence("aaaabbb"))

    def test_text_to_grammar_epsilon_2(self):
        text = "S -> aS | a | bS | b"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()

        self.assertTrue(fa.recognize_sentence("abbabaaababbabab"))
        self.assertFalse(fa.recognize_sentence("babbababcabab"))

    def test_text_to_grammar_2(self):
        text = "S->aA\nA->b|&"
        grammar = Grammar.text_to_grammar(text)
        fa = grammar.to_finite_automaton()

        fa.rename_states()

        self.assertTrue(fa.recognize_sentence("ab"))
        self.assertFalse(fa.recognize_sentence("b"))
Example #3
    def build(self):
        g = Grammar(self.tokens)

        for level, (assoc, terms) in enumerate(self.precedence, 1):
            for term in terms:
                g.set_precedence(term, assoc, level)

        for prod_name, syms, func, precedence in self.productions:
            g.add_production(prod_name, syms, func, precedence)

        g.set_start()

        for unused_term in g.unused_terminals():
            warnings.warn(
                "Token %r is unused" % unused_term,
                ParserGeneratorWarning,
                stacklevel=2
            )
        for unused_prod in g.unused_productions():
            warnings.warn(
                "Production %r is not reachable" % unused_prod,
                ParserGeneratorWarning,
                stacklevel=2
            )

        g.build_lritems()
        g.compute_first()
        g.compute_follow()

        # cache_dir = AppDirs("rply").user_cache_dir  (rply's default cache
        # location); here the parse table is cached in a local file instead.
        cache_file = 'zgrammar.txt'

        table = None
        if os.path.exists(cache_file):
            with open(cache_file, 'r') as f:
                data = json.load(f)
            if self.data_is_valid(g, data):
                table = LRTable.from_cache(g, data)
        if table is None:
            table = LRTable.from_grammar(g)
            serial = self.serialize_table(table)
            try:
                with open(cache_file, "w") as f:
                    json.dump(serial, f)
            except IOError as e:
                # IOError has no .message attribute in Python 3; print the
                # exception itself.
                print(e)

        if table.sr_conflicts:
            warnings.warn(
                "%d shift/reduce conflict%s" % (
                    len(table.sr_conflicts),
                    "s" if len(table.sr_conflicts) > 1 else ""
                ),
                ParserGeneratorWarning,
                stacklevel=2,
            )
        if table.rr_conflicts:
            warnings.warn(
                "%d reduce/reduce conflict%s" % (
                    len(table.rr_conflicts),
                    "s" if len(table.rr_conflicts) > 1 else ""
                ),
                ParserGeneratorWarning,
                stacklevel=2,
            )
        return LRParser(table, self.error_handler)
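
This build method closely follows rply's ParserGenerator.build, with the parse-table cache redirected to a local zgrammar.txt. For context, a builder like this is driven through rply's documented interface roughly as follows (a sketch; the token names and actions are illustrative):

from rply import ParserGenerator

pg = ParserGenerator(["NUMBER", "PLUS"], precedence=[("left", ["PLUS"])])

@pg.production("expr : expr PLUS expr")
def expr_plus(p):
    return p[0] + p[2]

@pg.production("expr : NUMBER")
def expr_number(p):
    return int(p[0].getstr())

parser = pg.build()  # runs the table construction and conflict checks above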
Example #4
import re
from enum import Enum


class _BNFParser:
    '''
    bnf  := prod end | prod bnf
    prod := nterm ':=' rhs
    syms := sym | sym syms
    rhs  := syms | syms '|' rhs
    '''
    class Token(Enum):
        END = 1
        SYM = 2
        KEYWORD = 3

    def __init__(self, buf: str):
        self.pos = 0
        self.tokens = self.tokenize(buf)
        self.grammar = Grammar()
        self.parse_bnf()
        self.fix_grammar()

    def peek(self):
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return (self.Token.END, None)

    def get(self, n=1):
        result = self.peek()
        self.pos += n
        return result

    def unget(self, n=1):
        self.pos -= n

    def fix_grammar(self):
        all_syms = set()
        for prodlist in self.grammar.prods.values():
            for prod in prodlist:
                all_syms.update(prod.syms)
        self.grammar.terms = all_syms - self.grammar.prods.keys() - {'@'}

    def parse_rhs(self):
        result = list()
        while True:
            token = self.get()
            if token[0] == self.Token.END:
                self.unget()
                return result
            if token[0] == self.Token.KEYWORD:
                if token[1] == '|':
                    return result
                else:
                    raise SyntaxError("Unexpected token " + str(token))
            result.append(token[1])

    def parse_prod(self):
        token = self.get()
        if token[0] != self.Token.SYM:
            raise SyntaxError("Nonterminal expected, got " + str(token))
        nterm = token[1]
        if not self.grammar.start:
            self.grammar.start = nterm

        token = self.get()
        if token[0] != self.Token.KEYWORD or token[1] != ':=':
            raise SyntaxError("Keyword ':=' expected, got " + str(token))

        while True:
            if self.peek()[0] == self.Token.END:
                return
            prod_list = self.parse_rhs()
            if not prod_list:
                raise SyntaxError("Empty right hand side of production "
                                  "for nonterminal " + nterm)
            self.grammar.add_production(nterm, prod_list)

    def parse_bnf(self):
        while True:
            token = self.peek()
            if token[0] == self.Token.END:
                if token[1] is None:
                    return
                else:
                    self.get()
            else:
                self.parse_prod()

    def tokenize(self, buf: str) -> list:
        result = list()
        regexp_space = re.compile(r"[ \t]+")
        regexps = (
            (self.Token.KEYWORD, re.compile(r"\||:=")),
            (self.Token.SYM, re.compile(r"[^ \t\r\n]+")),
            (self.Token.END, re.compile(r"[\r\n]+")),
        )

        i = 0
        while i < len(buf):
            # Skip spaces
            m = regexp_space.match(buf, i)
            if m:
                i = m.end()
            if i == len(buf):
                break

            for token, regexp in regexps:
                m = regexp.match(buf, i)
                if m:
                    if token == self.Token.SYM:
                        if m.group() == r"'\''":
                            result.append((self.Token.SYM, "'"))
                        else:
                            result.append((token, m.group()))
                    else:
                        result.append((token, m.group()))
                    i = m.end()
                    break
            else:
                raise SyntaxError("Unknown token at pos {} ({})".format(
                    i, buf[i:i + 10]))
        return result
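
A short usage sketch for the class above. The input follows the BNF notation from the docstring (':=' after a nonterminal, '|' between alternatives, a newline ends a production). The Grammar class is not shown, so the printed values assume it stores start, prods, and terms exactly as this parser fills them in:

source = ("expr := term '+' expr | term\n"
          "term := NUM\n")
parser = _BNFParser(source)

print(parser.grammar.start)  # expr
print(parser.grammar.terms)  # {"'+'", 'NUM'} after fix_grammar has run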
Example #5
class Parser:
	def __init__(self, source):
		self.token = ''
		self.rule = []
		self.rule_list = []
		self.terminal_name = {}
		self.terminal_group = {}
	
		self.start_token = ''
		self.grammar = None
		
		self.l = Lexer(source)
		self.parse()
	
	def match(self, m):
		if m == self.token[0]:
			self.token = self.l.next()
		else:
			print('match error', m)
	
	def parse(self):
		self.grammar = Grammar()
		self.token = self.l.next()
		self.terminals()
		self.start()
		self.productions()
	
	def terminals(self):	
		group = 0
		while self.token[0] == 'TOKEN':
			self.match('TOKEN')
			while self.token[0] == 'TERM':
				self.grammar.add_terminal(self.token[1], group)
				self.match('TERM')
			group += 1
		
	def start(self):	
		self.match('START')
		self.start_token = self.token[1]
		self.match('NONTERM')
		
	def productions(self):
		self.match('BLOCK')
		while self.token[0] != 'BLOCK':
			self.left_side()
		self.grammar.start_token = self.start_token
		
	def left_side(self):
		self.rule_list = []
		production_name = self.token[1]
		self.match('NONTERM')
		self.match('COLON')
		self.right_side()
		self.grammar.add_production(production_name, self.rule_list)
		self.match('SEMI')

	def right_side(self):
		rule = []

		first = True
		while self.token[0] != 'SEMI':
			if self.token[0] in ('TERM', 'NONTERM', 'CHAR'):
				first = False
				rule.append(self.token[1])
				self.match(self.token[0])
			elif self.token[0] == 'PIPE':
				if first:
					# A leading '|' denotes an empty alternative.
					self.rule_list.append(Rule(['']))
					first = False
					self.match('PIPE')
				else:
					self.rule_list.append(Rule(rule))
					rule = []
					self.match('PIPE')
					if self.token[0] == 'PIPE':
						# '||' also denotes an empty alternative.
						self.rule_list.append(Rule(['']))
						self.match('PIPE')
					elif self.token[0] == 'SEMI':
						# A trailing '|' before ';' denotes an empty alternative.
						rule = ['']
			else:
				# Skip the unexpected token rather than looping on it forever.
				print('right_side: unexpected token', self.token)
				self.match(self.token[0])
		self.rule_list.append(Rule(rule))
	
	def printer(self, fo=None):
		pp = PrettyPrint(self.grammar)
		pp.printer(fo)
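
The Lexer class is not shown, so the concrete surface syntax is unknown, but the token stream the recursive-descent methods above consume can be read off directly. A hypothetical stream for a one-production grammar (the payloads of keyword tokens are assumed to be unused):

# TOKEN opens a group of terminals, START names the start symbol, and
# the productions sit between two BLOCK markers, each shaped as
#     NONTERM COLON alternatives-separated-by-PIPE SEMI
tokens = [
    ('TOKEN', None), ('TERM', 'NUM'),                         # terminal group 0
    ('START', None), ('NONTERM', 'expr'),                     # start symbol
    ('BLOCK', None),
    ('NONTERM', 'expr'), ('COLON', None),                     # expr :
    ('NONTERM', 'expr'), ('CHAR', '+'), ('NONTERM', 'expr'),  #   expr '+' expr
    ('PIPE', None), ('TERM', 'NUM'),                          # | NUM
    ('SEMI', None),
    ('BLOCK', None),
]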