import unittest
from itertools import chain

# NOTE: import paths inferred from the module references later in this
# file ('actions.py', 'glr.py', 'lexer.py'); adjust to your layout.
from actions import (Number, Identifier, Operator, Literal, Comment,
	Newline, Indent, Dedent, EOF, Code, File, Regex, Production,
	TokenSection, ProductionSection)
from glr import CFGNonterminal, CFGTerminal, CFGParser
from lexer import CFGLexer, CFGToken


class TestGLRList(unittest.TestCase):
    def setUp(self):
        S, L = CFGNonterminal.create("S", "L")
        self.op, self.cp, self.c, self.x = CFGTerminal.create("(", ")", ",", "x")

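        # Grammar under test, for nested comma-separated lists:
        #   S -> "(" L ")" | "x"
        #   L -> S | L "," S
        # The reduction actions build ordinary Python lists.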
        S.production(lambda i, j, k: j, self.op, L, self.cp)
        S.production(lambda i: i, self.x)
        L.production(lambda i: [i], S)

        def append(l, i):
            l.append(i)
            return l

        L.production(lambda i, j, k: append(i, k), L, self.c, S)
        self.parser = CFGParser(S, (self.op, self.cp, self.c, self.x), (S, L))

    def test_single_list(self):
        l = self.parser.parse_items(self.op, self.x, self.cp)
        self.assertEqual(l, [self.x])

        l = self.parser.parse_items(self.op, self.x, self.c, self.x, self.cp)
        self.assertEqual(l, [self.x, self.x])

    def test_nested(self):
        l = self.parser.parse_items(self.op, self.x, self.c, self.op, self.x, self.cp, self.cp)
        self.assertEqual(l, [self.x, [self.x]])

class TestGLRLexer(unittest.TestCase):
	def setUp( self ):
		plus, times, num = CFGTerminal.create("+", "*", "num")
		E, T, F = CFGNonterminal.create("E", "T", "F")

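		# Layered expression grammar: F sits below T, so "*" binds
		# tighter than "+".  Reductions call add()/mul() on the Number
		# values produced by the lexer.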
		E.production( lambda i: i, T )
		T.production( lambda i, j, k: i.add(k), T, plus, F )
		T.production( lambda i: i, F )
		F.production( lambda i, j, k: i.mul(k), F, times, num )
		F.production( lambda i: i, num )

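		# Regex -> terminal map for the lexer; numeric lexemes are
		# wrapped in Number value objects via data_wrapper.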
		tokens = {
			'\\+': plus, '\\*': times,
			"[0-9][0-9]*": num.data_wrapper(Number)
			}

		self.lexer = CFGLexer( tokens )
		self.parser = CFGParser( E, (plus, times, num), (E, T, F) )

	def _n( self, s ):
		return self.parser.parse( self.lexer.tokenize( s ) ).value

	def test_add( self ):
		self.assertEqual( self._n("5+4"), 9 )
		self.assertEqual( self._n("1+3+0"), 4 )

	def test_mul( self ):
		self.assertEqual( self._n("3*2"), 6 )
		self.assertEqual( self._n("1*5*13"), 65 )
		self.assertEqual( self._n("4*0"), 0 )

	def test_oop( self ):
		self.assertEqual( self._n("4+2*8"), 20 )
		self.assertEqual( self._n("5+0*12+4*3*2"), 29 )
class TestGLRValuesExpr(unittest.TestCase):
    def setUp(self):
        self.plus, self.mul, self.num = CFGTerminal.create("+", "*", "num")
        E, T, F = CFGNonterminal.create("E", "T", "F")

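        # Same expression grammar as TestGLRLexer, but the reductions
        # operate on plain ints injected via num.with_data().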
        E.production(lambda i: i, T)
        T.production(lambda i, j, k: i + k, T, self.plus, F)
        T.production(lambda i: i, F)
        F.production(lambda i, j, k: i * k, F, self.mul, self.num)
        F.production(lambda i: i, self.num)
        self.parser = CFGParser(E, (self.plus, self.mul, self.num), (E, T, F))

    def _n(self, v):
        return self.num.with_data(v)

    def test_add(self):
        v = self.parser.parse_items(self._n(5), self.plus, self._n(2))
        self.assertEqual(v, 7)

        v = self.parser.parse_items(self._n(1), self.plus, self._n(4), self.plus, self._n(10))
        self.assertEqual(v, 15)

    def test_mul(self):
        v = self.parser.parse_items(self._n(5), self.mul, self._n(2))
        self.assertEqual(v, 10)

        v = self.parser.parse_items(self._n(1), self.mul, self._n(4), self.mul, self._n(10))
        self.assertEqual(v, 40)

    def test_oop(self):
        v = self.parser.parse_items(self._n(7), self.mul, self._n(3), self.plus, self._n(2))
        self.assertEqual(v, 23)

        v = self.parser.parse_items(self._n(1), self.plus, self._n(4), self.mul, self._n(10))
        self.assertEqual(v, 41)

    def test_complex(self):
        v = self.parser.parse_items(self._n(7), self.mul, self._n(3), self.plus, self._n(3), self.mul, self._n(2))
        self.assertEqual(v, 27)

        v = self.parser.parse_items(
            self._n(1),
            self.plus,
            self._n(4),
            self.mul,
            self._n(5),
            self.plus,
            self._n(1),
            self.mul,
            self._n(9),
            self.mul,
            self._n(2),
        )
        self.assertEqual(v, 39)
class CFGFileParser:
	def __init__( self ):
		self._build_parser()
		self.lexer = CFGLexer( self.tokens, 
			eof_token = self.eof.data_wrapper(EOF) )
		self.parser = CFGParser( 
			self.goal, self.terminals, self.nonterminals )
		self.reset()

	def _build_parser( self ):
		self.terminals = ( start_section, end_section,
			tokens_id, productions_id,
			start_name, end_name, produces, colon,
			newline, indent, dedent, operator, comment, identifier,
			literal, number, eof ) = (
			CFGTerminal.create( "start_section", "end_section",
					"tokens_id", "productions_id",
					"start_name", "end_name", "produces", 
					"colon", "newline", "indent", "dedent",
					"operator", "comment", "identifier", "literal", "number",
					"eof" ) )
		for t in self.terminals:
			setattr( self, t.identifier, t )
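		# Each terminal is now available as an attribute (self.newline,
		# self.indent, ...) for the lexer callbacks below.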

		make_tuple = lambda *args: tuple(args)

		self.tokens = []
		self.tokens.append(CFGToken(r"{%", start_section))
		self.tokens.append(CFGToken(r"%}", end_section))
		self.tokens.append(CFGToken(r"tokens", 
			tokens_id.data_wrapper(Identifier)))
		self.tokens.append(CFGToken(r"productions", 
			productions_id.data_wrapper(Identifier)))

		self.tokens.append(CFGToken(r"\(%", start_name))
		self.tokens.append(CFGToken(r"%\)", end_name))
		self.tokens.append(CFGToken(r"->", produces))
		self.tokens.append(CFGToken(r":", 
			colon.data_wrapper(Operator)))

		operators = [
			",", ";", "@", 
			"+", "-", "\\*", "/", "//", "!", "\\|", "&",
			"<<", ">>", "<", ">", "=", "\\.",
			"%", "`", "~", "\\^"
			]
		for o in operators:
			self.tokens.append(CFGToken(o, operator.data_wrapper(Operator)))

		assign_operators = [
			"+=", "-=", "\\*=", "/=", "//=",
			"!=", "\\|=", "&=", "<=", ">=", "==", "%=",
			"<<=", ">>=" ]
		for o in assign_operators:
			self.tokens.append(CFGToken(o, operator.data_wrapper(Operator)))
		
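		# Bracket tokens are routed through handlers that track nesting
		# depth, so newlines/indentation inside brackets can be ignored.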
		paren_operators = [ r"\(", r"\[", "{", r"\)", r"\]", "}" ]
		for p in paren_operators[:3]:
			self.tokens.append(CFGToken(p, self._paren_open))
		for p in paren_operators[3:]:
			self.tokens.append(CFGToken(p, self._paren_close))

		self.tokens.append(CFGToken(r"[a-zA-Z_][a-zA-Z0-9_]*",
			identifier.data_wrapper( Identifier )))
		self.tokens.append(CFGToken(
			r"([0-9][0-9]*(\.[0-9]*)?|\.[0-9][0-9]*)([eE][+-]?[0-9][0-9]*)?",
			number.data_wrapper( Number )))
		
		self.tokens.append(CFGToken(r'"([^\\"\n]*(\\.)?)*"', 
			literal.data_wrapper( Literal )))
		self.tokens.append(CFGToken(r"'([^\\'\n]*(\\.)?)*'", 
			literal.data_wrapper( Literal )))

		self.tokens.append(CFGToken(r'r"([^"\n]*(\\")?)*"', 
			literal.data_wrapper( Literal )))
		self.tokens.append(CFGToken(r"r'([^'\n]*(\\')?)*'", 
			literal.data_wrapper( Literal )))
		
		self.tokens.append(CFGToken(
			r'"""([^"]*("|"")?)*"""', 
			literal.data_wrapper( Literal )))
		self.tokens.append(CFGToken(
			r"'''([^']*('|'')?)*'''", 
			literal.data_wrapper( Literal )))
	
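		# Line structure: blank lines, indentation and newlines go through
		# stateful handlers that emit Python-style indent/dedent tokens.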
		self.tokens.append(CFGToken(r"^[ \t]*\n", self._newline_handler(True))) 
		self.tokens.append(CFGToken(r"^[ \t]*", self._indent_handler, True))
		self.tokens.append(CFGToken(r"\n", self._newline_handler(False)))
		
		self.tokens.append(CFGToken(r"[ \t\r]", CFGToken.NoToken))
		self.tokens.append(CFGToken(r"\\\n^", CFGToken.NoToken))

		self.tokens.append(CFGToken(r"^[ \t]*#[^\n]*\n",
			self._comment_line_handler, True))
		self.tokens.append(CFGToken(r"#[^\n]*", 
			comment.data_wrapper( Comment )))

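		# Grammar for the .cfg file format itself.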
		self.nonterminals = ( goal, cfg_file, section, 
			code, code_line, code_lines, code_bits,
			naming, regexes, products, productions,
			regex, production ) = ( 
			CFGNonterminal.create(
				"goal", "cfg_file", "section", 
				"code", "code_line", "code_lines", "code_bits",
				"naming", "regexes", "products", "productions",
				"regex", "production"
				) )
		for t in self.nonterminals:
			setattr( self, t.identifier, t )

		make_list = lambda *args: list(args)
		def append( l, i ): l.append(i); return l
		def append_tuple( l, *a ): l.append( tuple(a) ); return l

		first = lambda a, *args: a
		second = lambda a, b, *args: b
		third = lambda a, b, c, *args: c
		fourth = lambda a, b, c, d, *args: d

		goal.production( first, cfg_file, eof )
		cfg_file.production( File.append, cfg_file, section, code_lines )
		cfg_file.production( File, code_lines )

		code_toks = [ operator, identifier, number, literal, colon,
			tokens_id, productions_id ]
		for t in code_toks:
			code_bits.production( append, code_bits, t )
			code_bits.production( make_list, t )

		code_line.production( append, code_bits, newline )
		code_line.production( lambda i: [], newline )

		code_lines.production( Code.append, code_lines, code_line )
		code_lines.production( Code.add_block, 
			code_lines, indent, code_lines, dedent )
		code_lines.production( lambda: Code() )

		code.production( lambda _, c, __: Code(c), colon, code_bits, newline )
		code.production( fourth, colon, newline, indent, code_lines, dedent )
		code.production( lambda n: Code(), newline )

		section.production( TokenSection.create, 
			start_section, tokens_id, regexes, end_section )
		section.production( ProductionSection.create, 
			start_section, productions_id, productions, end_section )

		naming.production( second, start_name, identifier, end_name )
		naming.production( lambda: Identifier("", None, None) )

		regex.production( Regex, literal, naming, code )
		regexes.production( append, regexes, regex )
		for white in [ newline, indent, dedent ]:
			regexes.production( first, regexes, white )
		regexes.production( lambda: [] )

		products.production( append_tuple, products, identifier, naming )
		products.production( lambda: [] )

		production.production( Production.create, 
			identifier, produces, products, code )
		productions.production( append, productions, production )
		for white in [ newline, indent, dedent ]:
			productions.production( first, productions, white )
		productions.production( lambda: [] )

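	# Lexer callbacks.  paren_level suppresses indent/dedent handling
	# inside (), [] and {}, as in Python's own tokenizer.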
	def _paren_open( self, parsed, start, end ):
		self.paren_level += 1
		data = Operator(parsed, start, end)
		return self.operator.with_data(data)

	def _paren_close( self, parsed, start, end ):
		self.paren_level -= 1
		data = Operator(parsed, start, end)
		return self.operator.with_data(data)

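	# Compares the new line's indentation (tabs round up to 4-column
	# stops) against a stack of open levels: yields one Indent when
	# deeper, and one Dedent per level closed when shallower.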
	def _indent_handler( self, parsed, start, end ):
		indent = 0
		for ch in parsed:
			if ch == ' ':		indent += 1
			elif ch == '\t':	indent = (indent//4 + 1) * 4

		line, col = start
		if self.paren_level == 0:
			if indent > self.indent_levels[-1]:
				self.indent_levels.append( indent )
				data = Indent( parsed, start, end )
				yield self.indent.with_data( data )
			while indent < self.indent_levels[-1]:
				self.indent_levels.pop()
				data = Dedent( '', end, end )
				yield self.dedent.with_data( data )
	
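	# Returns a callback closed over `empty` so blank and significant
	# newlines share one code path; the Newline token records whether
	# it was emitted inside brackets.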
	def _newline_handler( self, empty ):
		def cb( parsed, start, end ):
			line, col = start
			col += len(parsed) - 1
			paren = self.paren_level != 0
			data = Newline( '\n', (line, col), (line, col+1), empty, paren )
			return self.newline.with_data( data )
		return cb

	def _comment_line_handler( self, parsed, start, end ):
		line, col = start
		index = parsed.index( '#' )
		line, col = line, col+index
		end = (line, col+len(parsed[index:-1]))
		data = Comment( parsed[index:-1], (line, col), end )
		yield self.comment.with_data( data )

		line, col = end
		data = Newline( '\n', end, (line, col+1), True )
		yield self.newline.with_data( data )
	
	def reset( self ):
		self.paren_level = 0
		self.indent_levels = [0]

	def tokenize( self, data, reset = True ):
		if reset:
			self.reset()
		for tok in self.lexer.tokenize( data ):
			yield tok

	def tokenize_nt( self, data, reset = True ):
		for t in self.tokenize( data, reset ):
			yield t.data

	def python_tokenize( self, data, reset = True ):
		for t in self.tokenize( data, reset ):
			yield t.data.python()

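	# Advance `it` until `val` is found; returns the matching token and
	# the partially consumed iterator.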
	def _ifind( self, it, val ):
		v = next(it)
		while val != v:
			v = next(it)
		return v, it

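	# Yield the tokens of the current indentation block, stopping at the
	# matching dedent; include_below also passes nested blocks through.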
	def _indent_level( self, it, include_below = False ):
		depth = 0
		while depth >= 0:
			t = next(it)
			if t == Indent():	depth += 1
			elif t == Dedent():	depth -= 1
			if include_below or depth == 0:
				yield t

	def _get_file_tokens( self, filename ):
		f = open( filename, 'r' )
		return self.tokenize_nt( chain.from_iterable( f ) )

	def _get_class_tokens( self, filename, clsname, include_header = False ):
		tokens = self._get_file_tokens( filename )
		n = Identifier('')
		while n != Identifier( clsname ):
			cls, tokens = self._ifind( tokens, Identifier('class') )
			n = next(tokens)

		if include_header:
			yield cls
			yield n
		while n != Indent():
			n = next(tokens)
			if include_header:
				yield n

		for t in self._indent_level( tokens, True ):
			yield t

	def _get_fn_tokens( self, filename, clsname, fnname, 
			args = False, header = False ):
		tokens = self._get_class_tokens( filename, clsname )
		n = Identifier( '' )
		while n != Identifier( fnname ):
			d, tokens = self._ifind( tokens, Identifier( 'def' ) )
			n = next(tokens)

		if header:
			yield d
			yield n
		while n != Indent():
			n = next(tokens)
			if args:
				yield n

		for t in self._indent_level( tokens, True ):
			yield t

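	# Boilerplate source for the generated module; parse() feeds these
	# strings back through tokenize_nt so they splice in as tokens.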
	def _intro( self ):
		yield "\n\n" + "#"*70 + '\n'
		yield "# Begin automatically generated code\n"
		yield "#"*70 + '\n'
		yield "from collections import defaultdict\n"
		yield "from itertools import chain\n"

	def _parser_header( self ):
		yield "class Parser:\n"
		yield "\tdef __init__( self ):\n"
		yield "\t\tself._build_lexer()\n"
		yield "\t\tself._build_parser()\n"
		yield "\t\n"
		yield "\tdef tokenize_and_parse( self, input ):\n"
		yield "\t\treturn self.parse( self.tokenize( input ) )\n"
		yield "\t\n"

	def _fn_header( self, name ):
		yield "\tdef {0}".format( name )

	def _extro( self ):
		yield "#"*70 + '\n'
		yield "# End automatically generated code\n"
		yield "#"*70 + '\n\n\n'

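	# Parses a .cfg grammar file and writes a self-contained parser to
	# output.py, splicing together the input's leading code, support
	# classes tokenized out of actions.py/glr.py/lexer.py, the generated
	# token and production tables, and the input's trailing code.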
	def parse( self, filename ):
		cfi = chain.from_iterable

		f = open( filename, 'r' )
		tokens = self.tokenize( cfi( f ) )
		cfg_file = self.parser.parse( tokens )
		f.close()

		tokens, prods = None, None
		for i in cfg_file.items:
			if isinstance( i, TokenSection ):		tokens = i
			if isinstance( i, ProductionSection ):	prods = i

		if tokens is None or prods is None:
			return

		terminals = tokens.build()
		prods.build( terminals )

		out = chain( 
			(t for t in cfg_file.items[0].tokens),
			self.tokenize_nt( cfi(self._intro()) ),
			self._get_file_tokens( 'actions.py' ),
			self._get_class_tokens( 'glr.py', 'CFGNonterminal', True ),
			self._get_class_tokens( 'glr.py', 'CFGTerminal', True ),
			self._get_class_tokens( 'lexer.py', '_NamedEmptyObject', True ),
			self._get_class_tokens( 'lexer.py', 'CFGDFAState', True ),
			self.tokenize_nt( cfi(self._parser_header()), False ),
			self.tokenize_nt( cfi(self._fn_header( "parse" )), False ),
			self._get_fn_tokens( 'glr.py', 'CFGParser', 'parse', 
				args = True ),
			self.tokenize_nt( cfi(self._fn_header( "_parse" )), False ),
			self._get_fn_tokens( 'glr.py', 'CFGParser', '_parse', 
				args = True ),
			self.tokenize_nt( cfi(self._fn_header( "tokenize" )), False ),
			self._get_fn_tokens( 'lexer.py', 'CFGLexer', 'tokenize', 
				args = True ),
			self.tokenize_nt( cfi(self._fn_header( "_tokenize" )), False ),
			self._get_fn_tokens( 'lexer.py', 'CFGLexer', '_tokenize', 
				args = True ),
			self.tokenize_nt( cfi(self._fn_header( "_wrap_data" )), False ),
			self._get_fn_tokens( 'lexer.py', 'CFGLexer', '_wrap_data', 
				args = True ),
			self.tokenize_nt( cfi(tokens.lexer_tokens()), False ),
			self.tokenize_nt( cfi(prods.parser_tokens()), False ),
			self.tokenize_nt( cfi(self._extro()), False ),
			(t for t in cfg_file.items[-1].tokens)
			)

		indents, previous = [''], None
		f = open( 'output.py', 'w' )
		for t in out:
			data, indents, previous = t.output( indents, previous )
			f.write( data )

		return cfg_file

	def output( self ):
		pass