def test_Grammar():
    """Check the symbol sets and production lookup a ``Grammar`` exposes."""
    start_prod = Production('S2', 'S')
    s_prod = Production('S', 'A', 'b')
    a_prod = Production('A', 'a', 'a')
    g = Grammar(start_prod, s_prod, a_prod)

    expected_terminals = {'a', 'b'}
    expected_non_terminals = {'S2', 'S', 'A'}

    # Every symbol is either a terminal or a non-terminal.
    assert_equal(set(g.symbols), expected_non_terminals | expected_terminals)
    assert_equal(set(g.terminals), expected_terminals)
    assert_equal(set(g.non_terminals), expected_non_terminals)

    # Only the second production has 'S' on its left-hand side.
    prods_for_s = set(g.get_prods_for('S'))
    assert_equal(prods_for_s, {g.prods[1]})
def test_AttributeGrammar_syn():
    """Evaluate synthesized attributes of single productions and terminals."""
    g = Grammar(
        Production('S', 'S', '+', 'S'),
        Production('S', 'zero'),
        Production('S', 'digit'),
    )
    # One rule dict per production, in the same order as the productions above.
    rules_per_production = [
        {'res.0': lambda res: res(1) + res(3)},
        {'res.0': lambda: 0},
        {'res.0': lambda res: res(1)},
    ]
    rules_per_terminal = {
        'digit': {'res.0': lambda word: int(word)},
    }
    attr_g = AttributeGrammar(
        g,
        syn_attrs={'res'},
        prod_attr_rules=rules_per_production,
        terminal_attr_rules=rules_per_terminal,
    )

    assert_equal(attr_g.attrs, {'res'})
    assert_equal(attr_g.syn_attrs, {'res'})
    assert_equal(attr_g.inh_attrs, set())

    # 'digit' has a terminal rule, 'zero' has none.
    for terminal, lexeme, expected in (('digit', '6', {'res': 6}), ('zero', '0', {})):
        assert_equal(attr_g.get_terminal_syn_attr_eval(terminal, lexeme), expected)

    # (production index, attribute evaluations of the right-hand side, expected result)
    production_cases = (
        (0, [{'res': 4}, {}, {'res': 7}], {'res': 4 + 7}),
        (2, [{'res': 8}], {'res': 8}),
        (1, [{}], {'res': 0}),
    )
    for prod_index, right_evals, expected in production_cases:
        actual = attr_g.eval_prod_syn_attr(g.prods[prod_index], {}, right_evals)
        assert_equal(actual, expected)
def test_ParserGenerator_simple_ast():
    """Build an AST for '1+2+3' through synthesized attributes and inspect every node."""

    class Ast:
        def __init__(self, pos: TreePosition):
            self.pos = pos

    class ConstantAst(Ast):
        def __init__(self, pos, num):
            super().__init__(pos)
            self.num = num

    class SumAst(Ast):
        def __init__(self, pos, ast, num):
            super().__init__(pos)
            self.ast = ast
            self.num = num

    g = Grammar(
        Production('A', 'B'),
        Production('B', 'num'),
        Production('B', 'B', '+', 'num'),
    )
    ast_rules = [
        {'ast': 'ast.1'},
        {'ast': lambda _pos, num: ConstantAst(_pos, num(1))},
        {'ast': lambda _pos, ast, num: SumAst(_pos, ast(1), num(3))},
    ]
    attr_g = AttributeGrammar(
        g,
        inh_attrs=set(),
        syn_attrs={'ast', 'num'},
        prod_attr_rules=ast_rules,
        terminal_attr_rules={'num': {'num': lambda val: int(val)}},
    )

    assert_equal(set(g.terminals), {'num', '+'})
    assert_equal(attr_g.attrs, {'ast', 'num'})
    assert_equal(attr_g.syn_attrs, {'ast', 'num'})
    assert_equal(attr_g.inh_attrs, set())

    word = '1+2+3'
    tokens = ['num', '+', 'num', '+', 'num']
    tokens_pos = [0, 1, 2, 3, 4]
    print('tokens:', tokens, 'with decomposition', tokens_pos)

    parser = ParserGenerator(g)
    right_analysis, attr_eval = parser.parse_syn_attr_analysis(attr_g, word, tokens, tokens_pos)
    print('right analysis:', right_analysis)
    print('attribute eval:', attr_eval)
    assert 'ast' in attr_eval

    # Walk from the outermost sum down to the innermost constant.
    node = attr_eval['ast']
    for end_pos, summand in ((5, 3), (3, 2)):
        assert isinstance(node, SumAst)
        assert node.pos == TreePosition(word, 0, end_pos)
        assert node.num == summand
        node = node.ast
    assert isinstance(node, ConstantAst)
    assert node.pos == TreePosition(word, 0, 1)
    assert node.num == 1
def test_ParserGenerator_arithmetic():
    """Parse single-character arithmetic and evaluate it from the production analysis."""
    # left associative, with operator precedence
    op_plus = Production('Sum', 'Sum', '+', 'Prod')
    op_minus = Production('Sum', 'Sum', '-', 'Prod')
    op_mult = Production('Prod', 'Prod', '*', 'Term')
    op_div = Production('Prod', 'Prod', '/', 'Term')
    # we can clean this up once we have proper tokens
    const_terms = {Production('Term', str(i)): i for i in range(11)}
    g = Grammar(
        Production('Expr', 'Sum'),
        op_plus,
        op_minus,
        Production('Sum', 'Prod'),
        op_mult,
        op_div,
        Production('Prod', 'Term'),
        Production('Term', '(', 'Sum', ')'),
        *const_terms,
    )
    parser = ParserGenerator(g)

    # Binary operator productions mapped to the arithmetic they stand for.
    binary_ops = {
        op_plus: lambda lhs, rhs: lhs + rhs,
        op_minus: lambda lhs, rhs: lhs - rhs,
        op_mult: lambda lhs, rhs: lhs * rhs,
        op_div: lambda lhs, rhs: lhs / rhs,
    }

    def evaluate(word: str):
        assert isinstance(word, str)
        tokens = list(word)
        tokens_pos = list(range(len(tokens)))
        print('input word:', word)
        analysis = parser.parse_analysis(word, tokens, tokens_pos)
        print('analysis:', analysis)

        # Replay the analysis back to front on a value stack.
        stack = []
        for prod in reversed(analysis):
            if prod in const_terms:
                stack.append(const_terms[prod])
                continue
            apply_op = binary_ops.get(prod)
            if apply_op is None:
                # Remaining productions do not touch the value stack.
                continue
            rhs = stack.pop()
            lhs = stack.pop()
            stack.append(apply_op(lhs, rhs))

        assert len(stack) == 1
        result_value = stack[0]
        print('result:', result_value)
        assert result_value == eval(word)

    for expression in (
            '1*2+3', '1+2*3', '(1+2)*3', '1+2+3', '1+2-3',
            '1-2+3', '3-2-1', '1*2+3*4', '4/2-1'):
        evaluate(expression)
def test_ParserGenerator_simple_lookahead():
    """Two-token words select the matching ``S`` production; 'ba' is rejected."""
    g = Grammar(
        Production('S2', 'S'),
        Production('S', 'b', 'b'),
        Production('S', 'a', 'b'),
    )
    parser = ParserGenerator(g)

    accepted = (
        (['b', 'b'], g.prods[1]),
        (['a', 'b'], g.prods[2]),
    )
    for tokens, chosen_prod in accepted:
        analysis = parser.parse_analysis(''.join(tokens), tokens, [0, 1])
        assert_equal(analysis, (g.prods[0], chosen_prod))

    with assert_raises(ParseError):
        parser.parse_analysis('ba', ['b', 'a'], [0, 1])
def test_ParserGenerator_simple_left_recursive_epsilon():
    """Left-recursive grammar with an empty base case, for words 'a' * count."""
    g = Grammar(
        Production('S2', 'S'),
        Production('S', 'S', 'a'),
        Production('S'),
    )
    parser = ParserGenerator(g)
    start_prod, repeat_prod, empty_prod = g.prods[0], g.prods[1], g.prods[2]

    for count in (0, 1, 2, 3, 20, 100, 10000):
        word = 'a' * count
        tokens = ['a'] * count
        tokens_pos = list(range(count))
        # One repeat production per 'a', closed off by the empty production.
        expected = (start_prod,) + (repeat_prod,) * count + (empty_prod,)
        assert_equal(parser.parse_analysis(word, tokens, tokens_pos), expected)
def test_ParserGenerator_simple_left_recursive():
    """Left-recursive grammar with base case 'a'; the empty word must not parse."""
    g = Grammar(
        Production('S2', 'S'),
        Production('S', 'S', 'a'),
        Production('S', 'a'),
    )
    parser = ParserGenerator(g)
    start_prod, repeat_prod, base_prod = g.prods[0], g.prods[1], g.prods[2]

    for count in (1, 2, 3, 20, 100, 10000):
        word = 'a' * count
        tokens = ['a'] * count
        tokens_pos = list(range(count))
        # The base production consumes one 'a', the repeat production the rest.
        expected = (start_prod,) + (repeat_prod,) * (count - 1) + (base_prod,)
        assert_equal(parser.parse_analysis(word, tokens, tokens_pos), expected)

    with assert_raises(ParseError):
        parser.parse_analysis('', [], [])
def test_make_first1_sets_epsilon():
    """First-1 sets of a grammar in which 'A' (and through it 'B' and 'S') can be empty."""
    g = Grammar(
        Production('S', 'A', 'B'),
        Production('A', 'a'),
        Production('A'),
        Production('B', 'A'),
        Production('B', 'b', 'c'),
    )
    first1 = make_first1_sets(g)
    print('first1 sets:', first1)

    expected_per_symbol = (
        ('A', {EPSILON, 'a'}),
        ('B', {EPSILON, 'a', 'b'}),
        ('S', {EPSILON, 'a', 'b'}),
    )
    for symbol, expected in expected_per_symbol:
        assert_equal(first1[symbol], expected)

    # 'B' may be empty, so the trailing terminal 'c' shows up as well.
    first1_of_word = get_first1_set_for_word(first1, ('B', 'c'))
    assert_equal(first1_of_word, {'a', 'b', 'c'})
def test_ParserGenerator_parse_tree_epsilon2():
    """Smoke test: analysis and parse tree for '42;' are computed and printed, not asserted."""
    productions = (
        Production('TopLevelExpr', 'ExprList'),
        Production('ExprList'),
        Production('ExprList', 'Expr', 'ExprList'),
        Production('Expr', 'Val', ';'),
        Production('Val', 'number'),
    )
    g = Grammar(*productions)
    parser = ParserGenerator(g)

    word = '42;'
    tokens = ['number', ';']
    tokens_pos = [0, 2]

    analysis = parser.parse_analysis(word, tokens, tokens_pos)
    print(analysis)
    tree = parser.parse_tree(word, tokens, tokens_pos)
    print(tree)
def test_make_first1_sets():
    """First-1 sets of a small expression grammar without empty productions."""
    operand_prods = (
        Production('S', 'S', 'O', 'S'),
        Production('S', '(', 'S', ')'),
        Production('S', '0'),
        Production('S', '1'),
    )
    operator_prods = (
        Production('O', '+'),
        Production('O', '*'),
    )
    g = Grammar(*operand_prods, *operator_prods)

    assert_equal(set(g.terminals), {'0', '1', '(', ')', '+', '*'})
    assert_equal(set(g.non_terminals), {'S', 'O'})

    first1 = make_first1_sets(g)
    print('first1 sets:', first1)

    for symbol, expected in (('S', {'0', '1', '('}), ('O', {'+', '*'})):
        assert_equal(first1[symbol], expected)

    first1_of_word = get_first1_set_for_word(first1, ('(', 'S'))
    assert_equal(first1_of_word, {'('})
def test_ParserGenerator_attr_syn():
    """Evaluate '5+7' both while parsing and afterwards on the finished parse tree."""
    g = Grammar(
        Production('A', 'S'),
        Production('S', 'T', '+', 'S'),
        Production('S', 'T'),
        Production('T', 'zero'),
        Production('T', 'digit'),
    )
    # One rule dict per production, in the same order as the productions above.
    rules_per_production = [
        {'res.0': lambda res: res(1)},
        {'res.0': lambda res: res(1) + res(3)},
        {'res.0': lambda res: res(1)},
        {'res.0': lambda res: 0},
        {'res.0': lambda res: res(1)},
    ]
    attr_g = AttributeGrammar(
        g,
        inh_attrs=set(),
        syn_attrs={'res'},
        prod_attr_rules=rules_per_production,
        terminal_attr_rules={'digit': {'res.0': lambda value_: int(value_)}},
    )
    assert_equal(attr_g.attrs, {'res'})
    assert_equal(attr_g.syn_attrs, {'res'})
    assert_equal(attr_g.inh_attrs, set())

    word = '5+7'
    tokens = ['digit', '+', 'digit']
    tokens_pos = [0, 1, 2]
    print('tokens:', tokens, 'with decomposition', tokens_pos)
    parser = ParserGenerator(g)

    # Attribute evaluation interleaved with parsing.
    right_analysis, attr_eval = parser.parse_syn_attr_analysis(attr_g, word, tokens, tokens_pos)
    print('right analysis:', right_analysis)
    print('attribute eval (online):', attr_eval)
    start_prod, plus_prod, single_prod, digit_prod = g.prods[0], g.prods[1], g.prods[2], g.prods[4]
    assert_equal(right_analysis, (start_prod, plus_prod, single_prod, digit_prod, digit_prod))
    assert_equal(attr_eval, {'res': 5 + 7})

    # Same word as an explicit syntax tree.
    tree = parser.parse_tree(word, tokens, tokens_pos)
    print('parse tree:', tree)
    left_operand = SyntaxTree(g.prods[4], None)
    right_operand = SyntaxTree(g.prods[2], SyntaxTree(g.prods[4], None))
    sum_tree = SyntaxTree(g.prods[1], left_operand, None, right_operand)
    assert_equal(tree, SyntaxTree(g.prods[0], sum_tree))

    # Attribute evaluation on the finished tree must agree with the online result.
    attr_eval_gen = AttributeEvalGenerator(attr_g)
    tree_attr_eval = attr_eval_gen.eval_attrs(tree, word, tokens, tokens_pos)
    print('attribute eval (using tree):', tree_attr_eval)
    assert_equal(tree_attr_eval, {'res': 5 + 7})
def make_first1_sets(grammar: Grammar) -> Dict[str, Set[Optional[str]]]:
    """
    Compute the first-1 set of every symbol of ``grammar`` by fixpoint iteration.

    :param grammar: grammar providing ``symbols``, ``terminals``,
        ``non_terminals`` and ``get_prods_for``
    :returns: mapping from each symbol to its first-1 set; entries are
        terminals of the grammar or ``EPSILON``
    """
    # fi(x) = {x} for all terminals, non-terminals start empty and grow below
    first1_sets: Dict[str, Set[Optional[str]]] = {}
    for symbol in grammar.symbols:
        first1_sets[symbol] = {symbol} if symbol in grammar.terminals else set()

    while True:
        changed = False
        for non_terminal in grammar.non_terminals:
            # Union of the first-1 sets of all right-hand sides, based on the
            # current (possibly already updated in this pass) approximation.
            recomputed = set().union(*(
                get_first1_set_for_word(first1_sets, prod.right)
                for prod in grammar.get_prods_for(non_terminal)))
            if recomputed != first1_sets[non_terminal]:
                first1_sets[non_terminal] = recomputed
                changed = True
        if not changed:
            break

    assert all(
        entry in grammar.terminals or entry is EPSILON
        for first1 in first1_sets.values() for entry in first1)
    return first1_sets
def test_ParserGenerator_parse_tree_epsilon():
    """Analysis and parse tree of 'aaa' in a grammar that also has an empty production."""
    g = Grammar(
        Production('S', 'A'),
        Production('A'),
        Production('A', 'B'),
        Production('B', 'a'),
        Production('B', 'a', 'B'),
    )
    parser = ParserGenerator(g)
    word = 'aaa'
    tokens = ['a', 'a', 'a']
    tokens_pos = [0, 1, 2]

    expected_analysis = (g.prods[0], g.prods[2], g.prods[4], g.prods[4], g.prods[3])
    assert_equal(parser.parse_analysis(word, tokens, tokens_pos), expected_analysis)

    actual_tree = parser.parse_tree(word, tokens, tokens_pos)
    # Expected tree, assembled from the innermost 'B -> a' outwards.
    last_b = SyntaxTree(g.prods[3], None)
    middle_b = SyntaxTree(g.prods[4], None, last_b)
    first_b = SyntaxTree(g.prods[4], None, middle_b)
    expected_tree = SyntaxTree(g.prods[0], SyntaxTree(g.prods[2], first_b))
    assert_equal(actual_tree, expected_tree)
def test_ParserGenerator_cfg_with_lexer():
    """Smoke test: tokenize and parse a grammar description; results are only printed."""
    declaration_prods = (
        Production('Grammar', 'Decl'),
        Production('Decl', 'Prod', ';', 'Decl'),
        Production('Decl', 'Prod'),
        Production('Prod', 'Symb', '->', 'Right'),
    )
    right_side_prods = (
        Production('Right', 'Symbs', '|', 'Symbs'),
        Production('Right', 'Symbs'),
        Production('Symbs', 'Symb', 'Symbs'),
        Production('Symbs'),
    )
    g = Grammar(*declaration_prods, *right_side_prods)

    token_names = [';', '->', '|', 'Symb']
    token_regexes = ['; *|[\n\r ]+', ' *\\-> *', ' *\\| *', '([a-z]|[A-Z])+']
    lexer = LexerGenerator(token_names, token_regexes)
    parser = ParserGenerator(g)

    word = 'A->Bc|B; B->ca'
    print('input word:', word)
    tokens, tokens_pos = lexer.tokenize(word)
    print('tokenized word:', tokens, 'with decomposition', tokens_pos)
    analysis = parser.parse_analysis(word, tokens, tokens_pos)
    print('right-most analysis:', analysis)
def test_ParserGenerator_cfg():
    """Smoke test: parse a hand-tokenized grammar description; the analysis is only printed."""
    structure_prods = (
        Production('Grammar', 'Decl'),
        Production('Decl', 'Prod', ';', 'Decl'),
        Production('Decl', 'Prod', '\n', 'Decl'),
        Production('Decl', 'Prod'),
        Production('Prod', 'Symb', '->', 'Right'),
        Production('Right', 'Symbs', '|', 'Symbs'),
        Production('Right', 'Symbs'),
        Production('Symbs', 'Symb', 'Symbs'),
        Production('Symbs'),
    )
    # Every symbol letter gets its own 'Symb' production.
    letter_prods = [Production('Symb', letter) for letter in 'ABCabc']
    g = Grammar(*structure_prods, *letter_prods)
    parser = ParserGenerator(g)

    tokens = ['A', '->', 'B', 'c', '|', 'B', ';', 'B', '->', 'c', 'a']
    print('tokenized word:', tokens)
    word = ''.join(tokens)
    tokens_pos = list(range(len(tokens)))
    analysis = parser.parse_analysis(word, tokens, tokens_pos)
    print('right-most analysis:', analysis)
def test_AttributeEvalGenerator_typed_arithmetic():
    """
    End-to-end test: tokenize, parse and evaluate arithmetic over numbers,
    vectors and matrices using only synthesized attributes.

    Every expression carries a ``type`` ('num', 'vec' or 'mat') and a ``res``
    value; ill-typed or ill-shaped expressions evaluate to ``ERROR``.
    """
    import math
    import numpy as np
    lexer = LexerGenerator(token_names=[
        '(', ')', '+', '-', '*', '**', '/', '[', ']', ',', 'const', 'name',
        IGNORED_TOKEN
    ],
                           token_regex_table=[
                               '\\(', '\\)', '\\+', '\\-', '\\*', '\\*\\*', '/',
                               '\\[', '\\]', ',', '(0|[1-9][0-9]*)(\\.[0-9]+)?',
                               '([a-z]|[A-Z])+', ' +'
                           ])
    # left associative (except ** that is right associative), with operator precedence
    g = Grammar(Production('Expr', 'Sum'),
                Production('Sum', 'Sum', '+', 'Prod'),
                Production('Sum', 'Sum', '-', 'Prod'),
                Production('Sum', 'Prod'),
                Production('Prod', 'Prod', '*', 'Pow'),
                Production('Prod', 'Prod', '/', 'Pow'),
                Production('Prod', 'Pow'),
                Production('Pow', 'Term', '**', 'Pow'),
                Production('Pow', 'Term'),
                Production('Term', '-', 'NegTerm'),
                Production('Term', 'NegTerm'),
                Production('NegTerm', '(', 'Sum', ')'),
                Production('NegTerm', 'name', '(', 'Sum', ')'),
                Production('NegTerm', 'const'),
                Production('NegTerm', '[', 'ExprList', ']'),
                Production('ExprList', 'Sum'),
                Production('ExprList', 'ExprList', ',', 'Sum'))
    # Sentinel result of an expression that cannot be evaluated; every rule
    # below propagates it from its operands.
    ERROR = None

    # NOTE(review): the rule functions below look like they are bound by
    # parameter name -- `type(i)` / `res(i)` / `name(i)` read that attribute of
    # the i-th right-hand-side symbol. Confirm against AttributeGrammar before
    # renaming any parameter (which is why `type` shadows the builtin here).

    def op_plus_res(type, res):
        # Sum -> Sum '+' Prod: operands must share type and shape.
        left, right, left_type, right_type = res(1), res(3), type(1), type(3)
        if left is ERROR or right is ERROR:
            return ERROR
        # Chained comparison: both types are equal AND one of the known types.
        if left_type == right_type in {'num', 'vec', 'mat'}:
            if np.shape(left) != np.shape(right):
                return ERROR
            return left + right
        return ERROR

    def op_minus_res(type, res):
        # Sum -> Sum '-' Prod: same checks as for '+'.
        left, right, left_type, right_type = res(1), res(3), type(1), type(3)
        if left is ERROR or right is ERROR:
            return ERROR
        if left_type == right_type in {'num', 'vec', 'mat'}:
            if np.shape(left) != np.shape(right):
                return ERROR
            return left - right
        return ERROR

    def op_times_type(type):
        # Result type of '*' and '/' (both productions share this rule).
        left_type, right_type = type(1), type(3)
        if left_type == right_type == 'num':
            return 'num'
        if (left_type == 'vec' and right_type == 'num') or (left_type == 'num' and right_type == 'vec'):
            return 'vec'
        # NOTE(review): this also labels mat * vec / vec * mat as 'mat',
        # although the matmul result is one-dimensional -- confirm intent.
        if left_type == 'mat' or right_type == 'mat':
            return 'mat'
        return ERROR

    def op_times_res(type, res):
        # Prod -> Prod '*' Pow: scalar scaling or matrix multiplication.
        left, right, left_type, right_type = res(1), res(3), type(1), type(3)
        if left is ERROR or right is ERROR:
            return ERROR
        if left_type not in {'num', 'vec', 'mat'} or right_type not in {'num', 'vec', 'mat'}:
            return ERROR
        if left_type == 'num' or right_type == 'num':
            return left * right
        if left_type == 'mat' or right_type == 'mat':
            # At least one matrix operand: check the inner dimensions first.
            if left_type == 'vec':
                assert right_type == 'mat'
                if np.shape(left)[0] != np.shape(right)[0]:
                    return ERROR
                return np.matmul(left, right)
            if right_type == 'vec':
                assert left_type == 'mat'
                if np.shape(left)[1] != np.shape(right)[0]:
                    return ERROR
                return np.matmul(left, right)
            assert left_type == right_type == 'mat'
            if np.shape(left)[1] != np.shape(right)[0]:
                return ERROR
            return np.matmul(left, right)
        # vec * vec is not supported.
        return ERROR

    def op_divided_res(type, res):
        # Prod -> Prod '/' Pow: only division by a number is allowed.
        left, right, left_type, right_type = res(1), res(3), type(1), type(3)
        if left is ERROR or right is ERROR:
            return ERROR
        if left_type not in {'num', 'vec', 'mat'} or right_type not in {'num', 'vec', 'mat'}:
            return ERROR
        if right_type == 'num':
            return left / right
        return ERROR

    def op_pow_res(type, res):
        # Pow -> Term '**' Pow: num ** num, or square matrix ** num.
        left, right, left_type, right_type = res(1), res(3), type(1), type(3)
        if left is ERROR or right is ERROR:
            return ERROR
        if left_type == right_type == 'num':
            assert isinstance(left, (float, int)) and isinstance(right, (float, int))
            return left**right
        if left_type == 'mat' and right_type == 'num':
            if np.shape(left)[0] != np.shape(left)[1]:
                return ERROR
            # NOTE(review): `**` on a numpy array is element-wise, not a
            # matrix power; the only matrix case below uses the identity,
            # where both agree -- confirm which one is intended.
            return left**right
        return ERROR

    def op_neg_res(type, res):
        # Term -> '-' NegTerm: the operand is the 2nd right-hand-side symbol.
        type_, res_ = type(2), res(2)
        if res_ is ERROR:
            return ERROR
        if type_ in {'num', 'vec', 'mat'}:
            return -res_
        return ERROR

    def op_func_type(type, name):
        # NegTerm -> name '(' Sum ')': result type depends on the function name.
        arg_type = type(3)
        if name(1) in {'ones', 'zeros'}:
            return 'vec'
        if name(1) in {'len'}:
            return 'num'
        return arg_type

    def op_func_res(type, res, name):
        # NegTerm -> name '(' Sum ')': built-in vector helpers, else math.<name>.
        arg, arg_type = res(3), type(3)
        if arg is ERROR:
            return ERROR
        if name(1) in {'ones', 'zeros'}:
            # NOTE(review): `arg <= 1` also rejects ones(1) / zeros(1);
            # confirm this is intended rather than `arg < 1`.
            if arg_type != 'num' or arg <= 1:
                return ERROR
            assert isinstance(arg, (float, int))
            return (np.ones if name(1) == 'ones' else np.zeros)(int(arg))
        if name(1) == 'len':
            # 'len' is the Euclidean length of a vector, not its element count.
            if arg_type != 'vec':
                return ERROR
            return np.linalg.norm(arg)
        if arg_type == 'num':
            # Any other name is looked up in the math module.
            if not hasattr(math, name(1)):
                return ERROR
            return getattr(math, name(1))(arg)
        return ERROR

    def op_const_vec_type(type_list, res_list):
        # NegTerm -> '[' ExprList ']': a list of nums is a vec, a list of vecs a mat.
        type_list_, res_list_ = type_list(2), res_list(2)
        assert len(type_list_) == len(res_list_)
        if len(type_list_) == 0:
            return ERROR
        type = type_list_[0]
        # All elements must have the same type.
        if not all(t == type for t in type_list_):
            return ERROR
        if type == 'num':
            return 'vec'
        if type == 'vec':
            return 'mat'
        return ERROR

    def op_const_vec_res(type_list, res_list):
        # NegTerm -> '[' ExprList ']': build the numpy array for the literal.
        type_list_, res_list_ = type_list(2), res_list(2)
        assert len(type_list_) == len(res_list_)
        if len(type_list_) == 0:
            return ERROR
        type = type_list_[0]
        if not all(t == type for t in type_list_):
            return ERROR
        if type in {'num', 'vec'}:
            if type == 'vec':
                # Rows of a matrix literal must all have the same length.
                length = len(res_list_[0])
                if not all(len(vec) == length for vec in res_list_):
                    return ERROR
            return np.array(res_list_)
        return ERROR

    # One rule dict per production, in the same order as the productions of `g`.
    attr_g = AttributeGrammar(
        g,
        syn_attrs={'type', 'res', 'name', 'type_list', 'res_list'},
        prod_attr_rules=[{
            # Expr -> Sum
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # Sum -> Sum '+' Prod
            'type': 'type.1',
            'res': op_plus_res
        }, {
            # Sum -> Sum '-' Prod
            'type': 'type.1',
            'res': op_minus_res
        }, {
            # Sum -> Prod
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # Prod -> Prod '*' Pow
            'type': op_times_type,
            'res': op_times_res
        }, {
            # Prod -> Prod '/' Pow
            'type': op_times_type,
            'res': op_divided_res
        }, {
            # Prod -> Pow
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # Pow -> Term '**' Pow
            'type': 'type.1',
            'res': op_pow_res
        }, {
            # Pow -> Term
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # Term -> '-' NegTerm
            'type': 'type.2',
            'res': op_neg_res
        }, {
            # Term -> NegTerm
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # NegTerm -> '(' Sum ')'
            'type': 'type.2',
            'res': 'res.2'
        }, {
            # NegTerm -> name '(' Sum ')'
            'type': op_func_type,
            'res': op_func_res
        }, {
            # NegTerm -> const
            'type': 'type.1',
            'res': 'res.1'
        }, {
            # NegTerm -> '[' ExprList ']'
            'type': op_const_vec_type,
            'res': op_const_vec_res
        }, {
            # ExprList -> Sum
            'type_list': lambda type: [type(1)],
            'res_list': lambda res: [res(1)]
        }, {
            # ExprList -> ExprList ',' Sum
            'type_list': lambda type, type_list: type_list(1) + [type(3)],
            'res_list': lambda res, res_list: res_list(1) + [res(3)]
        }],
        terminal_attr_rules={
            'const': {
                'res': lambda lit: float(lit),
                'type': lambda _: 'num'
            },
            'name': {
                'name': lambda lit: lit
            }
        })
    parser = ParserGenerator(g)

    def evaluate(word, expected_result, expected_type=None):
        # Tokenize, parse and evaluate `word`, then compare result and type.
        # The type is only compared when evaluation did not yield ERROR.
        print('----')
        print('input word:', word)
        tokens, tokens_pos = lexer.tokenize(word)
        print('tokens:', tokens, 'with decomposition', tokens_pos)
        analysis, result = parser.parse_syn_attr_analysis(
            attr_g, word, tokens, tokens_pos)
        print('result:')
        print(result['res'])
        if isinstance(expected_result, (float, int)):
            # Scalars are compared approximately (float arithmetic).
            assert_almost_equal(result['res'], expected_result)
        else:
            # Arrays and the ERROR sentinel are compared via numpy.
            np.testing.assert_equal(result['res'], expected_result)
        if result['res'] is not ERROR:
            print('result type:', result['type'])
            assert_equal(result['type'], expected_type)

    # plain numbers: precedence and associativity
    evaluate('1*2+3', 5, 'num')
    evaluate('5-3', 2, 'num')
    evaluate('(1+2)*3', 9, 'num')
    evaluate('1+2+3', 6, 'num')
    evaluate('1-2+3', 2, 'num')
    evaluate('3-2-1', 0, 'num')
    evaluate('1*2+3*4', 14, 'num')
    evaluate('4/2-1', 1, 'num')
    evaluate('sin(3.1415)', 9.26535897e-5, 'num')
    evaluate('2**2**3', 256, 'num')
    evaluate('(2**2)**3', 64, 'num')
    evaluate('(3**2+4**2)**0.5', 5, 'num')
    # vectors
    evaluate('ones(4)', [1, 1, 1, 1], 'vec')
    evaluate('ones(4) * 5', [5, 5, 5, 5], 'vec')
    evaluate('ones(2) - ones(2) * 0', [1, 1], 'vec')
    evaluate('2 ** ones(2)', ERROR)
    evaluate('[1 , 2 , 3]', [1, 2, 3], 'vec')
    evaluate('[1, 2] + 2 * [3, 2]', [7, 6], 'vec')
    # matrices
    evaluate('[[1,2],[3,4]]', [[1, 2], [3, 4]], 'mat')
    evaluate('[ones(2)]', [[1, 1]], 'mat')
    evaluate('[ones(3), ones(4)]', ERROR)
    evaluate('zeros(0+2-2)', ERROR)
    evaluate('[[1,0,0],[0,1,0],[0,0,1]]', [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
             'mat')
    evaluate('[[1,0,0],[0,1,0],[0,0,1]]+[[0,1]]', ERROR)
    evaluate('[[1,0,0],[0,1,0],[0,0,1]]+[[1,0,0],[0,1,0],[0,0,1]]',
             [[2, 0, 0], [0, 2, 0], [0, 0, 2]], 'mat')
    evaluate('[[1,0,0],[0,1,0],[0,0,1]]+0*[[1,0,0],[0,1,0],[0,0,1]]',
             [[1, 0, 0], [0, 1, 0], [0, 0, 1]], 'mat')
    evaluate('[[1,0],[0,1]] ** 5', [[1, 0], [0, 1]], 'mat')
    evaluate('[[0,2],[1,1]] + 3 * [[3,0],[1,1]]', [[9, 2], [4, 4]], 'mat')
    evaluate('[[1,0,3,-2],[3,2,1,0]] * [[2,1,0],[0,1,0],[-1,4,1],[3,2,-1]]',
             [[-7, 9, 5], [5, 9, 1]], 'mat')
    # unary minus, shape errors and misuse of 'len'
    evaluate('2**-3', 2**-3, 'num')
    evaluate('2--3', 5, 'num')
    evaluate('-[1,0]*4', [-4, 0], 'vec')
    evaluate('[1,2,3]*[1,2]', ERROR)
    evaluate('len([3,4])', 5, 'num')
    evaluate('len(4)', ERROR)
    evaluate('len([[2,3],[1,2]])', ERROR)
    evaluate('(((((((((7)))))))))', 7, 'num')
def test_AttributeEvalGenerator_check_declaredness():
    """Check use-before-declaration detection with inherited and synthesized attributes."""
    token_names = [';', '=', 'const', 'name']
    token_regexes = [';', '=', '(0|[1-9])[0-9]*(\\.[0-9]*)?', '([a-z]|[A-Z])+']
    lexer = LexerGenerator(token_names, token_regexes)
    g = Grammar(
        Production('Prog0', 'Decls'),
        Production('Decls', 'Decl'),
        Production('Decls', 'Decl', ';', 'Decls'),
        Production('Decl', 'name', '=', 'Var'),
        Production('Var', 'name'),
        Production('Var', 'const'),
    )
    parser = ParserGenerator(g)

    # One rule dict per production, in the same order as the productions above.
    program_rules = {
        'decl_i.1': set(),
        'decl_s.0': 'decl_s.1',
        'ok': 'ok.1',
    }
    single_decl_rules = {
        'decl_i.1': 'decl_i.0',
        'decl_s.0': 'decl_s.1',
        'ok': 'ok.1',
    }
    decl_sequence_rules = {
        'decl_i.1': 'decl_i.0',
        'decl_i.3': 'decl_s.1',
        'decl_s.0': 'decl_s.3',
        'ok': lambda ok: ok(1) and ok(3),
    }
    assignment_rules = {
        'decl_i.3': 'decl_i.0',
        'decl_s.0': lambda decl_i, name: decl_i(0) | {name(1)},
        'ok.0': 'ok.3',
    }
    name_use_rules = {'ok.0': lambda decl_i, name: name(1) in decl_i(0)}
    const_use_rules = {'ok.0': True}

    attr_g = AttributeGrammar(
        g,
        inh_attrs={'decl_i'},
        syn_attrs={'decl_s', 'name', 'ok'},
        prod_attr_rules=[
            program_rules, single_decl_rules, decl_sequence_rules,
            assignment_rules, name_use_rules, const_use_rules],
        terminal_attr_rules={'name': {'name.0': lambda value: value}},
    )
    attr_eval_gen = AttributeEvalGenerator(attr_g)

    def evaluate(word, target_eval):
        print('----')
        print('input word:', word)
        tokens, tokens_pos = lexer.tokenize(word)
        print('tokens:', tokens, 'with decomposition', tokens_pos)
        tree = parser.parse_tree(word, tokens, tokens_pos)
        print('parsed tree:', tree)
        tree_attr_eval = attr_eval_gen.eval_attrs(tree, word, tokens, tokens_pos)
        print('attribute eval:', tree_attr_eval)
        assert_equal(tree_attr_eval, target_eval)

    # (program text, expected attribute evaluation)
    cases = [
        ('alpha=5;beta=7;gamma=alpha',
         {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True}),
        ('alpha=5;beta=alpha;gamma=alpha',
         {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True}),
        ('alpha=1;beta=alpha;gamma=beta',
         {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True}),
        ('alpha=alpha',
         {'decl_s': {'alpha'}, 'ok': False}),
        ('alpha=1.0;beta=gamma',
         {'decl_s': {'alpha', 'beta'}, 'ok': False}),
        ('alpha=1.0;beta=4.0;gamma=beta;delta=alpha;alpha=alpha',
         {'decl_s': {'alpha', 'beta', 'gamma', 'delta'}, 'ok': True}),
    ]
    for program, expected in cases:
        evaluate(program, expected)
# Productions of the regex grammar that are referenced by name elsewhere.
# NOTE(review): REGEX_CHOICE_OP, REGEX_CONCAT_OP, REGEX_REPEAT_OP and
# REGEX_LIT_TOKEN are used below but defined outside this excerpt.
REGEX_REPEAT_EXISTS_OP = Production('Repeat', 'Range', '+')
REGEX_OPTIONAL_OP = Production('Repeat', 'Range', '?')
REGEX_RANGE_OP = Production('Range', '[', 'a', '-', 'a', ']')
REGEX_RANGE_LITS_OP = Production('Range', '[', 'Lits', ']')
REGEX_INV_RANGE_OP = Production('Range', '[', '^', 'a', '-', 'a', ']')
REGEX_INV_RANGE_LITS_OP = Production('Range', '[', '^', 'Lits', ']')
REGEX_LIT_OP = Production('Lit', REGEX_LIT_TOKEN)
REGEX_LIT_ANY_OP = Production('Lit', '.')
REGEX_LITS_MULTIPLE_OP = Production('Lits', 'a', 'Lits')
REGEX_LITS_SINGLE_OP = Production('Lits', 'a')
# Full regex grammar, layered Regex -> Choice -> Concat -> Repeat -> Range -> Lit.
REGEX_GRAMMAR = Grammar(Production('Regex', 'Choice'), REGEX_CHOICE_OP,
                        Production('Choice', 'Concat'), REGEX_CONCAT_OP,
                        Production('Concat', 'Repeat'), REGEX_REPEAT_OP,
                        REGEX_REPEAT_EXISTS_OP, REGEX_OPTIONAL_OP,
                        Production('Repeat', 'Range'), REGEX_RANGE_OP,
                        REGEX_RANGE_LITS_OP, REGEX_INV_RANGE_OP,
                        REGEX_INV_RANGE_LITS_OP, Production('Range', 'Lit'),
                        Production('Range', '(', 'Choice', ')'),
                        REGEX_LITS_MULTIPLE_OP, REGEX_LITS_SINGLE_OP,
                        REGEX_LIT_OP, REGEX_LIT_ANY_OP)
# Parser for the grammar above, built once at import time.
REGEX_PARSER = ParserGenerator(REGEX_GRAMMAR)


def tokenize_regex(
    word: str, file_path: Path | DummyPath = DummyPath("regex")
) -> Tuple[Tuple[str], Tuple[int]]:
    """
    :raises: LexError
    :returns: tokens with decomposition
    """
    # NOTE(review): only the first statement of this function is visible in
    # this excerpt; the remainder of the body is not shown here.
    escape_next = False
def test_ParserGenerator_arithmetic_syn():
    """Evaluate arithmetic expressions while parsing and compare against Python's ``eval``."""
    import math

    token_names = ['(', ')', '+', '-', '*', '**', '/', 'const', 'func']
    token_regexes = [
        '\\(', '\\)', '\\+', '\\-', '\\*', '\\*\\*', '/',
        '(0|[1-9][0-9]*)(\\.[0-9]+)?', '([a-z]|[A-Z])+']
    lexer = LexerGenerator(token_names=token_names, token_regex_table=token_regexes)

    # left associative (except ** that is right associative), with operator precedence
    # Each production is listed next to the rule computing its 'res' attribute.
    productions_with_rules = [
        (Production('Expr', 'Sum'), {'res': lambda res: res(1)}),
        (Production('Sum', 'Sum', '+', 'Prod'), {'res': lambda res: res(1) + res(3)}),
        (Production('Sum', 'Sum', '-', 'Prod'), {'res': lambda res: res(1) - res(3)}),
        (Production('Sum', 'Prod'), {'res': lambda res: res(1)}),
        (Production('Prod', 'Prod', '*', 'Pow'), {'res': lambda res: res(1) * res(3)}),
        (Production('Prod', 'Prod', '/', 'Pow'), {'res': lambda res: res(1) / res(3)}),
        (Production('Prod', 'Pow'), {'res': lambda res: res(1)}),
        (Production('Pow', 'Term', '**', 'Pow'), {'res': lambda res: res(1) ** res(3)}),
        (Production('Pow', 'Term'), {'res': lambda res: res(1)}),
        (Production('Term', '(', 'Sum', ')'), {'res': lambda res: res(2)}),
        (Production('Term', 'func', '(', 'Sum', ')'),
         {'res': lambda res, name: getattr(math, name(1))(res(3))}),
        (Production('Term', 'const'), {'res': lambda res: res(1)}),
    ]
    g = Grammar(*[production for production, _ in productions_with_rules])
    attr_g = AttributeGrammar(
        g,
        syn_attrs={'res', 'name'},
        prod_attr_rules=[rules for _, rules in productions_with_rules],
        terminal_attr_rules={
            'const': {'res': lambda lit: float(lit)},
            'func': {'name': lambda lit: lit},
        },
    )
    parser = ParserGenerator(g)

    def evaluate(word_):
        print('----')
        print('input word:', word_)
        tokens, tokens_pos = lexer.tokenize(word_)
        print('tokens:', tokens, 'with decomposition', tokens_pos)
        analysis, result = parser.parse_syn_attr_analysis(attr_g, word_, tokens, tokens_pos)
        print('result:', result['res'])
        # import common operator names for python eval()
        sin, cos, tan, exp, sqrt = math.sin, math.cos, math.tan, math.exp, math.sqrt  # noqa
        expected = eval(word_)
        assert_equal(result['res'], expected)

    expressions = (
        '1*2+3', '5-3', '(1+2)*3', '1+2+3', '1+2-3', '1-2+3', '3-2-1',
        '1*2+3*4', '4/2-1', 'sin(3.1415)', '2**2**3', '(2**2)**3',
        '(3**2+4**2)**0.5',
    )
    for word in expressions:
        evaluate(word)