def test_parseinfo_directive(self):
    grammar = '''
        @@parseinfo
        @@parseinfo :: True
        test = value:"test" $;
    '''
    model = tatsu.compile(grammar, "test")
    ast = model.parse("test")
    self.assertIsNotNone(ast.parseinfo)

    code = codegen(model)
    self.assertTrue('parseinfo=True' in code)
    compile(code, 'test.py', EXEC)

    grammar = '''
        @@parseinfo :: False
        test = value:"test" $;
    '''
    model = tatsu.compile(grammar, "test")
    ast = model.parse("test")
    self.assertIsNone(ast.parseinfo)

    code = codegen(model)
    self.assertTrue('parseinfo=False' in code)
    compile(code, 'test.py', EXEC)
def test_walk_node_ast():
    GRAMMAR = r'''
        @@grammar::TLA

        #
        # comment
        #
        start = expression $;

        expression = temporal_expression | nontemporal_top_expression ;

        nontemporal_top_expression(SampleExpression) = expr:nontemporal_expression ;

        temporal_expression = temporal_atom;
        temporal_atom(TemporalSeq) = ['Seq'] '(' @:temporal_arg_list ')';
        temporal_arg_list = ",".{@+:expression}+;

        nontemporal_expression = number ;

        # tokens
        number::int = /\d+/;
    '''

    parser = tatsu.compile(GRAMMAR, asmodel=True)
    model = parser.parse('Seq(1,1)')
    assert model.ast is not None

    seen = defaultdict(int)

    class PW(DepthFirstWalker):
        def walk_Node(self, node, *args, **kwargs):
            t = type(node).__name__
            print(f'node {t}')
            seen[t] += 1

    print(json.dumps(asjson(model), indent=2))
    PW().walk(model)
    assert seen == {'SampleExpression': 2, 'TemporalSeq': 1}
def test_whitespace_no_newlines(self):
    grammar = """
        @@whitespace :: /[\t ]+/
        # a token is any run of characters except space and newline;
        # it must end before it captures a space or newline character
        token = /[^ \n]+/;
        # whitespace is expected to capture spaces between tokens, but the
        # newline should be captured afterwards
        token2 = {token}* /\n/;
        # a document is just a list of these strings of tokens
        document = {@+:token2}* $;
    """
    text = trim("""\
        a b
        c d
        e f
    """)
    expected = [
        (["a", "b"], "\n"),
        (["c", "d"], "\n"),
        (["e", "f"], "\n"),
    ]
    model = tatsu.compile(grammar, "document")
    ast = model.parse(text, start='document')
    self.assertEqual(expected, ast)
def translate(text=None, filename=None, name=None, encoding='utf-8', trace=False):
    if text is None and filename is None:
        raise ValueError('either `text` or `filename` must be provided')

    if text is None:
        name = name or path.splitext(path.basename(filename))[0].capitalize()
        with codecs.open(filename, encoding=encoding) as f:
            text = f.read()

    name = name or 'Unknown'
    semantics = ANTLRSemantics(name)
    grammar = compile(antlr_grammar())
    model = grammar.parse(
        text,
        name=name,
        filename=filename,
        semantics=semantics,
        trace=trace,
        colorize=True,
    )
    print(model)
def main_formula(self, str1):
    # textual normalizations applied to the raw input before parsing
    ls = {
        '...': '\ldot',
        '\colon:': ':',
        '\cdot': '*',
        '\to': '\\to',
        '\v': "\\v",
        '\a': '\\a',
        '\f': '\\f',
        '\n': '\\n',
        '\b': '\\b',
        '\\tfrac': '\\frac',
        '\ \\': "\\",
        '\geqslant': '\geq',
        '\leqslant': '\leq',
        'arrow': '\\rightarrow',
    }
    grammar = open(self.grammar_path).read()
    parser = tatsu.compile(grammar, asmodel=True)

    for x in ls:
        if x in str1:
            str1 = str1.replace(x, ls[x])

    try:
        if '#' not in str1:
            a = parser.parse(str1, semantics=CalcSemantics())
            return a
    except Exception:
        return []
def main(self, str1):
    # textual normalizations applied before simplification and parsing
    ls = {
        ' >': '>',
        '\\,': ' ',
        ' <': '<',
        ' =': '=',
        ' dx': 'dx',
        '\\leqslant': '\\leq',
        '\\\\neq': '\\neq',
        '\\,': "",  # note: this later '\\,' entry overrides the earlier one
        ' \\, d': '*d',
    }
    for x in ls:
        if x in str1:
            str1 = str1.replace(x, ls[x])

    A = formula_simplifier(str1)
    str1 = A.main()

    grammar = open(self.grammar_path).read()
    parser = tatsu.compile(grammar, asmodel=True)
    a = parser.parse(str1, semantics=CalcSemantics())

    # convert the parse result into nested lists by rewriting parentheses
    # to brackets and evaluating the resulting literal
    a = str(a)
    a = a.replace("(", "[")
    a = a.replace(")", "]")
    a = eval(a)
    return a
def main():
    import pprint
    import json
    from tatsu import parse, compile, to_python_model
    from tatsu.util import asjson
    import tatsu

    input = """SELECT * FROM (SELECT ZSID FROM (SELECT B.* FROM (SELECT A.ID FROM DUNCE AS A) B ) C);"""
    # input = "SELECT * FROM MARA;"

    # Load the grammar in from a reference file.
    with open(GRAMMAR_DIR + "HANA_SQL_Grammar.bnf") as f:
        grammar = f.read()

    model = compile(grammar, verbose=True)

    munged_input = input.replace(',', ' , ')
    ast = model.parse(munged_input)
    # print(ast)

    result = str(json.dumps(asjson(ast), sort_keys=True, indent=2))
    print(result)
def parser(code: str, **kwargs):
    """
    Parses Datalog code into an Abstract Syntax Tree (AST).

    Parameters
    ----------
    code : str
        code written in Datalog, as described by its EBNF syntax
    **kwargs
        completed and passed to the tatsu parser

    Returns
    -------
    AST
        Abstract Syntax Tree resulting from parsing the code
    """
    kwargs["semantics"] = kwargs.get("semantics", TatsuASTConverter())
    kwargs["parseinfo"] = True
    kwargs["trace"] = kwargs.get("trace", False)
    kwargs["colorize"] = True

    parser_tatsu = tatsu.compile(grammar_EBNF)
    ast = parser_tatsu.parse(code, **kwargs)
    return ast
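# Hedged usage sketch for parser() above. The Datalog rule is only illustrative;
# the exact dialect accepted depends on grammar_EBNF from this module.
def _parser_demo():
    ast = parser("ancestor(X, Y) :- parent(X, Y).")
    return ast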
def get_parser(self, path_to_ebnf_file):
    """
    Creates a tatsu parser from an EBNF grammar file.

    :param path_to_ebnf_file: path to the grammar source
    :return: tatsu parser
    """
    return tatsu.compile(get_file_content(path_to_ebnf_file))
def parse_with_basic_semantics(expr):
    grammar = open('calc.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse(expr, semantics=CalcBasicSemantics())

    print('# BASIC SEMANTICS RESULT')
    pprint.pprint(ast, width=20, indent=4)
def parse(expr):
    grammar = open('calc.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse(expr)

    print('# BASIC RESULT')
    pprint.pprint(ast, width=20, indent=4)
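# Self-contained sketch of the same compile/parse pattern with an inline
# grammar, so it runs without calc.ebnf (the grammar below is illustrative,
# not the contents of the real file).
def parse_inline_sketch():
    grammar = r'''
        @@grammar::CalcSketch
        start = expression $ ;
        expression = left:term op:('+' | '-') right:expression | term ;
        term = /\d+/ ;
    '''
    parser = tatsu.compile(grammar)
    ast = parser.parse('3 + 5')
    print(ast)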
def init(grammar_file='../twolcsyntax.ebnf'):
    """Initialize the module, compile the grammar, and return a tatsu parser.

    grammar_file -- the name of the file containing the EBNF grammar for rules
    """
    grammar = open(grammar_file).read()
    parser = tatsu.compile(grammar)
    return parser
def test_simple_grammar():
    g = generate_tatsu_grammar(
        gll_grammar.Grammar(rules={"Foo": gll_grammar.StringLiteral("foo")})
    )
    assert str(g) == str(compile("Foo = 'foo';"))
    assert g.parse("foo") == "foo"
    with pytest.raises(exceptions.FailedToken):
        g.parse("bar")
def simple_parse():
    grammar = open('umsg.ebnf').read()
    code = open('example.umsg').read()

    parser = tatsu.compile(grammar, asmodel=True)
    ast = parser.parse(code)

    walker = UmsgNodeWalker()
    print(dumps(walker.walk(ast), indent=2))
def register_model(self, model, *, extra_fields=()):
    grammar = tatsu.compile(QUERY_GRAMMAR)
    for field in [column.name for column in model.__table__.c] + list(extra_fields):
        grammar.rules[1].exp.options.append(tatsu.grammars.Token(field))
    self.models[model.__tablename__] = model
    self.query_grammars[model.__tablename__] = grammar
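# Hedged usage sketch for register_model above. `Event` is a hypothetical
# SQLAlchemy model with a mapped __table__; the index grammar.rules[1] is
# assumed (per the code above) to reach the field-name rule of QUERY_GRAMMAR.
#
#     registry.register_model(Event, extra_fields=['tag'])
#     events_grammar = registry.query_grammars[Event.__tablename__]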
class Grammar():
    def __init__(self, semantics=Semantics()):
        self.semantics = semantics

    def set_it(self, v):
        self.semantics.set_var("it", v)

    grammar = r'''
        @@grammar::EVAL

        start = expression $ ;

        expression
            = varname:varname op:'=' val:expression
            | varname:varname op:':=:' val:expression
            | left:expression op:'+' right:term
            | left:expression op:'-' right:term
            | term
            ;

        term
            = left:term op:'*' right:factor
            | left:term op:'×' right:factor
            | left:term op:'//' right:factor
            | left:term op:'/' right:factor
            | left:term op:'÷' right:factor
            | left:term op:'%' right:factor
            | factor
            ;

        factor
            = left:base '**' right:factor
            | left:base '^' right:factor
            | base
            ;

        base = number | funcall | varval | compound_expression;

        compound_expression = '(' @:expression ')' ;

        funcall = varval '(' ','.{expression}+ ')' ;

        number = fp | integer | pi;
        integer = signed_digits ;
        signed_digits = sign digits;
        sign = '-' | '+' | ();
        pi = 'π' | 'pi';
        fp = [ integer_part:signed_digits ] '.' [ fraction_part:digits ] [ e_part:e_notation ];
        e_notation = /[eE]/ @:signed_digits ;  # ('E' | 'e') does not work, because of nameguard
        digits = /\d+/;

        varval = varname ;
        varname = /[A-Za-z]\w*/ ;
    '''

    parser = tatsu.compile(grammar, name="eval")

    def parse(self, expr):
        return self.parser.parse(expr, semantics=self.semantics)
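# Hedged usage sketch for the Grammar class above. The printed results assume
# the project's Semantics class evaluates arithmetic and records assignments.
def _grammar_eval_demo():
    g = Grammar()
    print(g.parse('2 + 3 * 4'))  # expected: 14 with an evaluating Semantics
    print(g.parse('x = 10'))     # assignment; handled by the semantics object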
def parse_with_basic_semantics():
    grammar = open('grammars/calc_annotated.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse('3 + 5 * ( 10 - 20 )', semantics=CalcBasicSemantics())

    print('# BASIC SEMANTICS RESULT')
    pprint(ast, width=20, indent=4)
    print()
def annotated_parse():
    grammar = open('grammars/calc_annotated.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse('3 + 5 * ( 10 - 20 )')

    print('# ANNOTATED AST')
    pprint(ast, width=20, indent=4)
    print()
def parse_and_walk_model():
    grammar = open('grammars/calc_model.ebnf').read()
    parser = tatsu.compile(grammar, asmodel=True)
    model = parser.parse('3 + 5 * ( 10 - 20 )')

    print('# WALKER RESULT')
    print(CalcWalker().walk(model))
    print()
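# Sketch of what a walker like CalcWalker might look like. The node class
# names (Add, Multiply) are assumptions about calc_model.ebnf; TatSu
# dispatches to walk_<NodeClassName> methods on NodeWalker subclasses.
from tatsu.walkers import NodeWalker


class CalcWalkerSketch(NodeWalker):
    def walk_object(self, node):
        return node  # plain values (e.g. parsed numbers) pass through unchanged

    def walk_Add(self, node):
        return self.walk(node.left) + self.walk(node.right)

    def walk_Multiply(self, node):
        return self.walk(node.left) * self.walk(node.right)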
def parse_to_model():
    grammar = open('grammars/calc_model.ebnf').read()
    parser = tatsu.compile(grammar, asmodel=True)
    model = parser.parse('3 + 5 * ( 10 - 20 )')

    print('# MODEL TYPE IS:', type(model).__name__)
    print(json.dumps(model.asjson(), indent=4))
    print()
def query_parser():
    from .tagging_subtree_via_string import grammar_path_ as func
    _grammar_path = func('the-query-grammar.ebnf')
    with open(_grammar_path) as fh:
        ebnf_grammar_big_string = fh.read()

    import tatsu
    return tatsu.compile(ebnf_grammar_big_string, asmodel=True)
def parse_with_basic_semantics():
    grammar = open('./calc_cut.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse(
        ' 3 + 5 * (10 - 20)',
        semantics=CalcBasicSemantics(),
    )

    print('# basic semantic result')
    pprint(ast, width=20, indent=4)
def parse_factored():
    grammar = open('grammars/calc_factored.ebnf').read()
    parser = tatsu.compile(grammar)
    ast = parser.parse('3 + 5 * ( 10 - 20 )', semantics=CalcSemantics())

    print('# FACTORED SEMANTICS RESULT')
    pprint(ast, width=20, indent=4)
    print()
def setUp(self):
    # Load the grammar in from a reference file.
    with open(GRAMMAR_DIR + "HANA_SQL_Grammar.bnf") as f:
        grammar = f.read()
    self.debug = False
    self.model = compile(grammar)
def include(self, ast):
    return ""
    # The 'semantics' of the include statement are to parse the referenced
    # program and hoist it into this program. Disabled by the early return above.
    print("CWD " + str(os.getcwd()))
    grammar = open('dedalus.tatsu').read()
    prog = open(ast.file).read()
    parser = tatsu.compile(grammar)
    ast = parser.parse(prog, trace=False, colorize=True, semantics=DedalusSemantics())
    return ast
def _parse(text, start=None, preprocessor=None, implied_any=False,
           implied_base=False, pipes=True, subqueries=True):
    """Function for parsing EQL with arbitrary entry points.

    :param str text: EQL source text to parse
    :param str start: Entry point for the EQL grammar
    :param bool implied_any: Allow for event queries to match on any event type when a type is not specified.
        If enabled, the query ``process_name == "cmd.exe"`` becomes ``any where process_name == "cmd.exe"``
    :param bool implied_base: Allow for queries to be built with only pipes. Base query becomes 'any where true'
    :param bool pipes: Toggle support for pipes
    :param bool subqueries: Toggle support for subqueries, which are required by
        ``sequence``, ``join``, ``descendant of``, ``child of`` and ``event of``
    :param PreProcessor preprocessor: Optional preprocessor to expand definitions and constants
    :rtype: EqlNode
    """
    global GRAMMAR, tatsu_parser

    if tatsu_parser is None:
        GRAMMAR = get_etc_file('eql.ebnf')
        tatsu_parser = tatsu.compile(
            GRAMMAR,
            parseinfo=True,
            semantics=tatsu.semantics.ModelBuilderSemantics(),
        )

    if not text.strip():
        raise ParseError("No text specified", 0, 0, text)

    # Convert everything to unicode
    text = to_unicode(text)

    walker = EqlWalker(
        implied_any=implied_any,
        implied_base=implied_base,
        preprocessor=preprocessor,
        pipes=pipes,
        subqueries=subqueries,
    )

    try:
        model = tatsu_parser.parse(text, rule_name=start, start=start, parseinfo=True)
        eql_node = walker.walk(model)
        return eql_node
    except tatsu.exceptions.FailedParse as e:
        info = e.buf.line_info(e.pos)
        message = e.message
        line = info.line
        col = info.col
        source = info.text.rstrip()
        if not source:
            source = text.strip().splitlines()[-1].strip()
            col = max(len(source) - 1, 0)
        raise ParseError(message, line, col, source)
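# Hedged usage sketch for _parse above: a plain EQL event query parsed with the
# default entry point of the packaged eql.ebnf grammar (no pipes or subqueries).
def _parse_demo():
    return _parse('process where process_name == "cmd.exe"')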
def __init__(self):
    Fact.init_fact_tree()
    try:
        self.path = str(Path(sys.argv[0]).parent) + '/'
        grammar = self.read_file('../syntax.peg')
        self.parser = tatsu.compile(grammar)
    except Exception:
        print("Don't touch syntax file!")
        sys.exit(1)
def test_eol_comments_re_directive(self):
    grammar = '''
        @@eol_comments :: /#.*?$/

        test = "test" $;
    '''
    model = tatsu.compile(grammar, "test")
    code = codegen(model)
    compile(code, 'test.py', EXEC)
def test_whitespace_directive(self):
    grammar = '''
        @@whitespace :: /[\t ]+/

        test = "test" $;
    '''
    model = tatsu.compile(grammar, "test")
    code = codegen(model)
    compile(code, 'test.py', EXEC)
def pgl_parser(trace=False):
    pgl_grammar = grammars.load_plg_grammar()
    return tatsu.compile(
        pgl_grammar,
        semantics=PGLSemantics(),
        parseinfo=True,
        colorize=True,
        trace=trace,
    )
def test_startrule_issue62(self):
    grammar = '''
        @@grammar::TEST

        file_input = expr $ ;
        expr = number '+' number ;
        number = /[0-9]/ ;
    '''
    model = tatsu.compile(grammar=grammar)
    model.parse('4 + 5')
import tatsu
from tatsu.ast import AST
from tatsu.walkers import NodeWalker
from pprint import pprint
import unittest
from dedalus_parser import DedalusParser
import sys
import os

grammar = open('dedalus_asmodel.tatsu').read()
parser = tatsu.compile(grammar, asmodel=True)

dp = DedalusParser()
prog = dp.expand_file(sys.argv[1])
print("POGO " + prog)
w = dp.parse(prog)

pfx = os.path.basename(sys.argv[1])
w.to_dot().render(pfx + "_dataflow")
def __init__(self):
    self.grammar = open('dedalus_asmodel.tatsu').read()
    self.parser = tatsu.compile(self.grammar, asmodel=True)
interval = 'week'|'month'|'year'|'day'|'hour'|'minute'|'second'|'ms'|'millisecond';
intervals = 'weeks'|'months'|'years'|'days'|'hours'|'minutes'|'seconds'|'ms'|'milliseconds';
weekday = 'mon'|'monday'|'tue'|'tuesday'|'wed'|'wednesday'|'thu'|'thursday'|'fri'|'friday'|'sat'|'saturday'|'sun'|'sunday';
timezone = /[A-z0-9]+\/[A-z0-9]+/;

# actual constraints
timeofdayconstraint = ['at'] timeofdayconstraint:times;
aftertimeofdayconstraint = 'after' aftertimeofdayconstraint:time;
beforetimeofdayconstraint = 'before' beforetimeofdayconstraint:time;
beforetimeconstraint = ('before'|'until') before:datetimewithyear;
betweentimesofdayconstraint = ('between' @+:time 'and' @+:time) | ('from' @+:time 'to' @+:time);
nintervalconstraint = ('every' integer intervals) | ('every' ordinal interval) | ('every' enumber intervals);
intervalconstraint = ('every' interval);
dateconstraint = (["on"] dates) | ('every year on') date;
datewithyearconstraint = (["on"] datewithyear);
yeardayconstraint = "on the " ordinal "day of the year";
monthdayconstraint = "on the" @+:ordinal {[','] @+:ordinal} [[','] 'and' @+:ordinal] [["day"] "of the month"];
weekdayconstraint = ['every'|'on'] @+:weekday {[','] @+:weekday} [[','] 'and' @+:weekday];
nthweekdayconstraint = ('the'|'on the'|'every') @+:ordinal @+:weekday 'of the month';
monthconstraint = [('during'|'in'|'in the month of'|'in the months of')] @+:month {[','] @+:month} [[','] 'and' @+:month];

# Directives
startingat = ("starting" ['at'|'on'] @:datetimewithyear) | "starting on" weekday:weekday;

# constants
predefinedtime = "noon" | "midnight";
"""

parser = tatsu.compile(grammar)
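# Hedged usage sketch: the string above is only the tail of the grammar, so this
# assumes the full grammar (with the rules it references: time, ordinal, month,
# etc.) compiled successfully. 'every monday' should match weekdayconstraint.
def _schedule_demo():
    ast = parser.parse('every monday', start='weekdayconstraint')
    print(ast)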