def main(s, out_fn):
    """Parse *s* with the mapfile LALR grammar and render the tree as a PNG.

    The grammar is read from ``mappyfile/mapfile.lalr.g`` under the project
    root (two directories above this file). The parse tree is printed, written
    to ``docs/images/<out_fn>`` under the project root, and then
    pretty-printed.

    :param s: Text to parse.
    :param out_fn: File name of the PNG to create inside ``docs/images``.
    """
    graphviz_setup()

    project_root = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "../../")
    )
    # normpath() strips the trailing separator from project_root, so string
    # concatenation with "./mappyfile" glued the dot onto the last directory
    # name ("<root>./mappyfile"). Join the components instead.
    fld = os.path.join(project_root, "mappyfile")
    gf = os.path.join(fld, "mapfile.lalr.g")

    # Read through a context manager so the handle is closed deterministically.
    with open(gf) as grammar_file:
        grammar_text = grammar_file.read()

    g = Lark(grammar_text, parser="lalr", lexer="contextual")
    t = g.parse(s)
    print(t)
    pydot__tree_to_png(t, os.path.join(project_root, "docs/images", out_fn))
    print(t.pretty())
def bacula_parse(daemon="bareos-dir", hn=False):
    """Parse the preprocessed configuration of *daemon* with lark-parser.

    The text returned by ``preprocess_config(daemon, hn)`` is parsed from the
    ``value`` rule and the tree is passed through ``MyTransformer``.

    :return: The transformed tree, or ``None`` when the preprocessed
        configuration is empty/falsy.
    """
    config_grammar = r"""
        ?value: resources | resource | directive | string
        string : ESCAPED_STRING
        resource : (string "{" "\n" (directive|resource)* "}" "\n")
        resources : resource*
        directive : string " " "=" " " string "\n"

        %import common.ESCAPED_STRING
        %import common.WORD
        %import common.WS
        """
    config_parser = Lark(config_grammar, start='value')

    config_text = preprocess_config(daemon, hn)
    if config_text:
        parse_tree = config_parser.parse(config_text)
        return MyTransformer().transform(parse_tree)
    return None
zip_ref.extractall(tmpdir) # Replace project.json with open(os.path.join(tmpdir, "project.json"), "w") as f: f.write(json.dumps(self.data)) zip_ref = zipfile.ZipFile(filename, "w") for content_file in os.listdir(tmpdir): zip_ref.write(os.path.join(tmpdir, content_file), arcname=content_file) zip_ref.close() shutil.rmtree(tmpdir) with open("scratch.lark") as f: ScratchParser = Lark(f.read()) def parse(text): return parse_tree(ScratchParser.parse(text)) def parse_tree(t): if t.data == "start": return list(map(parse_tree, t.children)) if t.data == "function_definition": func = str(t.children[0]) opcode = "none" data = dict() if func == "when_flag_clicked":
from lark import Lark short_parser = Lark(r""" value: SIGNED_NUMBER | and | or or : "[" [value] ("|" value)* "]" and : [value] ("&" value) %import common.SIGNED_NUMBER %import common.WS %ignore WS """, start='value') def normalize_raw_text(raw_text): operators = dict() logical_form = '' round_brackets = 0 cur_operator = '' for c in raw_text: if c == '(': round_brackets += 1 if round_brackets == 0: logical_form += c else: cur_operator += c if c == ')': round_brackets -= 1 if round_brackets == 0:
def get_ast_from_idl_string(idl_string):
    """Parse *idl_string* and return the resulting Lark tree.

    The module-level ``_parser`` is built from ``grammar`` (start rule
    ``specification``) on the first call and reused afterwards.
    """
    global _parser
    cached = _parser
    if cached is None:
        # First use: build the parser once and remember it for later calls.
        cached = Lark(grammar, start='specification')
        _parser = cached
    return cached.parse(idl_string)
| "IF" aff "THEN" aff -> if_func | aff "OR" aff -> or_func | aff "AND" aff -> and_func | aff "IFF" aff -> iff_func | p -> prop_func p: word+ -> prop word: /[a-zA-Z][a-z]+/ %import common.WS %ignore /\./ %ignore WS """ parser = Lark(grammar) class MyTransformer(Transformer): def alias_start(self, value): return AndNode(value) def and_func(self, value): return AndNode(value) def if_func(self, value): return ImplicationNode(value[0], value[1]) def iff_func(self, value): return IffNode(value[0], value[1])
# Lark grammar for a small SQL dialect. Parsing starts at the `query` rule,
# which accepts create / drop / delete / update / select / insert statements.
# Keyword literals are written with the `i` suffix (e.g. "SELECT"i).
# NOTE(review): the `i` suffix is Lark's case-insensitive literal flag -- confirm
# against the Lark grammar reference for the version in use.
parser = Lark(r""" //start symbol query: create | drop | delete | update | select | insert // insert stmt insert: "INSERT INTO "i NAME ["(" name_expr ")"] "VALUES"i "(" insert_expr ")" -> insert insert_expr: literal[comma insert_expr] -> insert_expression name_expr: NAME [ comma name_expr] -> column_expression // create stmt create: "create table"i NAME "("expr ")"-> create expr: primary_expr -> only_primary | primary_expr comma non_primary_expr -> primary_beg | non_primary_expr comma primary_expr comma non_primary_expr -> primary_mid | non_primary_expr comma primary_expr -> primary_end primary_expr: NAME dtype "PRIMARY KEY"i -> pri non_primary_expr: NAME dtype [constraints] [comma non_primary_expr] -> non_primary dtype: "int"i -> int | "char"i | "date"i constraints: "not null"i | "unique"i -> unique | "foreign key"i // drop stmt drop: "drop table"i NAME -> drop_clause // update stmt update: "UPDATE"i NAME "SET"i update_ex ["WHERE"i where_ex] update_ex: NAME "=" literal [comma update_ex ] // delete stmt delete: "DELETE FROM"i NAME ["WHERE"i where_ex] -> delete_stmt // select stmt select: "SELECT"i [select_mode] select_expr "FROM"i from_expr [join NAME "ON"i boolean_expr ] ["WHERE"i where_ex] ["GROUP BY"i grp_expr ] ["HAVING"i boolean_expr] ["ORDER BY"i ord_expr] ["LIMIT"i limit_expr] limit_expr: [NUMBER ["OFFSET"i NUMBER] ] | "ALL"i select_expr: "*" | NAME".*" | NAME"."NAME | NAME | select_expr comma select_expr grp_expr: NAME"."NAME | NAME | grp_expr comma grp_expr ord_expr: NAME"."NAME ["ASC"i|"DESC"i] | NAME ["ASC"i|"DESC"i] | ord_expr comma ord_expr select_mode: "DISTINCT"i | "ALL"i from_expr: NAME[comma from_expr] -> from_expression | "(" select ")" ["as"i NAME ] -> nested_query join: "LEFT JOIN"i |"INNER JOIN"i |"RIGHT JOIN"i // common stmt ( where, operator, expression, literal) where_ex: boolean_expr boolean_expr: paren_expr | boolean_expr "OR"i boolean_expr -> or_oper | boolean_expr "AND"i boolean_expr -> and_oper paren_expr: operator | "(" boolean_expr 
"AND"i operator ")" -> and_oper | "(" boolean_expr "OR"i operator ")" -> or_oper operator: equal| notequal| greater| greater_equal| less| less_equal| between equal: expression "=" expression -> equal notequal: expression "<>" expression ->not_equal greater: expression ">" expression ->greater_than greater_equal: expression ">=" expression -> greater_than_equal less: expression "<" expression ->less_than less_equal: expression "<=" expression -> less_than_equal between: expression "BETWEEN"i expression "AND"i expression ->between expression: [NAME"."](NAME|"*") ->attribute_name | literal literal : "true" ->true | "false" -> false | NUMBER -> number | NAME -> string | "\"" /[a-zA-Z0-9_'-' ]+/"\"" -> single_quoted_string | "'" /[a-zA-Z0-9_'-' ]+/"'" -> double_quoted_string comma: "," -> comma %import common.CNAME -> NAME %import common.NUMBER -> NUMBER %import common.WS_INLINE %ignore WS_INLINE %import common.WS %ignore WS """, start='query')
if argv[1][0] != '-': opts[argv[0]] = argv[1] else: opts[argv[0]] = True elif len(argv) == 1: opts[argv[0]] = True # Reduce the argument list by copying it starting from index 1. argv = argv[1:] return opts if __name__ == '__main__': myargs = getopts(argv) dir_path = os.path.dirname(os.path.realpath(__file__)) grammar_file_path = os.path.join(dir_path, "grammar", "grammar.ebnf") f = open(grammar_file_path) parser = Lark(f.read()) if '-i' in myargs: f = open(myargs['-i']) else: exit(1) if '-v' in myargs: logging.basicConfig(level=logging.INFO) print(parser.parse(f.read()))
array = list pair = tuple object = dict number = inline_args(float) null = lambda self, _: None true = lambda self, _: True false = lambda self, _: False # json_parser = Lark(json_grammar, parser='earley', lexer='standard') # def parse(x): # return TreeToJson().transform(json_parser.parse(x)) json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) parse = json_parser.parse def test(): test_json = ''' { "empty_object" : {}, "empty_array" : [], "booleans" : { "YES" : true, "NO" : false }, "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], "nothing" : null } '''
# Lark grammar for filtration definitions; parsing starts at `filtrations`.
# Each filtration is a NAME, optional override tags, "=", and then either an
# arithmetic expression or a "cases:" block whose cases are keyed on
# "dim" INTEGER or "else". Whitespace is ignored.
math_parser = Lark(r""" filtrations: filtration+ filtration: NAME filtration_tag* "=" (expr | cases) cases: "cases:" case+ case: "dim" INTEGER ":" (expr | error) | ELSE ":" (expr | error) ELSE: "else" expr: term ((ADD | SUBTRACT) term)* term: factor ((MULTIPLY | DIVIDE) factor)* factor: (base (POWER exponent)?) | (EXP "(" exponent ")") base: "(" expr ")" | SIGNED_NUMBER | identifier | function | VAR | array_element exponent: "(" expr ")" | SIGNED_NUMBER | identifier | function | VAR | array_element error: "error" ESCAPED_STRING ADD : "+" SUBTRACT : "-" MULTIPLY : "*" DIVIDE : "/" POWER : "^" EXP : "exp" identifier: DIMENSION | arrays arrays: FACE_WEIGHTS | CELL_VERTICES global_arrays: VERTEX_WEIGHTS | VERTEX_OUT_DEGREES | VERTEX_IN_DEGREES DIMENSION : "dimension" FACE_WEIGHTS : "faceWeights" CELL_VERTICES : "cellVertices" VERTEX_OUT_DEGREES: "vertexOutDegrees" VERTEX_IN_DEGREES: "vertexInDegrees" VERTEX_WEIGHTS: "vertexWeights" filtration_tag: OVERRIDE_VERTICES | OVERRIDE_EDGES OVERRIDE_VERTICES: "overrideVertices" OVERRIDE_EDGES: "overrideEdges" function: max | min | sum | product | map | reduce array_functions: map array_like: arrays | array_functions max: "max(" array_like ("," range)? ")" min: "min(" array_like ("," range)? ")" sum: "sum(" array_like ("," range)? ("," lambda)? ")" product: "product(" array_like ("," range)? ("," lambda)? ")" reduce: ("combine" | "reduce") "(" array_like ("," range)? ("," lambda2) ("," expr) ")" range: (from "," to) map: ("map" | "modifyEach") "(" array_like ("," range)? ("," lambda) ")" lambda: VAR "->" expr lambda2: VAR VAR "->" expr from: expr to: expr array_element: (array_like | global_arrays) "[" expr "]" NAME: /[a-zA-Z_0-9]+/ VAR: /[a-zA-Z]+/ INTEGER : /[0-9]+/ %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS """, start='filtrations')
# Lark grammar for the script language. Parsing starts at `block`, and
# propagate_positions=True is passed to Lark.
# The grammar ignores "//" line comments, inline whitespace and newlines
# (see the %ignore directives at the end of the grammar text).
script_parser = Lark(r""" block: "{" (_block SEMICOLON*)? "}" _block: stmt SEMICOLON _block | stmt_no_semi SEMICOLON? _block | stmt SEMICOLON | stmt_no_semi ?stmt: call | var_decl | "goto" label -> label_goto | "return" -> return_stmt | "break" -> break_stmt | "break match" -> break_match_stmt | "break loop" -> break_loop_stmt | "sleep" expr -> sleep_stmt | "sleep" expr "secs" -> sleep_secs_stmt | "spawn" expr -> spawn_stmt | "await" expr -> await_stmt | "jump" expr -> jump_stmt | lhs "=" "spawn" expr -> spawn_set_stmt | lhs "=" "does_script_exist" expr -> does_script_exist | lhs set_op expr -> set_stmt | lhs set_op "(int)" expr -> set_int_stmt | lhs set_op "(float)" expr -> set_float_stmt | lhs set_op "(const)" expr -> set_const_stmt | bind_stmt | bind_set_stmt | "bind_padlock" expr expr collider_id expr -> bind_padlock_stmt | "unbind" -> unbind_stmt | "priority" expr -> set_priority | "timescale" expr -> set_timescale | "group" expr -> set_group | suspend_stmt | resume_stmt | kill_stmt | "buf_use" expr -> buf_use | "buf_read" expr+ -> buf_read | "buf_peek" expr expr -> buf_peek | "buf_usef" expr -> buf_usef | "buf_readf" expr+ -> buf_readf | "buf_peekf" expr expr -> buf_peekf | "arr_use" expr -> use_array | "flags_use" expr -> use_flags | "arr_new" expr expr -> new_array ?stmt_no_semi: label ":" -> label_decl | if_stmt | match_stmt | loop_stmt | ["await"] block -> block_stmt | "spawn" block -> spawn_block_stmt | "parallel" block -> parallel_block_stmt call: (c_identifier | HEX_INT) "(" [expr ("," expr)* [","]] ")" if_stmt: "if" "(" expr cond_op expr ")" block ["else" block] ?cond_op: "==" -> cond_op_eq | "!=" -> cond_op_ne | ">" -> cond_op_gt | "<" -> cond_op_lt | ">=" -> cond_op_ge | "<=" -> cond_op_le | "&" -> cond_op_flag | "!&" -> cond_op_not_flag match_stmt: "match" expr "{" (match_cases SEMICOLON*)? "}" match_const_stmt: "matchc" expr "{" (match_cases SEMICOLON*)? 
"}" match_cases: match_case SEMICOLON* match_cases | match_case ?match_case: "else" block -> case_else | cond_op expr ["," multi_case] block -> case_op | expr "..." expr ["," multi_case] block -> case_range | multi_case block -> case_multi multi_case: expr ("," expr)* suspend_stmt: "suspend" control_type expr ("," control_type expr)* [","] resume_stmt: "resume" control_type expr ("," control_type expr)* [","] kill_stmt: "kill" control_type expr ("," control_type expr)* [","] ?control_type: "group" -> control_type_group | "others" -> control_type_others | ["script"] -> control_type_script bind_stmt: "bind" expr expr collider_id bind_set_stmt: lhs "=" "bind" expr expr collider_id loop_stmt: "loop" [expr] block var_decl: ("int"|"float") variable ?collider_id: "entity" "(" expr ")" -> entity_id | expr ?expr: c_const_expr | ESCAPED_STRING | SIGNED_INT | SIGNED_DECIMAL | HEX_INT | variable | c_identifier ?lhs: c_const_expr | variable ?set_op: "=" -> set_op_eq | "+=" -> set_op_add | "-=" -> set_op_sub | "*=" -> set_op_mul | "/=" -> set_op_div | "%=" -> set_op_mod | "&=" -> set_op_and | "|=" -> set_op_or variable: "$" CNAME c_identifier: CNAME c_const_expr: "(" c_const_expr_internal ")" c_const_expr_internal: "(" (c_const_expr_internal | NOT_PARENS)+ ")" NOT_PARENS: /[^()]+/ SEMICOLON: ";" label: /[a-zA-Z0-9_]+/ %import common.CNAME %import common.SIGNED_INT %import common.DECIMAL %import common.HEXDIGIT %import common.ESCAPED_STRING SIGNED_DECIMAL: ["+"|"-"] DECIMAL HEX_INT: ["+"|"-"] "0x" HEXDIGIT+ LINE_COMMENT: "//" /[^\n]*/ NEWLINE %ignore LINE_COMMENT %import common.WS_INLINE %import common.NEWLINE %ignore WS_INLINE %ignore NEWLINE """, start="block", propagate_positions=True) #, parser="lalr", cache=True)
@v_args(inline=True) def descriptor(self, a): return a.value def link_list(self, a): return a def section(self, a): return {a[0]: a[1]} def start(self, a): result = {} for subdict in a: result.update(subdict) return result if __name__ == '__main__': json_parser = Lark.open("grammar.lark") with open("file.txt") as fo: tree = json_parser.parse(fo.read()) print(tree.pretty()) transformer = MyTransformer() transformed_tree = transformer.transform(tree) print(transformed_tree)
self.pos_tags = [] self.ner_tags = [] self.map = {} self.graph = Graph() from itertools import chain import json from lark import Lark, Transformer parser = Lark(''' start: term term: "(" varname "/" symbol (":" relation (term | value))* ")" varname: /[a-zA-Z0-9@'_+-]+/ symbol: /[a-zA-Z0-9@'_+-]+/ relation: /[a-zA-Z0-9@'_+-]+/ value: /(?:#[^#]+#|[a-zA-Z0-9@'_&+.-][a-zA-Z0-9@+ &:_'.-]*)/ %import common.WS %ignore WS ''', parser='lalr') class AmrTransformer(Transformer): relation_instance_count = 0 def __init__(self, conversation, turn): self.graph = Graph() Transformer.__init__(self) self.conversation = conversation
from pathlib import Path from lark import Lark, InlineTransformer from sidekick import pipeline from ox.backend.python.nodes_expr import Atom, Name, BinOp, GetAttr from ox.backend.python.nodes_stmt import Symbol from ox.algorithms import reduce_op_chain as op_chain path = Path(__file__).parent.parent / "grammars" / "python-template.lark" grammar = Lark(open(path), parser="lalr") fn = staticmethod cte = lambda x: lambda *args: x atom = lambda cls, *opts: lambda *args: Atom(cls(args[-1], *opts)) class PythonT(InlineTransformer): int = atom(int) hex = fn(lambda x: int(x[2:], 16)) oct = fn(lambda x: int(x[2:], 8)) bin = fn(lambda x: int(x[2:], 2)) float = atom(float) complex = atom(complex) string = atom(eval) true = cte(Atom(True)) false = cte(Atom(False)) none = cte(Atom(None)) ellipsis = cte(Atom(...)) name = fn(pipeline(Symbol, Name)) opchain = fn(lambda *xs: xs[0] if len(xs) == 1 else op_chain(xs, expr=BinOp))
from lark import Lark, InlineTransformer, Token # **Q2)** Crie um programa baseado no último exemplo da questão anterior que leia listas no estilo Javascript e retorne o valor processado, trocando `undefined` por None em Python. Assuma que os elementos podem ser outras listas ou números inteiros. Deste modo, o código de entrada `"[1,,2,[,,],]"` seria convertido em `[1, None, 2, [None, None]]`. q2 = r""" start : list list : "[" (item | ",")* "]" item : ([1-9] | [a-z])+ """ grammar = Lark(q2) # for example in examples: # result = grammar.parse(example) # print(result.pretty()) class tr(InlineTransformer): def array(self, *arg): return list(arg) def number(self, arg): return int(arg) def empty(self): return None def do(): tra = tr() examples = ["[1,,2,[,,],]"]
return children[0][1:-1] def CNAME(self, children): return children[0][1:-1] test_program = """ { (On, 10), (Off, 20), loop 3: { (On, 1), (Off, 2) } } """ def loadBSL(program): """Parses a Burnlight Scheduling Language string into a Program""" parser = Lark(schedule_grammar) return ProgramTransformer().transform(parser.parse(program)) if __name__ == '__main__': parser = Lark(schedule_grammar) tree = parser.parse(test_program) print(tree.pretty()) program = ProgramTransformer().transform(tree) print(program)
from lark import Lark

# `bar` is a regexp terminal (/a|b|c*/) followed by the literal "foo";
# the input is one or more `bar`s.
l = Lark(
    '''
    start: bar+
    bar: /a|b|c*/ "foo"
    '''
)
l.parse('afoobfooccfoo')

# One or more repetitions of the literal "bar".
l2 = Lark(
    '''
    start: "bar"+
    '''
)
l2.parse('barbarbar')
from persistence.botdb import BotDB from cogs import userconfig from discordclasses.confirm import Confirm from discordclasses.deletable import DeletableListView DB_NAME = 'macros' SUPPRESS_SAVE_CONFIG_KEY = 'Dice.SuppressSaveSuggestionUntil' parser = Lark(r""" %import common.WS %ignore WS %import common.DIGIT POSINT : DIGIT DIGIT* sign : /[+-]/ rollset : expression ("," expression)* expression : die -> roll | POSINT -> mod | expression sign expression -> math die : [POSINT] _DSEPARATOR POSINT count : POSINT size : POSINT _DSEPARATOR : "d" """, start='rollset') class RollsetTransformer(Transformer): def rollset(self, list): results = OrderedDict() for item in list: key = item['name'] iterator = 0
class LarkAdapter():
    """Expose a PCFG as a Lark Earley parser and score its parses.

    On construction the grammar text is written to ``LARKD / 'grammar.lark'``
    and a Lark parser (start rule ``sentence``, explicit ambiguity) is built
    from the same text.
    """

    def __init__(self, pcfg: PCFG):
        """Store *pcfg*, dump its grammar to disk and build the parser."""
        self.pcfg = pcfg
        self.savelark(LARKD / 'grammar.lark')
        log("Creating Lark Parser...")
        self.parser = Lark(
            self.larkstr(),
            start='sentence',
            ambiguity='explicit',
            parser='earley'
        )
        self.disambig = Disambiguator(self.pcfg)

    def rules_larkstr(self, lhs: str, rules: OrderedDict) -> str:
        """Return one Lark rule line ``lhs: alt | alt ...`` for *rules*.

        Right-hand-side symbols of preterminals are emitted as quoted string
        literals; all other symbols are emitted verbatim.
        """
        is_preterminal = lhs in self.pcfg.preterminals
        alternatives = []
        for rule in rules:
            symbols = [f'"{p}"' for p in rule.rhs] if is_preterminal else rule.rhs
            alternatives.append(' '.join(symbols))
        return lhs + ': ' + ' | '.join(alternatives) + '\n'

    def larkstr(self) -> str:
        """Return the complete Lark grammar text for ``self.pcfg``."""
        # Collect the pieces and join once instead of growing a string with +=.
        parts = [
            self.rules_larkstr(lhs, rules)
            for lhs, rules in self.pcfg.rules.items()
        ]
        parts.append("%import common.WS\n")
        parts.append("%ignore WS\n")
        return "".join(parts)

    def savelark(self, file: Path):
        """Write the grammar text to *file* as UTF-8."""
        with open(file, 'w', encoding='utf8') as f:
            f.write(self.larkstr())

    def test(self, tokens: List[str], true_tree: Tree, verbose: bool = False) \
            -> Optional[Tuple[float, float, float]]:
        """Parse *tokens*, disambiguate, and score against *true_tree*.

        :return: ``(precision, recall, fscore)``, or ``None`` when the
            sentence could not be parsed.
        """
        larktree = self.parse(tokens)
        if larktree is None:
            return None

        self.disambig.disambiguate(larktree)
        if verbose:
            log("Post disambiguation:")
            log(larktree.pretty())

        tree = Tree.fromlark(larktree)
        recall = self.calc_recall(tree, true_tree)
        precision = self.calc_precision(tree, true_tree)
        fscore = f1(recall, precision)

        if verbose:
            log(precision, recall, fscore)
            log()

        return precision, recall, fscore

    @staticmethod
    def ruleset(tree: Tree):
        """Return the set of rules created from every node of *tree*."""
        ruleset: Set[Rule] = set()
        for node in tree.iterlevels():
            ruleset.add(create_rule(node))
        return ruleset

    @staticmethod
    def calc_precision(tree: Tree, true_tree: Tree) -> float:
        """Fraction of the rules in *tree* that also occur in *true_tree*.

        Returns 0.0 when *tree* yields no rules.
        """
        ruleset, true_ruleset = LarkAdapter.ruleset(tree), LarkAdapter.ruleset(true_tree)
        correct = len(true_ruleset.intersection(ruleset))
        total = len(ruleset)
        # An empty predicted rule set would otherwise raise ZeroDivisionError.
        return correct / total if total else 0.0

    @staticmethod
    def calc_recall(tree: Tree, true_tree: Tree) -> float:
        """Fraction of the rules in *true_tree* that also occur in *tree*.

        Returns 0.0 when *true_tree* yields no rules.
        """
        ruleset, true_ruleset = LarkAdapter.ruleset(tree), LarkAdapter.ruleset(true_tree)
        correct = len(true_ruleset.intersection(ruleset))
        total = len(true_ruleset)
        # An empty gold rule set would otherwise raise ZeroDivisionError.
        return correct / total if total else 0.0

    def parse(self, tokens: List[str]) -> Optional[LarkTree]:
        """Parse the space-joined *tokens*; return ``None`` if Lark rejects them."""
        try:
            return self.parser.parse(" ".join(tokens))
        except (UnexpectedCharacters, ParseError):
            # Both failure modes were handled identically; one handler suffices.
            log("Lark does not support this sentence structure.")
            return None
def install_grammar(cls):
    """Build a Lark parser from ``cls.grammar_filename`` and attach it to *cls*.

    The grammar file is looked up under ``grammar_root``; the parser is
    created with ``GenericParser.lark_options`` and stored as ``cls.lark``.

    :return: *cls*, so the function can be used as a class decorator.
    """
    grammar_location = str(Path(grammar_root / cls.grammar_filename).resolve())
    with open(grammar_location, 'r') as stream:
        cls.lark = Lark(stream, **GenericParser.lark_options)
    return cls
Ops, ParsedList, QasmProgram, QuantumRegister, TensorOp, Term, UnaryOperation, flatten, format_wires, unpack, ) with open(pathlib.Path(__file__).parent / "qasm.lark", "r") as _f: qasm_grammar = "".join(_f.readlines()) qasm_parser = Lark(qasm_grammar, start="mainprogram") class QASMToIRTransformer(Transformer): """Transformer for processing the Lark parse tree. Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. All method names mirror the corresponding symbols from the grammar. """ # pylint:disable=no-self-use PI = lambda self, _: sympy.pi sin = lambda self, _: "sin" cos = lambda self, _: "cos" tan = lambda self, _: "tan"
# Lark grammar for a small C-like language, parsed from the `start` rule.
# Expressions are layered as group -> mult -> add -> compare1 -> compare2 ->
# logical_and -> logical_or; every binary operation is aliased to `bin_op`.
# Statements cover variable declarations, assignments, calls, `if`/`else`,
# `for` and brace-delimited statement lists. Whitespace and comments
# ("/* ... */" and "// ...") are ignored.
parser = Lark(''' %import common.NUMBER %import common.ESCAPED_STRING %import common.CNAME %import common.NEWLINE %import common.WS %ignore WS COMMENT: "/*" /(.|\\n|\\r)+/ "*/" | "//" /(.)+/ NEWLINE %ignore COMMENT num: NUMBER -> literal str: ESCAPED_STRING -> literal ident: CNAME ADD: "+" SUB: "-" MUL: "*" DIV: "/" AND: "&&" OR: "||" BIT_AND: "&" BIT_OR: "|" GE: ">=" LE: "<=" NEQUALS: "!=" EQUALS: "==" GT: ">" LT: "<" call: ident "(" ( expr ( "," expr )* )? ")" ?group: num | str | ident | call | "(" expr ")" ?mult: group | mult ( MUL | DIV ) group -> bin_op ?add: mult | add ( ADD | SUB ) mult -> bin_op ?compare1: add | add ( GT | LT | GE | LE ) add -> bin_op ?compare2: compare1 | compare1 ( EQUALS | NEQUALS ) compare1 -> bin_op ?logical_and: compare2 | logical_and AND compare2 -> bin_op ?logical_or: logical_and | logical_or OR logical_and -> bin_op ?expr: logical_or ?var_decl_inner: ident | ident "=" expr -> assign vars_decl: ident var_decl_inner ( "," var_decl_inner )* ?simple_stmt: ident "=" expr -> assign | call ?for_stmt_list: vars_decl | ( simple_stmt ( "," simple_stmt )* )? -> stmt_list ?for_cond: expr | -> stmt_list ?for_body: stmt | ";" -> stmt_list ?stmt: vars_decl ";" | simple_stmt ";" | "if" "(" expr ")" stmt ("else" stmt)? -> if | "for" "(" for_stmt_list ";" for_cond ";" for_stmt_list ")" for_body -> for | "{" stmt_list "}" stmt_list: ( stmt ";"* )* ?prog: stmt_list ?start: prog ''', start='start') # , parser='lalr')
validate_name, ) class PythonIndenter(Indenter): NL_type = '_NEWLINE' OPEN_PAREN_types = ('LPAR', 'LSQB', 'LBRACE') CLOSE_PAREN_types = ('RPAR', 'RSQB', 'RBRACE') INDENT_type = '_INDENT' DEDENT_type = '_DEDENT' tab_len = 4 parser = Lark.open( 'python3.lark', parser='lalr', rel_to=__file__, postlex=PythonIndenter(), start='file_input', propagate_positions=True, ) def parse_python(source_code: str) -> Tree: return parser.parse(source_code + '\n') def get_node_type(node: LarkNode) -> str: """ Returns the node type (name of matching grammar rule) for the given lark node. """ if isinstance(node, Tree):
def __init__(self, grammar_file='config_grammar.lark', start='root') -> None:
    """Load the grammar resource and build an LALR parser from it.

    :param grammar_file: Name of the grammar resource, resolved via
        ``res_path``.
    :param start: Name of the grammar's start rule.
    """
    grammar_location = res_path(grammar_file)
    with open(grammar_location, 'r') as stream:
        self._grammar = stream.read()
    # The transformer is handed to Lark itself rather than applied afterwards.
    self._parser = Lark(
        self._grammar,
        start=start,
        parser='lalr',
        transformer=TreeToDict(),
    )
class FrutexParser():
    """Parses and evaluates cell expressions using a Lark grammar."""

    def __init__(self):
        """Build the expression parser (start rule ``expr``)."""
        expression_grammar = r"""
            ?expr: comp_exp | value | if_exp | add_exp | or_exp
            ?if_exp: "if" "(" expr ")" expr ("elif" "(" expr ")" expr )* ["else" expr]
            ?comp_exp: expr _comp_op expr
            ?add_exp: term_exp (_add_op term_exp)*
            ?term_exp: factor (_mult_op factor)*
            ?factor: _factor_op factor | pow_exp
            ?pow_exp: value ["**" factor]
            ?or_exp: and_exp (_or_op and_exp)*
            ?and_exp: comp_exp (_and_op comp_exp)*
            ?value: "(" expr ")"
                | float
                | integer
                | NAME "(" [arguments] ")" -> funccall
                | NAME -> var
                | string
            string : ESCAPED_STRING
            integer: INT
            float: DECIMAL
            arguments: expr ("," expr)*
            !_comp_op: ">"|"<"|">="|"<="|"=="|"!="
            !_mult_op: "*"|"/"|"//"|"%"
            !_factor_op: "+"|"-"|"not"
            !_add_op: "+"|"-"
            !_or_op: "or"
            !_and_op: "and"
            NAME: /[a-zA-Z_][\w:]*/
            %import common.DECIMAL
            %import common.INT
            %import common.ESCAPED_STRING
            %import common.SIGNED_NUMBER
            %import common.WS
            %ignore WS
            """
        self.parser = Lark(expression_grammar, start='expr')

    def parse(self, code):
        """Return the parse tree of *code* after stripping one leading space
        from every continuation line (each "\\n " becomes "\\n")."""
        normalized = code.replace("\n ", '\n')
        return self.parser.parse(normalized)

    def eval(self, cell, attrib, config, cell_dict):
        """Evaluate the expression stored for *attrib* on *cell*.

        When the cell has no expression for *attrib*, the default from
        *config* is used instead.
        """
        stored = cell.expressions.get(attrib)
        if stored is not None:
            source = stored.text
        else:
            source = config.get_default(attrib)
        tree = self.parse(source)
        evaluator = tree_to_repr(tree, (cell, attrib))
        return evaluator.eval(cell, attrib, config, cell_dict)
"asciiz": lambda val: AsciizDecl(val.val) } @v_args(inline=True) class DeclTransformer(Transformer): def create_decl(self, decl_type, val): if decl_type not in _decl_types: raise Exception(f"line {decl_type.line}: No such declaration type: .{decl_type}") return _decl_types[decl_type](val) class SegmentTransformer(Transformer): def text_segm(self, lst): return TextSegment(lst) def data_segm(self, lst): return DataSegment(lst) transformer = RegisterTransformer() * ConstTransformer() * DeclTransformer() * LabelTransformer() * InstrTransformer() * SegmentTransformer() grammar_path = path.dirname(path.abspath(__file__)) parser = Lark.open(f"{grammar_path}/mipsasm.lark", parser='lalr') def parse(text): if text[-1] != '\n': text = text + '\n' tree = parser.parse(text) tree = transformer.transform(tree) return tree.children
PN_CHARS_BASE: /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF]/ PN_CHARS_U: PN_CHARS_BASE | "_" PN_CHARS: PN_CHARS_U | /[\-0-9\u00B7\u0300-\u036F\u203F-\u2040]/ PN_PREFIX: PN_CHARS_BASE ((PN_CHARS | ".")* PN_CHARS)? PN_LOCAL: (PN_CHARS_U | ":" | /[0-9]/ | PLX) ((PN_CHARS | "." | ":" | PLX)* (PN_CHARS | ":" | PLX))? PLX: PERCENT | PN_LOCAL_ESC PERCENT: "%" HEX~2 HEX: /[0-9A-Fa-f]/ PN_LOCAL_ESC: "\\" /[_~\.\-!$&'()*+,;=\/?#@%]/ %ignore WS COMMENT: "#" /[^\n]/* %ignore COMMENT """ turtle_lark = Lark(grammar, start="turtle_doc", parser="lalr") LEGAL_IRI = re.compile(r'^[^\x00-\x20<>"{}|^`\\]*$') def validate_iri(iri): if not LEGAL_IRI.match(iri): raise ValueError('Illegal characters in IRI: ' + iri) return iri def unpack_predicate_object_list(subject, pol): if not isinstance(subject, (NamedNode, BlankNode)): for triple_or_node in subject: if isinstance(triple_or_node, Triple): yield triple_or_node
def __init__(self, filename):
    """Read the grammar text from *filename* and build the Lark parser.

    :param filename: Path of the UTF-8 encoded grammar file.
    """
    # Close the handle as soon as the text has been read; the original left
    # it open for the lifetime of the object. ``self.ebnf`` is still assigned
    # so the attribute keeps existing, but it now refers to a closed file.
    with open(filename, mode="r", encoding="utf-8") as ebnf:
        self.ebnf = ebnf
        self.grammar = ebnf.read()
    # The original passed the *string* "false", which is truthy, so the option
    # was effectively enabled. Spell that out as True to keep the parser's
    # behaviour unchanged.
    # NOTE(review): if the intent was to drop filtered tokens, switch this to
    # False and re-check every consumer of the parse tree.
    self.parser = Lark(self.grammar, start="program", keep_all_tokens=True)
# Lark grammar parsed with the LALR parser from start rule `s` (debug=True).
# A program is a list of statements (rules with `:-`, weak constraints with
# `:~`, optimize statements) optionally followed by a query.
# "%"-prefixed line comments, "%* ... *%" block comments and blanks are ignored.
# NOTE(review): presumably the ASP-Core-2 input language, going by the
# variable name -- confirm against the specification.
aspCoreParser = Lark(r''' s : program | "^" program : statements | query | statements query statements : statements statement | statement statement : CONS body DOT | CONS DOT | head CONS DOT | head CONS body DOT | head DOT | WCONS body DOT SQUARE_OPEN weight_at_level SQUARE_CLOSE | WCONS DOT SQUARE_OPEN weight_at_level SQUARE_CLOSE | optimize DOT query : classical_literal QUERY_MARK head : disjunction | choice body : naf_literal | aggregate | NAF aggregate | body COMMA naf_literal | body COMMA aggregate | body COMMA NAF aggregate disjunction : disjunction OR classical_literal | classical_literal choice : CURLY_OPEN choice_elements CURLY_CLOSE binop term | CURLY_OPEN CURLY_CLOSE binop term | CURLY_OPEN CURLY_CLOSE | CURLY_OPEN choice_elements CURLY_CLOSE | term binop CURLY_OPEN choice_elements CURLY_CLOSE | term binop CURLY_OPEN CURLY_CLOSE binop term | term binop CURLY_OPEN CURLY_CLOSE | term binop CURLY_OPEN choice_elements CURLY_CLOSE binop term | CURLY_OPEN choice_elements CURLY_CLOSE binop classical_literal | CURLY_OPEN CURLY_CLOSE binop classical_literal | classical_literal binop CURLY_OPEN choice_elements CURLY_CLOSE | classical_literal binop CURLY_OPEN CURLY_CLOSE binop classical_literal | classical_literal binop CURLY_OPEN CURLY_CLOSE binop term | term binop CURLY_OPEN CURLY_CLOSE binop classical_literal | classical_literal binop CURLY_OPEN CURLY_CLOSE | classical_literal binop CURLY_OPEN choice_elements CURLY_CLOSE binop classical_literal | classical_literal binop CURLY_OPEN choice_elements CURLY_CLOSE binop term | term binop CURLY_OPEN choice_elements CURLY_CLOSE binop classical_literal choice_elements : choice_elements SEMICOLON choice_element | choice_element choice_element : classical_literal COLON naf_literal | classical_literal COLON | classical_literal aggregate : aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE binop term | aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE binop classical_literal | aggregate_function 
CURLY_OPEN CURLY_CLOSE binop term | aggregate_function CURLY_OPEN CURLY_CLOSE binop classical_literal | aggregate_function CURLY_OPEN CURLY_CLOSE | aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE | b aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE | b aggregate_function CURLY_OPEN CURLY_CLOSE binop term | b aggregate_function CURLY_OPEN CURLY_CLOSE binop classical_literal | b aggregate_function CURLY_OPEN CURLY_CLOSE | b aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE binop term | b aggregate_function CURLY_OPEN aggregate_elements CURLY_CLOSE binop classical_literal aggregate_elements : aggregate_elements SEMICOLON aggregate_element | aggregate_element aggregate_element : terms COLON naf_literals | terms | terms COLON | COLON | COLON naf_literals aggregate_function : AGGREGATE_COUNT | AGGREGATE_MAX | AGGREGATE_MIN | AGGREGATE_SUM optimize : optimize_function CURLY_OPEN optimize_elements CURLY_CLOSE | optimize_function CURLY_OPEN CURLY_CLOSE optimize_function : MAXIMIZE | MINIMIZE optimize_elements : optimize_elements SEMICOLON optimize_element | optimize_element optimize_element : weight_at_level COLON naf_literals | weight_at_level COLON | weight_at_level weight_at_level : term AT term COMMA terms | classical_literal AT term COMMA terms | term AT classical_literal COMMA terms | classical_literal AT classical_literal COMMA terms | term AT term | classical_literal AT term | term AT classical_literal | classical_literal AT classical_literal | term | classical_literal naf_literals : naf_literals COMMA naf_literal | naf_literal naf_literal : classical_literal | NAF classical_literal | builtin_atom builtin_atom : b term | b classical_literal binop : EQUAL | UNEQUAL | LESS | GREATER | LESS_OR_EQ | GREATER_OR_EQ terms : terms COMMA term | terms COMMA classical_literal | term | classical_literal term : NUMBER | STRING | VARIABLE | ANONYMOUS_VARIABLE | PAREN_OPEN term PAREN_CLOSE | termdue term | termdue termdue termdue: NUMBER arithop 
| STRING arithop | VARIABLE arithop | ANONYMOUS_VARIABLE arithop | PAREN_OPEN termdue PAREN_CLOSE arithop | PAREN_OPEN term PAREN_CLOSE arithop | ID PAREN_OPEN terms PAREN_CLOSE arithop arithop : PLUS | MINUS | TIMES | DIV classical_literal : ID | ID PAREN_OPEN PAREN_CLOSE | ID PAREN_OPEN terms PAREN_CLOSE | MINUS classical_literal b: term binop | classical_literal binop ID: /[a-z][A-Za-z0-9_]*/ VARIABLE: /[A-Z][A-Za-z0-9_]*/ STRING: "\"" ("\\\""|/[^"]/)* "\"" NUMBER: "0"|/[1-9][0-9]*/ ANONYMOUS_VARIABLE: "_" DOT: "." COMMA: "," QUERY_MARK: "?" COLON: ":" SEMICOLON: ";" OR: "|" NAF: "not" CONS: ":-" WCONS: ":~" PLUS: "+" MINUS: "-" TIMES: "*" DIV: "/" AT: "@" PAREN_OPEN: "(" PAREN_CLOSE: ")" SQUARE_OPEN: "[" SQUARE_CLOSE: "]" CURLY_OPEN: "{" CURLY_CLOSE: "}" EQUAL: "=" UNEQUAL: "<>"|"!=" LESS: "<" GREATER: ">" LESS_OR_EQ: "<=" GREATER_OR_EQ: ">=" AGGREGATE_COUNT: "#count" AGGREGATE_MAX: "#max" AGGREGATE_MIN: "#min" AGGREGATE_SUM: "#sum" MINIMIZE: "#minimi" /[zs]/ "e" MAXIMIZE: "#maximi" /[zs]/ "e" COMMENT: "%" /([^*\n][^\n]*)?\n/ MULTI_LINE_COMMENT: "%*" /([^*]|\*[^%])/* "*%" BLANK: /[ \t\n]/+ %ignore COMMENT %ignore MULTI_LINE_COMMENT %ignore BLANK ''', start="s", parser='lalr', debug=True)
# Prevent expressions like (1and1) or (1ina)
# Changing these terminals in the grammar will prevent collision detection
# Waiting on irregular!
from lark.lexer import PatternRE

_operators = ['IN', 'NOT_IN', 'AND', 'OR']


def _edit_terminals(t):
    """Make keyword-operator terminals refuse to match inside a longer word.

    For terminals named in ``_operators`` the pattern is replaced by a regexp
    consisting of the original pattern value followed by a ``(?!\\w)``
    negative lookahead.
    """
    if t.name in _operators:
        # Raw string: "\w" inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        t.pattern = PatternRE(r'%s(?!\w)' % t.pattern.value)


parser = Lark.open(
    'preql.lark',
    rel_to=__file__,
    parser='lalr',
    postlex=Postlexer(),
    start=['stmts', 'expr'],
    maybe_placeholders=True,
    propagate_positions=True,
    cache=True,
    edit_terminals=_edit_terminals,
    # transformer=T()
)


def terminal_desc(name):
    """Return a human-readable description of the terminal called *name*.

    ``_NL`` is described as ``<NEWLINE>``; string-pattern terminals are
    described by their literal value; anything else as ``<NAME>``.
    """
    if name == '_NL':
        return "<NEWLINE>"
    p = parser.get_terminal(name).pattern
    if p.type == 'str':
        return p.value
    return '<%s>' % name
JS: /{%.*?%}/s js: JS? NAME: /[a-zA-Z_$]\w*/ COMMENT: /#[^\n]*/ REGEXP: /\[.*?\]/ %import common.ESCAPED_STRING -> STRING %import common.WS %ignore WS %ignore COMMENT """ nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard') def _get_rulename(name): name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name) return 'n_' + name.replace('$', '__DOLLAR__').lower() class NearleyToLark(InlineTransformer): def __init__(self): self._count = 0 self.extra_rules = {} self.extra_rules_rev = {} self.alias_js_code = {}
def __init__(self):
    """Create ``self.parser`` from the ``seq.g`` grammar resource of this module's package."""
    grammar_data = pkg_resources.resource_filename(__name__, 'seq.g')
    # Lark accepts an open file object as the grammar; the context manager
    # makes sure the handle is closed once the parser has been built.
    with open(grammar_data) as grammar_file:
        self.parser = Lark(grammar_file)
def loadBSL(program):
    """Parse a Burnlight Scheduling Language string and return it as a Program."""
    bsl_parser = Lark(schedule_grammar)
    # Build the transformer before parsing, then feed it the parse tree.
    to_program = ProgramTransformer().transform
    return to_program(bsl_parser.parse(program))
def read_ampgen(
    cls, filename=None, text=None, grammar=None, parser="lalr", **kargs
):
    """
    Parse AmpGen input from a file or from a string.

    When a ``fast_coherent_sum`` entry is found, ``cls.cartesian`` is set from it.

    :param filename: File to read; when given, its contents replace ``text``
    :param text: Text to parse (used when no filename is given)
    :param grammar: Lark grammar text; defaults to ``ampgen.lark`` under ``data.basepath``
    :param parser: Value passed to Lark as ``parser``
    :param kargs: Extra keyword arguments passed through to Lark
    :return: expanded lines, parameters DataFrame, constants DataFrame, event-type particles
    :raises RuntimeError: if neither ``filename`` nor ``text`` is provided
    """
    if grammar is None:
        grammar = data.basepath.joinpath("ampgen.lark").read_text()

    # A filename takes precedence; with neither source there is nothing to parse.
    if filename is None:
        if text is None:
            raise RuntimeError("Must have filename or text")
    else:
        with open(filename) as source:
            text = source.read()

    ampgen_parser = Lark(
        grammar, parser=parser, transformer=AmpGenTransformer(), **kargs
    )
    parsed = ampgen_parser.parse(text)

    # Pull the individual record kinds out of the transformed parse result.
    (event_type,) = get_from_parser(parsed, "event_type")
    cplx_decay_lines = get_from_parser(parsed, "cplx_decay_line")
    variables = get_from_parser(parsed, "variable")
    constants = get_from_parser(parsed, "constant")

    try:
        all_states = list(map(Particle.from_string, event_type))
    except Exception:
        print("Did not find at least one of the state particles from", *event_type)
        raise

    fcs_entries = get_from_parser(parsed, "fast_coherent_sum")
    if fcs_entries:
        (fcs_node,) = fcs_entries
        (fcs_flag,) = fcs_node.children
        cls.cartesian = bool(fcs_flag)

    # TODO: re-enable handling of "invert_line" and "cart_decay_line" records,
    # i.e. combining dual-line Cartesian (Re/Im) entries that share a name into
    # traditional Cartesian lines, raising RuntimeError when both components
    # of a pair are Re or both are Im.

    # Parameters and constants become DataFrames indexed by name.
    variable_frame = pd.DataFrame(variables, columns="name fix value error".split())
    parameters = variable_frame.set_index("name")
    constant_frame = pd.DataFrame(constants, columns="name value".split())
    constants = constant_frame.set_index("name")

    # Turn each matched decay line into an object built by from_matched_line.
    line_arr = [cls.from_matched_line(c) for c in cplx_decay_lines]

    # Keep only lines whose particle is the first event-type state, expanded
    # against the full list of lines.
    new_line_arr = []
    for line in line_arr:
        if line.particle == all_states[0]:
            new_line_arr.extend(line.expand_lines(line_arr))

    return new_line_arr, parameters, constants, all_states
# Lark parser for molecule names, built from the inline grammar below.
# The start rule is `molecule`: one of `chain`, `benzo` or `multibenzo`,
# followed by `meta`, which is three optional `_<DIR><INT>` stack suffixes
# (DIR being x, y or z).
# No `parser=` argument is given, so Lark's default parsing algorithm is used.
# NOTE(review): the grammar declares no `%ignore`, so whitespace in the input
# is presumably rejected — confirm against callers.
parser = Lark(r""" %import common.INT TICK: "-" rotate: [TICK | "(" INT ")"] YY: "N" | "P" | "C" XX: "O" | "S" | YY TYPE: "C" | "T" | "E" | "Z" core: TYPE XX YY rotate ARYL0: "10" | "11" | "2" | "3" | "8" | "9" ARYL2: "12" | "13" | "4" | "5" | "6" | "7" XGROUP: "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" RGROUP: "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" raryl0: ARYL0 rotate raryl2: ARYL2 rotate aryl: raryl0 | raryl2 -> aryl2_0 | raryl2 RGROUP -> aryl2_1 | raryl2 RGROUP RGROUP -> aryl2_2 arylchain: aryl* end: arylchain [XGROUP] m_extend: "_" "m" INT n_extend: "_" "n" INT rend: ["_" end] lend: [end "_"] rarylchain: ["_" arylchain] larylchain: [arylchain "_"] _rn_benzo: core rend rarylchain _rt_benzo: core rend rend m_benzo: lend core rarylchain rend m_extend n_benzo: larylchain _rn_benzo n_extend term_benzo: lend _rt_benzo n_multibenzo: larylchain _rn_benzo ("_" _rn_benzo)+ n_extend term_multibenzo: lend _rn_benzo ("_" _rn_benzo)* ("_" _rt_benzo) chain: arylchain n_extend | end benzo: n_benzo | m_benzo | term_benzo multibenzo: n_multibenzo | term_multibenzo DIR: "x" | "y" | "z" stack: ["_" DIR INT] meta: stack stack stack molecule: (chain | benzo | multibenzo) meta """, start='molecule')
from javascriptGrammar import *
from lark import Lark
from javascriptSemantic import javascriptSemantic
import sys

# LALR parser built from javascriptGrammar, with javascriptSemantic passed as
# the transformer.
parser = Lark(javascriptGrammar, parser="lalr", transformer=javascriptSemantic())
language = parser.parse

# The file to process is the first command-line argument.
file = sys.argv[1]
# Read under a context manager so the handle is closed as soon as the source
# has been loaded (the original left the file open).
with open(file, "r") as f:
    sample = f.read()

language(sample)
# Minimal script: build a Lark parser from GRAMMAR, parse, and print the result.
# NOTE(review): `start` references a rule named `stmt` that is not defined in
# the grammar text shown here — confirm whether a definition is missing.
# NOTE(review): `l.parse()` is called without any input text — confirm which
# string was meant to be parsed.
from lark import Lark GRAMMAR=''' start: (_NEWLINE | stmt)* NAME: /[a-zA-Z_]\w*/ COMMENT: /--[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ %ignore /[\t \f]+/ %ignore /\\[\t \f]*\r?\n/ ''' l = Lark(GRAMMAR) ast = l.parse() print(ast)