def test_switch_grammar_unicode_terminal(self):
    """
    Check that a parse tree built with a grammar containing only ascii
    characters can be reconstructed with a grammar that has unicode rules
    (or vice versa).

    The original bug assigned ANON terminals to unicode keywords, which
    offsets the ANON terminal count in the unicode grammar and causes
    subsequent identical ANON tokens (e.g., `+=`) to mis-match between the
    two grammars.
    """
    # The regex class ``\s`` is spelled ``\\s`` because these are non-raw
    # string literals: a bare ``\s`` is an invalid escape sequence (it warns
    # today and is slated to become an error). The runtime text is unchanged.
    g1 = """
    start: (NL | stmt)*
    stmt: "keyword" var op var
    !op: ("+=" | "-=" | "*=" | "/=")
    var: WORD
    NL: /(\\r?\\n)+\\s*/
    """ + common

    # Same grammar, except the keyword terminal is a unicode string.
    g2 = """
    start: (NL | stmt)*
    stmt: "குறிப்பு" var op var
    !op: ("+=" | "-=" | "*=" | "/=")
    var: WORD
    NL: /(\\r?\\n)+\\s*/
    """ + common

    code = """
    keyword x += y
    """

    l1 = Lark(g1, parser='lalr', maybe_placeholders=False)
    l2 = Lark(g2, parser='lalr', maybe_placeholders=False)
    r = Reconstructor(l2)

    # Parse with the ascii grammar, rebuild text with the unicode grammar,
    # then re-parse that text: the two trees must be equal.
    tree = l1.parse(code)
    code2 = r.reconstruct(tree)
    assert l2.parse(code2) == tree
def __init__(self, xlm_wrapper):
    """Set up the XLM parser, the tree reconstructor and cached names.

    :param xlm_wrapper: object providing ``get_defined_names()``; it is
        stored on the instance as ``self.xlm_wrapper``.
    """
    self.xlm_wrapper = xlm_wrapper
    self.cell_addr_regex_str = r"((?P<sheetname>[^\s]+?|'.+?')!)?\$?(?P<column>[a-zA-Z]+)\$?(?P<row>\d+)"
    self.cell_addr_regex = re.compile(self.cell_addr_regex_str)
    # Read the grammar inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open('xlm-macro.lark', 'r', encoding='utf_8') as grammar_file:
        macro_grammar = grammar_file.read()
    self.xlm_parser = Lark(macro_grammar, parser='lalr')
    self.defined_names = self.xlm_wrapper.get_defined_names()
    self.tree_reconstructor = Reconstructor(self.xlm_parser)
def __init__(self, tree, parser, matcher):
    """Store the query tree, its parser and the matcher callable.

    All three arguments are required; an ``AssertionError`` is raised when
    any of them is ``None``. A ``Reconstructor`` for ``parser`` is built
    once and kept on the instance.
    """
    # Checked in the same order as before: matcher, tree, parser.
    for required in (matcher, tree, parser):
        assert required is not None

    self.matcher, self.tree, self.parser = matcher, tree, parser
    self._reconstructor = Reconstructor(parser)
def parse(self, env: Environment, string: str) -> str:
    """Parse ``string``, substitute variables from ``env``, return the text.

    The input is parsed with ``self.parser``, rewritten by a
    ``SubstitutionTransformer`` built from ``env``, and turned back into a
    string with a ``Reconstructor``.

    Raises:
        ShellException: when the input contains unexpected characters or
            tokens, or on any other Lark error.
    """
    try:
        parsed = self.parser.parse(string)
        substituted = SubstitutionTransformer(env).transform(parsed)
        rebuilder = Reconstructor(self.parser)
        return rebuilder.reconstruct(substituted)
    except (UnexpectedCharacters, UnexpectedToken) as e:
        message = '[Substitution]Unexpected characters at position %s' % e.pos_in_stream
        raise ShellException(message)
    except LarkError:
        raise ShellException('[Substitution]Parse error')
def _make_grammar_tools(
    self, product_type: str
) -> Tuple[Lark, Grammar, Reconstructor, TokenMatcher]:
    """
    Build the grammar parser, the "Grammar" analyser tool, the
    reconstructor and the token matcher for ``product_type``.

    The grammar file is looked up under ``self.grammar_path`` using the
    asset class, the path delimiter, the product type and the extension.
    """
    # load the grammar file
    file_name = f"{self.asset_class}{PATH_DELIMITER}{product_type}{EXT}"
    lark_grammar = Lark.open(os.path.join(self.grammar_path, file_name))

    # build a first analyser, then a second one over its inlined rules
    # with terminals discarded (the rule stream is consumed lazily)
    raw_analyser = Grammar(lark_grammar.rules)
    stripped_rules = (
        raw_analyser.discard_terminals(rule)
        for rule in raw_analyser.expand_inline_rules()
    )
    final_analyser = Grammar(stripped_rules)

    # reconstructor and token matcher share the same grammar
    rebuilder = Reconstructor(lark_grammar)
    matcher = TokenMatcher(lark_grammar.terminals)

    return lark_grammar, final_analyser, rebuilder, matcher
def assert_reconstruct(self, grammar, code, **options):
    """Assert that ``code`` survives a parse/reconstruct round trip.

    ``grammar`` is compiled with the LALR parser (placeholders disabled,
    plus any extra ``options``); the comparison ignores whitespace.
    """
    lalr_parser = Lark(grammar, parser='lalr', maybe_placeholders=False, **options)
    parsed = lalr_parser.parse(code)
    rebuilt = Reconstructor(lalr_parser).reconstruct(parsed)
    self.assertEqual(_remove_ws(code), _remove_ws(rebuilt))
def test_lalr():
    """Round-trip ``test_json`` through the LALR parser and print the result.

    Prints the reconstructed text, then whether it decodes to the same
    JSON value as the input.
    """
    lalr_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
    parsed = lalr_parser.parse(test_json)

    rebuilt = Reconstructor(lalr_parser).reconstruct(parsed)
    print(rebuilt)

    same_value = json.loads(rebuilt) == json.loads(test_json)
    print(same_value)
def test_lalr():
    """Round-trip ``test_json`` through the LALR parser and print the result.

    Prints the reconstructed text, then whether it decodes to the same
    JSON value as the input.
    """
    lalr_parser = Lark(json_grammar, parser='lalr')
    parsed = lalr_parser.parse(test_json)

    rebuilt = Reconstructor(lalr_parser).reconstruct(parsed)
    print(rebuilt)

    same_value = json.loads(rebuilt) == json.loads(test_json)
    print(same_value)
class Matcher(object):
    """Pair a parsed query tree with the callable that evaluates it."""

    def __init__(self, tree, parser, matcher):
        """Store the tree, parser and matcher; none of them may be ``None``.

        A ``Reconstructor`` for ``parser`` is created once and reused by
        :meth:`query_str`.
        """
        assert matcher is not None
        assert tree is not None
        assert parser is not None
        self.matcher = matcher
        self.tree = tree
        self.parser = parser
        self._reconstructor = Reconstructor(parser)

    def match(self, array):
        """Apply the matcher to ``array``, converting it to a ``set`` first."""
        if not isinstance(array, set):
            array = set(array)
        return self.matcher(array)

    def pretty(self):
        """Return the pretty-printed form of the stored tree."""
        return self.tree.pretty()

    def query_str(self):
        """Return the query text reconstructed from the stored tree."""
        # The result used to be computed and dropped, so callers always
        # received None.
        return self._reconstructor.reconstruct(self.tree)
def தொகுத்தல்(தன்):
    """Build one translation dictionary per language and write it as JSON.

    The 'lark' rules are joined into a grammar and parsed by Lark; each
    language's grammar text (one per version) is then parsed with it, and
    every top-level rule subtree is reconstructed to text and recorded.
    The result is merged with any previous translations and dumped to
    ``<folder>/<language>/மொழிபெயர்ப்புகள்.json``.

    NOTE(review): the nesting below was reconstructed from source whose
    line breaks had been lost; confirm it against the original file.
    """
    # Parser for Lark grammar files themselves, plus its reconstructor.
    லார்க்_இலக்கணம் = '\n'.join(தன்.விதிகள்('lark'))
    லார்க்_பகுப்பாய்வி = Lark(லார்க்_இலக்கணம்)
    புனரமைப்பு = Reconstructor(லார்க்_பகுப்பாய்வி)

    def _பின்_பகுப்பாயுவி(உரை):
        # Post-processor for reconstruct(): append a space to every
        # non-blank item that does not already end with a newline.
        for ஈ in உரை:
            yield '{} '.format(ஈ) if len(ஈ.strip()) and not ஈ.strip(' \t').endswith('\n') else ஈ

    மொழிபெயர்ப்பு_அகராதிகள் = {}
    for நிரல்மொழி in தன்.நிரல்மொழிகள்:
        # Languages without explicit versions get a single '' version.
        பதிப்புகள் = தன்.பதிப்புகள்(நிரல்மொழி) or ['']
        மொழிபெயர்ப்பு_அகராதி = {
            'நீட்சி': {},
            'பெயர்': {},
            'பதிப்புகள்': {ப: [] for ப in பதிப்புகள்},
            'விதிகள்': {},
            'மொழிபெயர்ப்பாளர்கள்': []
        }
        மொழிபெயர்ப்பு_அகராதிகள்[நிரல்மொழி] = மொழிபெயர்ப்பு_அகராதி
        for ப in பதிப்புகள்:
            இலக்கணம் = தன்._இலக்கணம்_உரை_பெற(நிரல்மொழி, பதிப்பு=ப)
            மரம் = லார்க்_பகுப்பாய்வி.parse(இலக்கணம்)
            for இ in மரம்.children:
                if isinstance(இ, Tree):
                    # Skip subtrees whose first child is one of the
                    # instance's excluded rules.
                    if இ.children[0] in தன்._தனிப்பட்ட_விதிகள்:
                        continue
                    # Text form of the rule; used as the dictionary key.
                    விதி = புனரமைப்பு.reconstruct(இ, postproc=_பின்_பகுப்பாயுவி).strip()
                    மொழிபெயர்ப்பு_அகராதி['பதிப்புகள்'][ப].append(விதி)
                    if விதி not in மொழிபெயர்ப்பு_அகராதி['விதிகள்']:
                        அகராதி = {'பெயர்ப்பு': {}}
                        பகுப்பாய்வி = _சட்டம்_பகுப்பாய்வி()
                        if இ.data in ['rule', 'token']:
                            # The visitor decides whether the rule body
                            # needs translating; only "not needed" is stored.
                            பகுப்பாய்வி.visit(இ.children[1])
                            if not பகுப்பாய்வி.தேவை:
                                அகராதி['தேவை'] = False
                        else:
                            # Anything that is not a rule/token is marked
                            # as not needed.
                            அகராதி['தேவை'] = False
                        மொழிபெயர்ப்பு_அகராதி['விதிகள்'][விதி] = அகராதி
        # Merge with previously stored translations, if there are any.
        try:
            முன் = தன்._மொழிபெயர்ப்புகள்[நிரல்மொழி]
        except KeyError:
            முன் = {'விதிகள்': {}}
        தன்._புதுப்பிப்பு(முன், மொழிபெயர்ப்பு_அகராதி)
        மொழிப்பெயர்_கோப்பு = f"{தன்.கோப்புறை}/{நிரல்மொழி}/மொழிபெயர்ப்புகள்.json"
        with open(மொழிப்பெயர்_கோப்பு, 'w', encoding='utf8') as கோ:
            json.dump(மொழிபெயர்ப்பு_அகராதி, கோ, ensure_ascii=False, indent=2)
def test_scanless():
    """Round-trip ``test_json`` through the scanless parser and print it.

    Prints the reconstructed text, then whether it decodes to the same
    JSON value as the input.
    """
    scanless_parser = Lark(json_grammar, lexer=None)
    parsed = scanless_parser.parse(test_json)

    rebuilt = Reconstructor(scanless_parser).reconstruct(parsed)
    print(rebuilt)

    same_value = json.loads(rebuilt) == json.loads(test_json)
    print(same_value)
def replace_var_in_expression(expression, old_var, new_var, parser=None, matching_terminal_names=('ID',)):
    """
    Replaces all occurrences of old_var in expression with new_var.

    :param matching_terminal_names: Token names according to the grammar taken into account for replacement.
        A single name given as a plain string is treated as a one-element tuple.
    :type matching_terminal_names: tuple (of strings)
    :param expression: An expression in the grammar used by parser.
    :type expression: str
    :param old_var: Old variable
    :type old_var: str
    :param new_var: New variable
    :type new_var: str
    :param parser: Lark parser
    :type parser: Lark (if not set default parser will be used.)
    :return: Expression with replaced variable.
    :rtype: str
    """
    # ``('ID')`` is just the string 'ID'; with a string, ``in`` performs a
    # substring test, so token types such as 'I' or 'D' would match too.
    # Normalise bare strings so the membership test is always exact.
    if isinstance(matching_terminal_names, str):
        matching_terminal_names = (matching_terminal_names,)

    if parser is None:
        parser = get_parser_instance()
    tree = parser.parse(expression)
    recons = Reconstructor(parser)

    for node in tree.iter_subtrees():  # type: Tree
        for child in node.children:
            if (isinstance(child, Token)
                    and child.type in matching_terminal_names
                    and child.value == old_var):
                # The node's children are replaced by the single renamed
                # token, exactly as before.
                node.set(data=node.data, children=[Token(child.type, new_var)])

    return recons.reconstruct(tree)
def add_grammar(self, name, file, **options):
    """Register the grammar stored in ``file`` under ``name``.

    Options are layered: module defaults, then instance options, then the
    keyword arguments of this call. The grammar's signature must be
    compatible with the ones already registered.

    Raises:
        IncompatibleGrammarsException: if the signature is not compatible.
    """
    options = {**default_options, **self.options, **options}

    with open(file, encoding='utf8') as handle:
        grammar_text = handle.read()

    loaded = load_grammar(
        grammar_text,
        name,
        options["import_paths"],
        options["keep_all_tokens"],
    )
    signature = grammar_signature(loaded)

    if not self.is_compatible(signature):
        raise IncompatibleGrammarsException()

    # Only record anything once the compatibility check has passed.
    self.signatures[name] = signature
    self.parsers[name] = Lark(grammar_text, **options)
    self.reconstructors[name] = Reconstructor(self.parsers[name])
    self.grammars.add(name)
def test_json_example(self):
    """A JSON document must decode identically after reconstruction."""
    test_json = '''
    {
    "empty_object" : {},
    "empty_array" : [],
    "booleans" : { "YES" : true, "NO" : false },
    "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
    "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
    "nothing" : null
    }
    '''

    json_grammar = r"""
    ?start: value

    ?value: object
    | array
    | string
    | SIGNED_NUMBER -> number
    | "true" -> true
    | "false" -> false
    | "null" -> null

    array : "[" [value ("," value)*] "]"
    object : "{" [pair ("," pair)*] "}"
    pair : string ":" value

    string : ESCAPED_STRING

    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS

    %ignore WS
    """

    lalr_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
    parsed = lalr_parser.parse(test_json)

    # Compare decoded values rather than text: whitespace is ignored by the
    # grammar, so it cannot be reproduced exactly.
    rebuilt = Reconstructor(lalr_parser).reconstruct(parsed)
    self.assertEqual(json.loads(rebuilt), json.loads(test_json))
from rosidl_parser.definition import NamespacedType
from rosidl_parser.definition import NestedType
from rosidl_parser.definition import Service
from rosidl_parser.definition import SERVICE_REQUEST_MESSAGE_SUFFIX
from rosidl_parser.definition import SERVICE_RESPONSE_MESSAGE_SUFFIX
from rosidl_parser.definition import String
from rosidl_parser.definition import Structure
from rosidl_parser.definition import UnboundedSequence
from rosidl_parser.definition import WString

# The grammar lives next to this module and is read once at import time.
grammar_file = os.path.join(os.path.dirname(__file__), 'grammar.lark')
with open(grammar_file, mode='r', encoding='utf-8') as h:
    grammar = h.read()

# Module-level parser (start rule 'specification') and its reconstructor.
parser = Lark(grammar, start='specification')
reconstructor = Reconstructor(parser)


def parse_idl_file(locator, png_file=None):
    """Read the file behind ``locator``, parse it and wrap the result.

    The text is parsed with ``parse_idl_string``. If that raises, the error
    and the absolute path are printed to stderr and the exception is
    re-raised unchanged.

    :param locator: object whose ``get_absolute_path()`` returns a path
        offering ``read_text()``
    :param png_file: forwarded unchanged to ``parse_idl_string``
    :returns: an ``IdlFile`` built from ``locator`` and the parsed content
    """
    string = locator.get_absolute_path().read_text()
    try:
        content = parse_idl_string(string, png_file=png_file)
    except Exception as e:
        # Report which file failed before propagating the error.
        print(str(e), str(locator.get_absolute_path()), file=sys.stderr)
        raise
    return IdlFile(locator, content)


def parse_idl_string(idl_string, png_file=None):
    # Parses with the module-level ``parser`` declared above.
    global parser
    tree = parser.parse(idl_string)
def ਪ੍ਮੁੜ_ਉਸਾਰੀ_ਪ੍ਰਾਪਤ_ਕਰਨਾ(ਖੁਦ, ਭਾਸ਼ਾ):
    """Return the cached ``Reconstructor`` for a language, creating it once.

    The reconstructor is built from the parser that the instance returns
    for the same language and is stored in the instance's cache mapping.
    """
    # Fast path: already built for this language.
    if ਭਾਸ਼ਾ in ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ:
        return ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ]

    fresh = Reconstructor(ਖੁਦ.ਵਿਸ਼ਲੇਸ਼ਣ_ਪ੍ਰਾਪਤ_ਕਰਨਾ(ਭਾਸ਼ਾ))
    ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ] = fresh
    return ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ]
def match_ticker(self, tickerId, exchangeId, platform, bias):
    """Resolve a (possibly composite) ticker expression for ``platform``.

    ``tickerId`` is parsed with the ticker grammar; every ``NAME`` token is
    resolved to a market via the platform-specific finders.

    Returns a two-element list ``[payload, error]``:

    * on success, ``payload`` is the ``dumps``-encoded response and
      ``error`` is ``b""``;
    * if the expression cannot be parsed, ``payload`` is a placeholder
      response marked as simple;
    * if the expression is not a single name and the platform is not in
      the aggregated-ticker list, ``payload`` is ``b""`` and ``error`` is
      an encoded message.
    """
    # Some platforms force the bias regardless of what the caller passed.
    if platform in ["TradingLite", "Bookmap", "LLD", "CoinGecko", "CCXT", "Serum", "Ichibot"]:
        bias = "crypto"
    elif platform in ["IEXC"]:
        bias = "traditional"

    exchange = {} if exchangeId == "" else self.exchanges.get(exchangeId).to_dict()

    larkParser = Lark(GRAMMAR, parser='lalr')
    Ticker = larkParser.parse

    def match(_tickerId):
        """Resolve one name to a market dict (placeholder if none found)."""
        # Currency-sign shorthands: "$X" -> "XUSD", "€X" -> "XEUR".
        if _tickerId.startswith("$"):
            _tickerId = _tickerId[1:] + "USD"
        elif _tickerId.startswith("€"):
            _tickerId = _tickerId[1:] + "EUR"

        _tickerId, _ticker = self._check_overrides(_tickerId, platform), None
        if bias == "crypto":
            if platform in ["CoinGecko"] and exchangeId == "":
                _ticker = self.find_coingecko_crypto_market(_tickerId)
            elif platform in ["Serum"] and exchangeId == "":
                _ticker = self.find_serum_crypto_market(_tickerId)
            else:
                _ticker = self.find_ccxt_crypto_market(_tickerId, exchangeId, platform)
        else:
            if platform in ["IEXC"]:
                _ticker = self.find_iexc_market(_tickerId, exchangeId, platform)

        if _ticker is None:
            _ticker = {
                "id": _tickerId,
                "name": _tickerId,
                "base": None,
                "quote": None,
                "symbol": None,
                "exchange": exchange,
                "mcapRank": MAXSIZE,
                "isReversed": False
            }
        return _ticker

    def search(node, shouldMatch=False):
        """Rewrite NAME tokens in place, recursing into subtrees.

        With ``shouldMatch`` the token value becomes the full market dict,
        otherwise only its ``"id"``.
        """
        for i, child in enumerate(node.children):
            if not isinstance(child, Token):
                node.children[i] = search(child, shouldMatch)
            elif child.type == "NAME":
                newValue = match(child.value)
                if not shouldMatch:
                    newValue = newValue["id"]
                node.children[i] = child.update(value=newValue)
        return node

    try:
        ticker = Ticker(tickerId)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any parse failure still falls back to
        # the placeholder response.
        return [dumps({
            "tree": [
                "var",
                [[
                    "NAME",
                    {
                        "id": "BTC",
                        "name": "BTC",
                        "base": None,
                        "quote": None,
                        "symbol": None,
                        "exchange": exchange,
                        "mcapRank": MAXSIZE,
                        "isReversed": False
                    }
                ]]
            ],
            "id": tickerId,
            "name": tickerId,
            "exchange": exchange,
            "base": None,
            "quote": None,
            "symbol": None,
            "mcapRank": MAXSIZE,
            "isReversed": False,
            "isSimple": True
        }), b""]

    search(ticker, shouldMatch=True)

    # A "simple" ticker is a single NAME token at the root.
    isSimple = isinstance(ticker.children[0], Token) and ticker.children[0].type == "NAME"
    simpleTicker = ticker.children[0].value if isSimple else {}
    if not isSimple and platform not in ["TradingView", "Alternative.me", "CoinGecko", "CCXT", "Serum", "IEXC", "LLD"]:
        return [b"", f"Aggregated tickers aren't available on {platform}".encode()]

    # Parse again so the reconstructed id is built from a tree whose NAME
    # tokens hold plain ids rather than market dicts.
    reconstructedId = Reconstructor(larkParser).reconstruct(search(Ticker(tickerId)))

    response = {
        "tree": TickerTree().transform(ticker),
        "id": reconstructedId,
        "name": simpleTicker.get("name", reconstructedId),
        "exchange": simpleTicker.get("exchange", {}),
        "base": simpleTicker.get("base", None),
        "quote": simpleTicker.get("quote", None),
        "symbol": simpleTicker.get("symbol", None),
        "image": simpleTicker.get("image", None),
        "mcapRank": simpleTicker.get("mcapRank", MAXSIZE),
        "isReversed": simpleTicker.get("isReversed", False),
        "isSimple": isSimple
    }
    if isSimple and bias == "crypto":
        response["isTradable"] = self.check_if_tradable(reconstructedId)

    return [dumps(response), b""]
tab_len = 8 USEFUL_STARTS = ['file_input', 'single_input', 'eval_input', 'stmt'] base_python3_parser = Lark(PYTHON3_GRAMMAR, lexer='standard', start=USEFUL_STARTS, postlex=PythonIndenter()) template_python3_parser = Lark(PYTHON3_GRAMMAR + TEMPLATE_PYTHON, lexer='standard', start=USEFUL_STARTS, postlex=PythonIndenter()) base_python3_recons = Reconstructor(base_python3_parser, { '_NEWLINE': _special, '_DEDENT': _special, '_INDENT': _special }) class Extension(NamedTuple): name: str grammar: str used_names: Tuple[str, ...] transformer: Transformer_InPlace def _indent_postproc(items): stack = ['\n'] actions = [] for item in items:
import pathlib import pkg_resources from lark import Lark, Tree, Token from lark.reconstruct import Reconstructor _grammar_file_path = pathlib.Path( pkg_resources.resource_filename(__name__, "grammar.lark")) _lark_parser = Lark.open( _grammar_file_path, parser="lalr") # LALR parser is faster than the default one _reconstructor = Reconstructor(_lark_parser) class Profile: def __init__(self, ast): self.ast = ast @classmethod def from_scratch(cls): return cls(Tree("start", [])) @classmethod def from_file(cls, profile_path): profile_path = pathlib.Path(profile_path) with profile_path.open() as profile: return cls(_lark_parser.parse(profile.read())) @classmethod def from_string(cls, profile_code):
def assert_reconstruct(self, grammar, code):
    """Assert that ``code`` survives a parse/reconstruct round trip.

    ``grammar`` is compiled with the LALR parser; the comparison ignores
    whitespace.
    """
    lalr_parser = Lark(grammar, parser='lalr')
    parsed = lalr_parser.parse(code)
    rebuilt = Reconstructor(lalr_parser).reconstruct(parsed)
    self.assertEqual(_remove_ws(code), _remove_ws(rebuilt))
last_was_whitespace = True if not last_was_whitespace: if item[0] in SPACE_BEFORE: yield ' ' yield item last_was_whitespace = item[-1].isspace() if not last_was_whitespace: if item[-1] in SPACE_AFTER: yield ' ' last_was_whitespace = True yield "\n" python_reconstruct = Reconstructor(python_parser3, { '_NEWLINE': special, '_DEDENT': special, '_INDENT': special }) def test(): self_contents = open(__file__).read() tree = python_parser3.parse(self_contents + '\n') output = python_reconstruct.reconstruct(tree, postproc) tree_new = python_parser3.parse(output) # assert tree.pretty() == tree_new.pretty() assert tree == tree_new print(output)
class XLMInterpreter:
    """Step through Excel 4.0 (XLM) macro cells and partially evaluate them.

    Formulas are parsed with the ``xlm-macro.lark`` grammar. Supported
    functions are evaluated; everything else is turned back into text with
    a ``Reconstructor``.
    """

    def __init__(self, xlm_wrapper):
        """Build the parser and reconstructor and cache the defined names.

        :param xlm_wrapper: workbook wrapper providing
            ``get_defined_names()``, ``get_defined_name()`` and
            ``get_macrosheets()``.
        """
        self.xlm_wrapper = xlm_wrapper
        self.cell_addr_regex_str = r"((?P<sheetname>[^\s]+?|'.+?')!)?\$?(?P<column>[a-zA-Z]+)\$?(?P<row>\d+)"
        self.cell_addr_regex = re.compile(self.cell_addr_regex_str)
        # Read the grammar inside a context manager so the file handle is
        # always closed.
        with open('xlm-macro.lark', 'r', encoding='utf_8') as grammar_file:
            macro_grammar = grammar_file.read()
        self.xlm_parser = Lark(macro_grammar, parser='lalr')
        self.defined_names = self.xlm_wrapper.get_defined_names()
        self.tree_reconstructor = Reconstructor(self.xlm_parser)

    def is_float(self, text):
        """Return True if ``float(text)`` succeeds, False otherwise."""
        try:
            float(text)
            return True
        except (ValueError, TypeError):
            # TypeError covers None and other non-numeric objects, in line
            # with is_int.
            return False

    def is_int(self, text):
        """Return True if ``int(text)`` succeeds, False otherwise."""
        try:
            int(text)
            return True
        except (ValueError, TypeError):
            return False

    def get_formula_cell(self, macrosheet, col, row):
        """Return the first cell with a formula at or below ``col``/``row``.

        Rows ``row`` to ``row + 50`` inclusive are inspected; ``None`` is
        returned when none of them holds a formula.
        """
        first_row = int(row)
        for candidate_row in range(first_row, first_row + 51):
            addr = col + str(candidate_row)
            if addr in macrosheet.cells and macrosheet.cells[addr].formula is not None:
                return macrosheet.cells[addr]
        return None

    def get_argument_length(self, arglist_node):
        """Return the number of children of an ``arglist`` node, else None."""
        if arglist_node.data == 'arglist':
            return len(arglist_node.children)
        return None

    def get_cell(self, current_cell, cell_parse_tree):
        """Resolve a cell reference to ``(sheet, column, row)``.

        ``cell_parse_tree`` is either a name token (looked up among the
        defined names) or a tree whose first child is an ``absolute_cell``
        or ``relative_cell`` node. Unresolved names yield
        ``(None, None, None)``.

        :raises Exception: for any other kind of cell node.
        """
        res_sheet = res_col = res_row = None
        if isinstance(cell_parse_tree, Token):
            names = self.xlm_wrapper.get_defined_names()
            label = cell_parse_tree.value
            if label in names:
                res_sheet, res_col, res_row = Cell.parse_cell_addr(
                    names[cell_parse_tree])
        else:
            cell = cell_parse_tree.children[0]
            if cell.data == 'absolute_cell':
                res_sheet, res_col, res_row = Cell.parse_cell_addr(
                    cell.children[0])
                if res_sheet is None:
                    res_sheet = current_cell.sheet.name
            elif cell.data == 'relative_cell':
                first_child = cell.children[0]
                second_child = cell.children[1]
                res_sheet = current_cell.sheet.name
                res_col = Cell.convert_to_column_index(current_cell.column)
                res_row = int(current_cell.row)
                if first_child == 'R' and self.is_int(second_child):
                    res_row = res_row + int(second_child)
                    if len(cell.children) == 4:
                        # The column offset is the last of the four
                        # children; index 4 was always out of range here.
                        res_col = res_col + int(cell.children[3])
                elif second_child == 'c':
                    res_col = res_col + int(cell.children[2])
                res_row = str(res_row)
                res_col = Cell.convert_to_column_name(res_col)
            else:
                raise Exception('Cell addresss, Syntax Error')
        return res_sheet, res_col, res_row

    def set_cell(self, sheet_name, col, row, text):
        """Store ``text`` in a cell, creating the cell if necessary.

        Text starting with ``=`` is stored as the formula, anything else as
        the value. Unknown sheet names are ignored.
        """
        sheets = self.xlm_wrapper.get_macrosheets()
        if sheet_name not in sheets:
            return

        sheet = sheets[sheet_name]
        addr = col + str(row)
        if addr not in sheet.cells:
            new_cell = Cell()
            new_cell.column = col
            new_cell.row = row
            new_cell.sheet = sheet
            sheet.cells[addr] = new_cell

        cell = sheet.cells[addr]
        if text.startswith('='):
            cell.formula = text
        else:
            cell.value = text

    def evaluate_parse_tree(self, current_cell, parse_tree_root, interactive=True):
        """Evaluate one parse tree node.

        :param current_cell: the cell whose formula is being evaluated
        :param parse_tree_root: a token or tree produced by the XLM parser
        :param interactive: when True, the user is prompted for the value
            of unpopulated cells
        :returns: ``(next_cell, status, return_val, text)`` where
            ``next_cell`` is the explicit jump target (or None), ``status``
            is an ``EvalStatus`` and ``text`` is the textual rendering.
        """
        next_cell = None
        status = EvalStatus.NotImplemented
        text = None
        return_val = None

        if isinstance(parse_tree_root, Token):
            text = parse_tree_root.value
            status = EvalStatus.FullEvaluation
            return_val = text

        elif parse_tree_root.data == 'function_call':
            function_name = parse_tree_root.children[0]
            function_arguments = parse_tree_root.children[1]
            size = self.get_argument_length(function_arguments)

            if function_name == 'RUN':
                if size == 1:
                    next_sheet, next_col, next_row = self.get_cell(
                        current_cell,
                        function_arguments.children[0].children[0])
                    if next_sheet is not None and next_sheet in self.xlm_wrapper.get_macrosheets():
                        next_cell = self.get_formula_cell(
                            self.xlm_wrapper.get_macrosheets()[next_sheet],
                            next_col, next_row)
                        text = 'RUN({}!{}{})'.format(next_sheet, next_col, next_row)
                        status = EvalStatus.FullEvaluation
                    else:
                        status = EvalStatus.Error
                        text = self.tree_reconstructor.reconstruct(
                            parse_tree_root)
                    return_val = 0
                elif size == 2:
                    text = 'RUN(reference, step)'
                    status = EvalStatus.NotImplemented
                else:
                    text = 'RUN() is incorrect'
                    status = EvalStatus.Error

            elif function_name == 'CHAR':
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, function_arguments.children[0], interactive)
                if status == EvalStatus.FullEvaluation:
                    text = chr(int(text))
                    # Record the computed character as the current cell's value.
                    cell = self.get_formula_cell(current_cell.sheet,
                                                 current_cell.column,
                                                 current_cell.row)
                    cell.value = text
                    return_val = text

            elif function_name == 'FORMULA':
                first_arg = function_arguments.children[0]
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, first_arg, interactive)
                second_arg = function_arguments.children[1].children[0]
                dst_sheet, dst_col, dst_row = self.get_cell(
                    current_cell, second_arg)
                if status == EvalStatus.FullEvaluation:
                    self.set_cell(dst_sheet, dst_col, dst_row, text)
                text = "FORMULA({},{})".format(
                    text, '{}!{}{}'.format(dst_sheet, dst_col, dst_row))
                return_val = 0

            elif function_name == 'CALL':
                arguments = []
                status = EvalStatus.FullEvaluation
                for argument in function_arguments.children:
                    next_cell, tmp_status, return_val, text = self.evaluate_parse_tree(
                        current_cell, argument, interactive)
                    if tmp_status == EvalStatus.FullEvaluation:
                        if text is not None:
                            arguments.append(text)
                        else:
                            arguments.append(' ')
                    else:
                        # One unevaluated argument downgrades the whole call.
                        status = tmp_status
                        arguments.append('not evaluated')
                text = 'CALL({})'.format(','.join(arguments))
                return_val = 0

            elif function_name in ('HALT', 'CLOSE'):
                next_row = None
                next_col = None
                next_sheet = None
                text = self.tree_reconstructor.reconstruct(parse_tree_root)
                status = EvalStatus.End

            elif function_name == 'GOTO':
                next_sheet, next_col, next_row = self.get_cell(
                    current_cell, function_arguments.children[0].children[0])
                if next_sheet is not None and next_sheet in self.xlm_wrapper.get_macrosheets():
                    next_cell = self.get_formula_cell(
                        self.xlm_wrapper.get_macrosheets()[next_sheet],
                        next_col, next_row)
                    status = EvalStatus.FullEvaluation
                else:
                    status = EvalStatus.Error
                text = self.tree_reconstructor.reconstruct(parse_tree_root)

            elif function_name.lower() in self.defined_names:
                cell_text = self.defined_names[function_name.lower()]
                # NOTE(review): parse_cell_address is not defined in the
                # visible part of this class - confirm it exists (elsewhere
                # Cell.parse_cell_addr is used for the same job).
                next_sheet, next_col, next_row = self.parse_cell_address(
                    cell_text)
                text = 'Label ' + function_name
                status = EvalStatus.FullEvaluation

            elif function_name == 'ERROR':
                text = 'ERROR'
                status = EvalStatus.FullEvaluation

            elif function_name == 'IF':
                if size == 3:
                    second_arg = function_arguments.children[1]
                    next_cell, status, return_val, text = self.evaluate_parse_tree(
                        current_cell, second_arg, interactive)
                    if status == EvalStatus.FullEvaluation:
                        status = EvalStatus.PartialEvaluation
                    else:
                        status = EvalStatus.FullEvaluation
                    text = self.tree_reconstructor.reconstruct(parse_tree_root)

            elif function_name == 'NOW':
                text = datetime.datetime.now()
                status = EvalStatus.FullEvaluation

            elif function_name == 'DAY':
                first_arg = function_arguments.children[0]
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, first_arg, interactive)
                if status == EvalStatus.FullEvaluation:
                    if isinstance(text, datetime.datetime):
                        text = str(text.day)
                        status = EvalStatus.FullEvaluation
                    elif self.is_float(text):
                        text = 'DAY(Serial Date)'
                        status = EvalStatus.NotImplemented

            else:
                # Unknown function: echo it back unevaluated.
                text = self.tree_reconstructor.reconstruct(parse_tree_root)
                status = EvalStatus.NotImplemented

        elif parse_tree_root.data == 'method_call':
            text = self.tree_reconstructor.reconstruct(parse_tree_root)
            status = EvalStatus.NotImplemented

        elif parse_tree_root.data == 'cell':
            sheet_name, col, row = self.get_cell(current_cell, parse_tree_root)
            cell_addr = col + str(row)
            sheet = self.xlm_wrapper.get_macrosheets()[sheet_name]

            if cell_addr not in sheet.cells or sheet.cells[cell_addr].value is None:
                if interactive:
                    # Give the user a chance to populate the cell.
                    self.interactive_shell(
                        current_cell,
                        '{} is not populated, what should be its value?'.
                        format(cell_addr))

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if cell.value is not None:
                    text = cell.value
                    status = EvalStatus.FullEvaluation
                    return_val = text
                else:
                    text = "{}".format(cell_addr)
            else:
                text = "{}".format(cell_addr)

        elif parse_tree_root.data == 'binary_expression':
            left_arg = parse_tree_root.children[0]
            next_cell, l_status, return_val, text_left = self.evaluate_parse_tree(
                current_cell, left_arg, interactive)
            operator = str(parse_tree_root.children[1].children[0])
            right_arg = parse_tree_root.children[2]
            next_cell, r_status, return_val, text_right = self.evaluate_parse_tree(
                current_cell, right_arg, interactive)

            if l_status == EvalStatus.FullEvaluation and r_status == EvalStatus.FullEvaluation:
                status = EvalStatus.FullEvaluation
                if operator == '&':
                    text = text_left + text_right
                elif self.is_int(text_left) and self.is_int(text_right):
                    if operator == '-':
                        text = str(int(text_left) - int(text_right))
                    elif operator == '+':
                        text = str(int(text_left) + int(text_right))
                    elif operator == '*':
                        text = str(int(text_left) * int(text_right))
                    else:
                        text = 'Operator ' + operator
                        status = EvalStatus.NotImplemented
                else:
                    text = self.tree_reconstructor.reconstruct(parse_tree_root)
                    status = EvalStatus.PartialEvaluation
            else:
                status = EvalStatus.PartialEvaluation
                text = '{}{}{}'.format(text_left, operator, text_right)
            return_val = text

        else:
            # Generic node: evaluate every child; the status of the last
            # child that was not fully evaluated wins.
            status = EvalStatus.FullEvaluation
            for child_node in parse_tree_root.children:
                if child_node is not None:
                    next_cell, tmp_status, return_val, text = self.evaluate_parse_tree(
                        current_cell, child_node, interactive)
                    if tmp_status != EvalStatus.FullEvaluation:
                        status = tmp_status

        return next_cell, status, return_val, text

    def interactive_shell(self, current_cell, message):
        """Prompt the user for XLM macros until HALT/CLOSE or an empty line.

        Each entered line is prefixed with ``=``, parsed and evaluated
        non-interactively against ``current_cell``; its return value is
        printed.
        """
        print('\nProcess Interruption:')
        print('CELL:{:10}{}'.format(current_cell.get_local_address(),
                                    current_cell.formula))
        print(message)
        print('Enter XLM macro:')
        print('Tip: CLOSE() or HALT() to exist')

        while True:
            entered = input().strip().strip('"')
            # Test the raw entry: once '=' is prepended the line is never
            # empty, which made the "empty line exits" branch unreachable.
            if not entered:
                break
            line = '=' + entered
            try:
                parse_tree = self.xlm_parser.parse(line)
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, parse_tree, interactive=False)
                print(return_val)
                if status == EvalStatus.End:
                    break
            except ParseError:
                print("Invalid XLM macro")

    def deobfuscate_macro(self, interactive):
        """Walk the macro from every ``auto_open`` label, yielding each step.

        Yields ``(cell, status, text)`` for every evaluated cell. When a
        formula does not name a jump target and did not fail, execution
        continues with the next formula cell below.
        """
        # NOTE(review): ``result`` is not used in the visible code; kept in
        # case code outside this view relies on it.
        result = []

        auto_open_labels = self.xlm_wrapper.get_defined_name('auto_open',
                                                             full_match=False)
        if auto_open_labels is not None and len(auto_open_labels) > 0:
            macros = self.xlm_wrapper.get_macrosheets()

            print('[Starting Deobfuscation]')
            for auto_open_label in auto_open_labels:
                sheet_name, col, row = Cell.parse_cell_addr(auto_open_label[1])
                current_cell = self.get_formula_cell(macros[sheet_name], col, row)
                self.branches = []
                while current_cell is not None:
                    parse_tree = self.xlm_parser.parse(current_cell.formula)
                    next_cell, status, return_val, text = self.evaluate_parse_tree(
                        current_cell, parse_tree, interactive)
                    if return_val is not None:
                        current_cell.value = str(return_val)
                    if next_cell is None and status != EvalStatus.Error:
                        # No explicit jump: fall through to the row below.
                        next_cell = self.get_formula_cell(
                            current_cell.sheet, current_cell.column,
                            str(int(current_cell.row) + 1))
                    yield (current_cell, status, text)
                    if next_cell is not None:
                        current_cell = next_cell
                    else:
                        break
string : ESCAPED_STRING COMMENT: "/*" /(.|\\n)+?/ "*/" | /(#|\\/\\/)[^\\n]*/ TRAILING_COMMA: "," %import common.ESCAPED_STRING %import common.SIGNED_NUMBER %import common.WS %ignore WS %ignore COMMENT ''', maybe_placeholders=False, parser='lalr') serializer = Reconstructor(parser) def detect_encoding(b): ''' Taken from `json` package in CPython 3.7. Source can be found at https://bit.ly/2OHqCIK. ''' bstartswith = b.startswith if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): return 'utf-32' if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): return 'utf-16' if bstartswith(codecs.BOM_UTF8):
def reconstruct(self):
    """Return the text rebuilt from ``self.tree`` using ``self.parser``.

    A new ``Reconstructor`` is created on every call.
    """
    rebuilder = Reconstructor(self.parser)
    return rebuilder.reconstruct(self.tree)