Ejemplo n.º 1
0
    def test_switch_grammar_unicode_terminal(self):
        """
        Check that a parse tree built with a grammar containing only ascii characters can be
        reconstructed with a grammar that has unicode rules (or vice versa).

        The original bug assigned ANON terminals to unicode keywords, which offsets the ANON
        terminal count in the unicode grammar and causes subsequent identical ANON tokens
        (e.g., `+=`) to mis-match between the two grammars.
        """

        # The grammars are ordinary (non-raw) strings, so every backslash meant for the
        # regexp has to be doubled; a lone ``\s`` is an invalid escape sequence.
        g1 = """
        start: (NL | stmt)*
        stmt: "keyword" var op var
        !op: ("+=" | "-=" | "*=" | "/=")
        var: WORD
        NL: /(\\r?\\n)+\\s*/
        """ + common

        # Same grammar, except that the statement keyword is a unicode string.
        g2 = """
        start: (NL | stmt)*
        stmt: "குறிப்பு" var op var
        !op: ("+=" | "-=" | "*=" | "/=")
        var: WORD
        NL: /(\\r?\\n)+\\s*/
        """ + common

        code = """
        keyword x += y
        """

        l1 = Lark(g1, parser='lalr', maybe_placeholders=False)
        l2 = Lark(g2, parser='lalr', maybe_placeholders=False)
        r = Reconstructor(l2)

        # Parse with the ascii grammar, rebuild the text with the unicode grammar, and
        # check that re-parsing the rebuilt text yields the very same tree.
        tree = l1.parse(code)
        code2 = r.reconstruct(tree)
        assert l2.parse(code2) == tree
Ejemplo n.º 2
0
 def __init__(self, xlm_wrapper):
     """Set up the cell-address regex, the XLM macro parser and its reconstructor.

     :param xlm_wrapper: object providing ``get_defined_names()``; stored on the instance.
     """
     self.xlm_wrapper = xlm_wrapper
     self.cell_addr_regex_str = r"((?P<sheetname>[^\s]+?|'.+?')!)?\$?(?P<column>[a-zA-Z]+)\$?(?P<row>\d+)"
     self.cell_addr_regex = re.compile(self.cell_addr_regex_str)
     # Read the grammar through a context manager so the file handle is closed
     # right away instead of being left to the garbage collector.
     with open('xlm-macro.lark', 'r', encoding='utf_8') as grammar_file:
         macro_grammar = grammar_file.read()
     self.xlm_parser = Lark(macro_grammar, parser='lalr')
     self.defined_names = self.xlm_wrapper.get_defined_names()
     self.tree_reconstructor = Reconstructor(self.xlm_parser)
Ejemplo n.º 3
0
    def __init__(self, tree, parser, matcher):
        """Keep the tree, the parser and the match callable, and build a reconstructor.

        All three arguments are mandatory; an ``AssertionError`` is raised as soon as
        one of them is ``None``.
        """
        # Checked in this order: matcher, tree, parser.
        for required in (matcher, tree, parser):
            assert required is not None

        self.matcher, self.tree, self.parser = matcher, tree, parser
        self._reconstructor = Reconstructor(parser)
Ejemplo n.º 4
0
    def parse(self, env: Environment, string: str) -> str:
        """Parse input, substitute variables and return the result.

        The input is parsed with ``self.parser``, run through a
        ``SubstitutionTransformer`` built from ``env`` and turned back into text
        with a ``Reconstructor``.

        Raises:
            ShellException: when the input cannot be parsed. The original lark
                error is attached as ``__cause__``.
        """
        try:
            tree = self.parser.parse(string)
            tree = SubstitutionTransformer(env).transform(tree)
            return Reconstructor(self.parser).reconstruct(tree)
        except (UnexpectedCharacters, UnexpectedToken) as e:
            # Chain explicitly so the underlying lark error stays visible to callers.
            raise ShellException(
                '[Substitution]Unexpected characters at position %s' % e.pos_in_stream
            ) from e
        except LarkError as e:
            raise ShellException('[Substitution]Parse error') from e
Ejemplo n.º 5
0
    def _make_grammar_tools(
        self, product_type: str
    ) -> Tuple[Lark, Grammar, Reconstructor, TokenMatcher]:
        """
        Build the grammar toolkit for one product type.

        Returns a 4-tuple: the Lark parser opened from the product's grammar file, a
        ``Grammar`` analyser over the expanded rules, a ``Reconstructor`` for the
        parser and a ``TokenMatcher`` over the parser's terminals.
        """
        grammar_file = os.path.join(
            self.grammar_path,
            f"{self.asset_class}{PATH_DELIMITER}{product_type}{EXT}")
        lark_parser = Lark.open(grammar_file)

        # First pass over the raw rules; its helpers produce the rule set that the
        # final analyser is built from.
        raw_analyser = Grammar(lark_parser.rules)
        discard = raw_analyser.discard_terminals
        inlined_rules = raw_analyser.expand_inline_rules()
        final_analyser = Grammar(map(discard, inlined_rules))

        return (
            lark_parser,
            final_analyser,
            Reconstructor(lark_parser),
            TokenMatcher(lark_parser.terminals),
        )
Ejemplo n.º 6
0
 def assert_reconstruct(self, grammar, code, **options):
     """Parse ``code`` with ``grammar``, rebuild it, and compare ignoring whitespace.

     Extra keyword arguments are forwarded to the ``Lark`` constructor.
     """
     lark = Lark(grammar, parser='lalr', maybe_placeholders=False, **options)
     parsed = lark.parse(code)
     rebuilt = Reconstructor(lark).reconstruct(parsed)
     expected, actual = _remove_ws(code), _remove_ws(rebuilt)
     self.assertEqual(expected, actual)
Ejemplo n.º 7
0
def test_lalr():
    """Round-trip ``test_json`` through an LALR parser and print the outcome."""
    parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
    parsed = parser.parse(test_json)
    rebuilt = Reconstructor(parser).reconstruct(parsed)

    print(rebuilt)
    # True when the rebuilt text decodes to the same JSON value as the input.
    round_trips = json.loads(rebuilt) == json.loads(test_json)
    print(round_trips)
def test_lalr():
    """Round-trip ``test_json`` through an LALR parser and print the outcome.

    Prints the reconstructed text, then whether it decodes to the same JSON
    value as the original input.
    """
    # The reconstructor does not support placeholder ``None`` children, so
    # ``maybe_placeholders`` is switched off explicitly rather than relying on
    # the library default.
    json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
    tree = json_parser.parse(test_json)

    new_json = Reconstructor(json_parser).reconstruct(tree)
    print(new_json)
    print(json.loads(new_json) == json.loads(test_json))
Ejemplo n.º 9
0
class Matcher(object):
    """Pairs a parsed query tree with the callable that evaluates it."""

    def __init__(self, tree, parser, matcher):
        """Store the tree, parser and match callable; none of them may be ``None``."""
        assert matcher is not None
        assert tree is not None
        assert parser is not None

        self.matcher = matcher
        self.tree = tree
        self.parser = parser
        self._reconstructor = Reconstructor(parser)

    def match(self, array):
        """Run the match callable on ``array``, converted to a ``set`` if needed."""
        if not isinstance(array, set):
            array = set(array)
        return self.matcher(array)

    def pretty(self):
        """Return the tree's ``pretty()`` rendering."""
        return self.tree.pretty()

    def query_str(self):
        """Return the query text reconstructed from the stored tree."""
        # The reconstructed string used to be computed and then dropped, so the
        # method always returned None.
        return self._reconstructor.reconstruct(self.tree)
Ejemplo n.º 10
0
    def தொகுத்தல்(தன்):
        """Rebuild the per-language translation JSON files from the grammars.

        For every language in ``தன்.நிரல்மொழிகள்`` and every version of it, the
        grammar text is parsed with a Lark parser built from ``தன்.விதிகள்('lark')``.
        Each top-level subtree is reconstructed back into rule text and recorded
        per version and in the shared rule table. The result is merged with any
        previously stored translations through ``தன்._புதுப்பிப்பு`` and written to
        ``<folder>/<language>/மொழிபெயர்ப்புகள்.json``.

        NOTE(review): the Tamil identifiers are kept as-is; English glosses in the
        comments below are the reviewer's reading and should be confirmed.
        """
        # Parser and reconstructor for the grammar-of-grammars, shared by all languages.
        லார்க்_இலக்கணம் = '\n'.join(தன்.விதிகள்('lark'))
        லார்க்_பகுப்பாய்வி = Lark(லார்க்_இலக்கணம்)
        புனரமைப்பு = Reconstructor(லார்க்_பகுப்பாய்வி)

        def _பின்_பகுப்பாயுவி(உரை):
            # Post-processor for reconstruct(): appends one space to every non-blank
            # item unless the item (ignoring trailing spaces/tabs) ends with a newline.
            for ஈ in உரை:
                yield '{} '.format(ஈ) if len(ஈ.strip()) and not ஈ.strip(' \t').endswith('\n') else ஈ

        மொழிபெயர்ப்பு_அகராதிகள் = {}
        for நிரல்மொழி in தன்.நிரல்மொழிகள்:
            # Fall back to a single unnamed version when none are reported.
            பதிப்புகள் = தன்.பதிப்புகள்(நிரல்மொழி) or ['']
            # Fresh translation record for this language; the per-version lists and
            # the rule table are filled in below.
            மொழிபெயர்ப்பு_அகராதி = {
                'நீட்சி': {},
                'பெயர்': {},
                'பதிப்புகள்': {ப: [] for ப in பதிப்புகள்},
                'விதிகள்': {},
                'மொழிபெயர்ப்பாளர்கள்': []
            }
            மொழிபெயர்ப்பு_அகராதிகள்[நிரல்மொழி] = மொழிபெயர்ப்பு_அகராதி

            for ப in பதிப்புகள்:
                இலக்கணம் = தன்._இலக்கணம்_உரை_பெற(நிரல்மொழி, பதிப்பு=ப)
                மரம் = லார்க்_பகுப்பாய்வி.parse(இலக்கணம்)

                for இ in மரம்.children:
                    # Only subtrees are rules/tokens/statements; bare tokens are skipped.
                    if isinstance(இ, Tree):
                        # Skip definitions whose first child is listed in the
                        # instance's private/special rule collection.
                        if இ.children[0] in தன்._தனிப்பட்ட_விதிகள்:
                            continue
                        # Canonical text of this definition, used both as the list
                        # entry for the version and as the key of the rule table.
                        விதி = புனரமைப்பு.reconstruct(இ, postproc=_பின்_பகுப்பாயுவி).strip()
                        மொழிபெயர்ப்பு_அகராதி['பதிப்புகள்'][ப].append(விதி)

                        if விதி not in மொழிபெயர்ப்பு_அகராதி['விதிகள்']:
                            அகராதி = {'பெயர்ப்பு': {}}
                            பகுப்பாய்வி = _சட்டம்_பகுப்பாய்வி()
                            if இ.data in ['rule', 'token']:
                                # The visitor inspects the definition body and reports
                                # through its `தேவை` flag; the entry is flagged False
                                # only when the visitor's flag is falsy.
                                பகுப்பாய்வி.visit(இ.children[1])
                                if not பகுப்பாய்வி.தேவை:
                                    அகராதி['தேவை'] = False
                            else:
                                # Anything that is neither a rule nor a token is
                                # always flagged False.
                                அகராதி['தேவை'] = False
                            மொழிபெயர்ப்பு_அகராதி['விதிகள்'][விதி] = அகராதி

            # Previously stored translations for this language, or an empty rule
            # table when there are none.
            try:
                முன் = தன்._மொழிபெயர்ப்புகள்[நிரல்மொழி]
            except KeyError:
                முன் = {'விதிகள்': {}}

            தன்._புதுப்பிப்பு(முன், மொழிபெயர்ப்பு_அகராதி)

            # Persist the merged record as human-readable UTF-8 JSON.
            மொழிப்பெயர்_கோப்பு = f"{தன்.கோப்புறை}/{நிரல்மொழி}/மொழிபெயர்ப்புகள்.json"
            with open(மொழிப்பெயர்_கோப்பு, 'w', encoding='utf8') as கோ:
                json.dump(மொழிபெயர்ப்பு_அகராதி, கோ, ensure_ascii=False, indent=2)
def test_scanless():
    """Round-trip ``test_json`` through a parser built with ``lexer=None`` and print the outcome."""
    parser = Lark(json_grammar, lexer=None)
    parsed = parser.parse(test_json)

    # Debugging hint: the tree can be edited here (for example by renaming the
    # nodes returned by ``find_data('true')``) to watch the output change.

    rebuilt = Reconstructor(parser).reconstruct(parsed)
    print(rebuilt)
    round_trips = json.loads(rebuilt) == json.loads(test_json)
    print(round_trips)
Ejemplo n.º 12
0
def replace_var_in_expression(expression,
                              old_var,
                              new_var,
                              parser=None,
                              matching_terminal_names=('ID',)):
    """ Replaces all occurrences of old_var in expression with new_var.

    :param matching_terminal_names: Token names according to the grammar taken into account for replacement.
        A single name given as a plain string is treated as a one-element tuple.
    :type matching_terminal_names: tuple (of strings)
    :param expression: An expression in the grammar used by parser.
    :type expression: str
    :param old_var: Old variable
    :type old_var: str
    :param new_var: New variable
    :type new_var: str
    :param parser: Lark parser
    :type parser: Lark (if not set default parser will be used.)
    :return: Expression with replaced variable.
    :rtype: str
    """
    if parser is None:
        parser = get_parser_instance()
    # ``('ID')`` is just the string 'ID', which would turn the membership test
    # below into a substring check (matching token types 'I' and 'D' as well).
    # Normalise a bare string to a tuple so only exact names match.
    if isinstance(matching_terminal_names, str):
        matching_terminal_names = (matching_terminal_names,)
    tree = parser.parse(expression)
    recons = Reconstructor(parser)

    for node in tree.iter_subtrees():  # type: Tree
        for child in node.children:
            if (isinstance(child, Token)
                    and child.type in matching_terminal_names
                    and child.value == old_var):
                # NOTE(review): this replaces the node's whole child list with the
                # single renamed token, as before — presumably matching tokens
                # are always the only child of their node; confirm against the grammar.
                node.set(data=node.data,
                         children=[Token(child.type, new_var)])

    return recons.reconstruct(tree)
Ejemplo n.º 13
0
    def add_grammar(self, name, file, **options):
        """Register the grammar stored in ``file`` under ``name``.

        Options are layered: module defaults, then instance options, then the
        keyword arguments of this call. The grammar's signature must be
        compatible with those already registered, otherwise
        ``IncompatibleGrammarsException`` is raised and nothing is stored.
        """
        options = {**default_options, **self.options, **options}

        with open(file, encoding='utf8') as grammar_file:
            grammar = grammar_file.read()

        sig = grammar_signature(
            load_grammar(grammar, name, options["import_paths"],
                         options["keep_all_tokens"]))
        if not self.is_compatible(sig):
            raise IncompatibleGrammarsException()

        # Accepted: record the signature, then the parser and its reconstructor.
        self.signatures[name] = sig
        self.parsers[name] = Lark(grammar, **options)
        self.reconstructors[name] = Reconstructor(self.parsers[name])
        self.grammars.add(name)
Ejemplo n.º 14
0
    def test_json_example(self):
        """A JSON document survives a parse/reconstruct round trip unchanged in value."""
        json_grammar = r"""
            ?start: value

            ?value: object
                  | array
                  | string
                  | SIGNED_NUMBER      -> number
                  | "true"             -> true
                  | "false"            -> false
                  | "null"             -> null

            array  : "[" [value ("," value)*] "]"
            object : "{" [pair ("," pair)*] "}"
            pair   : string ":" value

            string : ESCAPED_STRING

            %import common.ESCAPED_STRING
            %import common.SIGNED_NUMBER
            %import common.WS

            %ignore WS
        """

        test_json = '''
            {
                "empty_object" : {},
                "empty_array"  : [],
                "booleans"     : { "YES" : true, "NO" : false },
                "numbers"      : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
                "strings"      : [ "This", [ "And" , "That", "And a \\"b" ] ],
                "nothing"      : null
            }
        '''

        parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
        parsed = parser.parse(test_json)
        rebuilt = Reconstructor(parser).reconstruct(parsed)

        # Compare decoded values so that whitespace differences do not matter.
        self.assertEqual(json.loads(rebuilt), json.loads(test_json))
Ejemplo n.º 15
0
from rosidl_parser.definition import NamespacedType
from rosidl_parser.definition import NestedType
from rosidl_parser.definition import Service
from rosidl_parser.definition import SERVICE_REQUEST_MESSAGE_SUFFIX
from rosidl_parser.definition import SERVICE_RESPONSE_MESSAGE_SUFFIX
from rosidl_parser.definition import String
from rosidl_parser.definition import Structure
from rosidl_parser.definition import UnboundedSequence
from rosidl_parser.definition import WString

# The grammar is read from 'grammar.lark' in the same directory as this module.
grammar_file = os.path.join(os.path.dirname(__file__), 'grammar.lark')
with open(grammar_file, mode='r', encoding='utf-8') as h:
    grammar = h.read()

# Parser and reconstructor are built once, at import time, and shared by the
# functions below. Parsing starts at the 'specification' rule.
parser = Lark(grammar, start='specification')
reconstructor = Reconstructor(parser)


def parse_idl_file(locator, png_file=None):
    """Read the file behind ``locator``, parse it and wrap the result in an ``IdlFile``.

    On any parsing error the message and the file's absolute path are printed
    to stderr and the exception is re-raised unchanged.
    """
    idl_text = locator.get_absolute_path().read_text()
    try:
        parsed = parse_idl_string(idl_text, png_file=png_file)
    except Exception as error:
        print(str(error), str(locator.get_absolute_path()), file=sys.stderr)
        raise
    else:
        return IdlFile(locator, parsed)


def parse_idl_string(idl_string, png_file=None):
    global parser
    tree = parser.parse(idl_string)
Ejemplo n.º 16
0
    def ਪ੍ਮੁੜ_ਉਸਾਰੀ_ਪ੍ਰਾਪਤ_ਕਰਨਾ(ਖੁਦ, ਭਾਸ਼ਾ):
        """Return the cached ``Reconstructor`` for the given language, creating it on first use."""
        # Fast path: already built for this language.
        if ਭਾਸ਼ਾ in ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ:
            return ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ]

        # First request: build it from the language's parser and remember it.
        ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ] = Reconstructor(ਖੁਦ.ਵਿਸ਼ਲੇਸ਼ਣ_ਪ੍ਰਾਪਤ_ਕਰਨਾ(ਭਾਸ਼ਾ))
        return ਖੁਦ._ਮੁੜ_ਉਸਾਰੀ[ਭਾਸ਼ਾ]
Ejemplo n.º 17
0
	def match_ticker(self, tickerId, exchangeId, platform, bias):
		"""Resolve a ticker expression into a serialized ticker description.

		``tickerId`` is parsed with the module's ``GRAMMAR``; every ``NAME`` token in
		the resulting tree is matched against the market sources selected by
		``platform``, ``exchangeId`` and ``bias``.

		Returns a two-element list. On success it is ``[dumps(response), b""]``.
		When ``tickerId`` cannot be parsed, a fallback payload containing a single
		"BTC" ``NAME`` node is returned in the same shape. When an aggregated
		(non-simple) ticker is requested on a platform that does not support it,
		the result is ``[b"", <encoded error message>]``.
		"""
		# Some platforms force the bias regardless of what the caller asked for.
		if platform in ["TradingLite", "Bookmap", "LLD", "CoinGecko", "CCXT", "Serum", "Ichibot"]: bias = "crypto"
		elif platform in ["IEXC"]: bias = "traditional"

		exchange = {} if exchangeId == "" else self.exchanges.get(exchangeId).to_dict()

		larkParser = Lark(GRAMMAR, parser='lalr')
		Ticker = larkParser.parse

		def match(_tickerId):
			# Expand currency shorthands: "$X" -> "XUSD", "€X" -> "XEUR".
			if _tickerId.startswith("$"): _tickerId = _tickerId[1:] + "USD"
			elif _tickerId.startswith("€"): _tickerId = _tickerId[1:] + "EUR"
			_tickerId, _ticker = self._check_overrides(_tickerId, platform), None
			if bias == "crypto":
				if platform in ["CoinGecko"] and exchangeId == "": _ticker = self.find_coingecko_crypto_market(_tickerId)
				elif platform in ["Serum"] and exchangeId == "": _ticker = self.find_serum_crypto_market(_tickerId)
				else: _ticker = self.find_ccxt_crypto_market(_tickerId, exchangeId, platform)
			else:
				if platform in ["IEXC"]: _ticker = self.find_iexc_market(_tickerId, exchangeId, platform)
			# Nothing found: describe the ticker by its id only.
			if _ticker is None:
				_ticker = {
					"id": _tickerId,
					"name": _tickerId,
					"base": None,
					"quote": None,
					"symbol": None,
					"exchange": exchange,
					"mcapRank": MAXSIZE,
					"isReversed": False
				}
			return _ticker

		def search(node, shouldMatch=False):
			# Walk the tree in place, replacing the value of each NAME token with the
			# matched ticker (full dict when shouldMatch is true, otherwise its id).
			for i, child in enumerate(node.children):
				if not isinstance(child, Token):
					node.children[i] = search(child, shouldMatch)
				elif child.type == "NAME":
					newValue = match(child.value)
					if not shouldMatch: newValue = newValue["id"]
					node.children[i] = child.update(value=newValue)
			return node

		try:
			ticker = Ticker(tickerId)
		except Exception:
			# Was a bare ``except:``, which also swallowed KeyboardInterrupt and
			# SystemExit. Any ordinary parse failure still yields the fallback.
			return [dumps({
				"tree": [
					"var",
					[[
						"NAME",
						{
							"id": "BTC",
							"name": "BTC",
							"base": None,
							"quote": None,
							"symbol": None,
							"exchange": exchange,
							"mcapRank": MAXSIZE,
							"isReversed": False
						}
					]]
				],
				"id": tickerId,
				"name": tickerId,
				"exchange": exchange,
				"base": None,
				"quote": None,
				"symbol": None,
				"mcapRank": MAXSIZE,
				"isReversed": False,
				"isSimple": True
			}), b""]

		search(ticker, shouldMatch=True)

		# A simple ticker is a tree whose first child is a single NAME token.
		isSimple = isinstance(ticker.children[0], Token) and ticker.children[0].type == "NAME"
		simpleTicker = ticker.children[0].value if isSimple else {}
		if not isSimple and platform not in ["TradingView", "Alternative.me", "CoinGecko", "CCXT", "Serum", "IEXC", "LLD"]:
			return [b"", f"Aggregated tickers aren't available on {platform}".encode()]

		# The id is rebuilt from a second parse in which NAME tokens carry plain ids
		# (strings), because the tree above now holds dicts as token values.
		reconstructedId = Reconstructor(larkParser).reconstruct(search(Ticker(tickerId)))

		response = {
			"tree": TickerTree().transform(ticker),
			"id": reconstructedId,
			"name": simpleTicker.get("name", reconstructedId),
			"exchange": simpleTicker.get("exchange", {}),
			"base": simpleTicker.get("base", None),
			"quote": simpleTicker.get("quote", None),
			"symbol": simpleTicker.get("symbol", None),
			"image": simpleTicker.get("image", None),
			"mcapRank": simpleTicker.get("mcapRank", MAXSIZE),
			"isReversed": simpleTicker.get("isReversed", False),
			"isSimple": isSimple
		}
		if isSimple and bias == "crypto": response["isTradable"] = self.check_if_tradable(reconstructedId)

		return [dumps(response), b""]
    tab_len = 8


# Start rules that both parsers below are built to accept.
USEFUL_STARTS = ['file_input', 'single_input', 'eval_input', 'stmt']

# Parser for the plain Python 3 grammar; PythonIndenter is installed as the
# post-lexer.
base_python3_parser = Lark(PYTHON3_GRAMMAR,
                           lexer='standard',
                           start=USEFUL_STARTS,
                           postlex=PythonIndenter())
# Same configuration, with TEMPLATE_PYTHON appended to the grammar text.
template_python3_parser = Lark(PYTHON3_GRAMMAR + TEMPLATE_PYTHON,
                               lexer='standard',
                               start=USEFUL_STARTS,
                               postlex=PythonIndenter())
# Reconstructor for the base grammar. The newline/indent/dedent terminals are
# each mapped to `_special` in the substitution table.
base_python3_recons = Reconstructor(base_python3_parser, {
    '_NEWLINE': _special,
    '_DEDENT': _special,
    '_INDENT': _special
})


class Extension(NamedTuple):
    """Immutable record describing one extension.

    Fields: a name, a grammar string, a tuple of used names and an in-place
    transformer.
    """
    name: str
    grammar: str
    # NOTE(review): presumably the identifiers the extension's grammar
    # introduces or reserves — confirm against the code that builds Extensions.
    used_names: Tuple[str, ...]
    transformer: Transformer_InPlace


def _indent_postproc(items):
    stack = ['\n']
    actions = []
    for item in items:
Ejemplo n.º 19
0
import pathlib
import pkg_resources
from lark import Lark, Tree, Token
from lark.reconstruct import Reconstructor

# Location of "grammar.lark" as resolved by pkg_resources for this package.
_grammar_file_path = pathlib.Path(
    pkg_resources.resource_filename(__name__, "grammar.lark"))

# Module-wide parser, built once at import time.
_lark_parser = Lark.open(
    _grammar_file_path,
    parser="lalr")  # LALR parser is faster than the default one

# Module-wide reconstructor bound to the parser above.
_reconstructor = Reconstructor(_lark_parser)


class Profile:
    def __init__(self, ast):
        """Store ``ast`` on the instance as ``self.ast``."""
        self.ast = ast

    @classmethod
    def from_scratch(cls):
        """Create an instance around an empty ``start`` tree (``Tree("start", [])``)."""
        return cls(Tree("start", []))

    @classmethod
    def from_file(cls, profile_path):
        """Create an instance by parsing the text of the file at ``profile_path``.

        ``profile_path`` may be a string or a path object; the file is read with
        the platform's default text encoding.
        """
        source = pathlib.Path(profile_path).read_text()
        return cls(_lark_parser.parse(source))

    @classmethod
    def from_string(cls, profile_code):
Ejemplo n.º 20
0
 def assert_reconstruct(self, grammar, code, **options):
     """Parse ``code`` with ``grammar``, rebuild it, and compare ignoring whitespace.

     Extra keyword arguments are forwarded to the ``Lark`` constructor; existing
     two-argument calls keep working.
     """
     # The reconstructor does not support placeholder ``None`` children, so
     # ``maybe_placeholders`` is switched off explicitly rather than relying on
     # the library default.
     parser = Lark(grammar,
                   parser='lalr',
                   maybe_placeholders=False,
                   **options)
     tree = parser.parse(code)
     new = Reconstructor(parser).reconstruct(tree)
     self.assertEqual(_remove_ws(code), _remove_ws(new))
Ejemplo n.º 21
0
                last_was_whitespace = True
            if not last_was_whitespace:
                if item[0] in SPACE_BEFORE:
                    yield ' '
            yield item
            last_was_whitespace = item[-1].isspace()
            if not last_was_whitespace:
                if item[-1] in SPACE_AFTER:
                    yield ' '
                    last_was_whitespace = True
    yield "\n"


# Reconstructor for the Python 3 parser. The newline/indent/dedent terminals
# are each mapped to `special` in the substitution table.
python_reconstruct = Reconstructor(python_parser3, {
    '_NEWLINE': special,
    '_DEDENT': special,
    '_INDENT': special
})


def test():
    """Round-trip this very source file through the parser and the reconstructor.

    The file is parsed, rebuilt with ``postproc``, parsed again, and the two
    trees must be equal. The rebuilt source is printed at the end.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(__file__) as source_file:
        self_contents = source_file.read()

    tree = python_parser3.parse(self_contents + '\n')
    output = python_reconstruct.reconstruct(tree, postproc)

    tree_new = python_parser3.parse(output)
    assert tree == tree_new

    print(output)
Ejemplo n.º 22
0
class XLMInterpreter:
    def __init__(self, xlm_wrapper):
        """Set up the cell-address regex, the XLM macro parser and its reconstructor.

        :param xlm_wrapper: object providing ``get_defined_names()`` and
            ``get_macrosheets()``; stored on the instance.
        """
        self.xlm_wrapper = xlm_wrapper
        self.cell_addr_regex_str = r"((?P<sheetname>[^\s]+?|'.+?')!)?\$?(?P<column>[a-zA-Z]+)\$?(?P<row>\d+)"
        self.cell_addr_regex = re.compile(self.cell_addr_regex_str)
        # Read the grammar through a context manager so the file handle is closed
        # right away instead of being left to the garbage collector.
        with open('xlm-macro.lark', 'r', encoding='utf_8') as grammar_file:
            macro_grammar = grammar_file.read()
        self.xlm_parser = Lark(macro_grammar, parser='lalr')
        self.defined_names = self.xlm_wrapper.get_defined_names()
        self.tree_reconstructor = Reconstructor(self.xlm_parser)

    def is_float(self, text):
        """Return True when ``float(text)`` succeeds, False otherwise.

        Values that ``float`` rejects with ``TypeError`` (such as ``None``) are
        reported as False, the same way ``is_int`` treats them.
        """
        try:
            float(text)
        except (ValueError, TypeError):
            return False
        return True

    def is_int(self, text):
        """Return True when ``int(text)`` succeeds, False on ValueError or TypeError."""
        try:
            int(text)
        except (ValueError, TypeError):
            return False
        else:
            return True

    def get_formula_cell(self, macrosheet, col, row):
        """Find the first cell at or below ``row`` in column ``col`` that has a formula.

        The search covers the starting row and the 50 rows after it. Returns the
        matching entry of ``macrosheet.cells``, or ``None`` when no such cell exists
        in that range.
        """
        first_row = int(row)
        for candidate_row in range(first_row, first_row + 51):
            addr = col + str(candidate_row)
            if addr in macrosheet.cells and macrosheet.cells[addr].formula is not None:
                return macrosheet.cells[addr]
        return None

    def get_argument_length(self, arglist_node):
        """Return the number of children of an ``arglist`` node, or None for any other node."""
        if arglist_node.data != 'arglist':
            return None
        return len(arglist_node.children)

    def get_cell(self, current_cell, cell_parse_tree):
        """Resolve a cell reference to a ``(sheet, column, row)`` triple.

        ``cell_parse_tree`` is either a ``Token`` holding a defined name, or a
        tree whose first child is an ``absolute_cell`` or ``relative_cell``
        node. Relative references are resolved against ``current_cell``.

        Returns ``(None, None, None)`` when a name token is not a defined name.
        Raises ``Exception`` for any other kind of cell node.
        """
        res_sheet = res_col = res_row = None
        if type(cell_parse_tree) is Token:
            names = self.xlm_wrapper.get_defined_names()
            label = cell_parse_tree.value
            if label in names:
                res_sheet, res_col, res_row = Cell.parse_cell_addr(
                    names[label])
        else:
            cell = cell_parse_tree.children[0]
            if cell.data == 'absolute_cell':
                res_sheet, res_col, res_row = Cell.parse_cell_addr(
                    cell.children[0])
                # An address without a sheet part refers to the current sheet.
                if res_sheet is None:
                    res_sheet = current_cell.sheet.name
            elif cell.data == 'relative_cell':
                first_child = cell.children[0]
                second_child = cell.children[1]
                res_sheet = current_cell.sheet.name
                res_col = Cell.convert_to_column_index(current_cell.column)
                res_row = int(current_cell.row)
                if first_child == 'R' and self.is_int(second_child):
                    res_row = res_row + int(second_child)
                    if len(cell.children) == 4:
                        # With four children the last valid index is 3; the former
                        # ``children[4]`` always raised IndexError.
                        # NOTE(review): assumes the column offset is the last
                        # child — confirm against the grammar.
                        res_col = res_col + int(cell.children[3])
                elif second_child == 'c':
                    res_col = res_col + int(cell.children[2])

                res_row = str(res_row)
                res_col = Cell.convert_to_column_name(res_col)
            else:
                raise Exception('Cell addresss, Syntax Error')

        return res_sheet, res_col, res_row

    def set_cell(self, sheet_name, col, row, text):
        """Write ``text`` into the cell at ``col`` + ``row`` of the named macro sheet.

        Nothing happens when the sheet is unknown. A missing cell is created
        first. Text starting with ``=`` is stored as the cell's formula, anything
        else as its value.
        """
        sheets = self.xlm_wrapper.get_macrosheets()
        if sheet_name not in sheets:
            return

        sheet = sheets[sheet_name]
        addr = col + str(row)
        if addr not in sheet.cells:
            fresh = Cell()
            fresh.column = col
            fresh.row = row
            fresh.sheet = sheet
            sheet.cells[addr] = fresh

        target = sheet.cells[addr]
        attribute = 'formula' if text.startswith('=') else 'value'
        setattr(target, attribute, text)

    def evaluate_parse_tree(self,
                            current_cell,
                            parse_tree_root,
                            interactive=True):
        """Evaluate one node of a parsed XLM formula.

        The node is dispatched on its kind: a bare ``Token``, a
        ``function_call`` (with dedicated handling for RUN, CHAR, FORMULA,
        CALL, HALT/CLOSE, GOTO, defined-name labels, ERROR, IF, NOW and DAY),
        a ``method_call``, a ``cell`` reference, a ``binary_expression``, or
        any other node, whose children are evaluated recursively.

        :param current_cell: cell whose formula is being evaluated.
        :param parse_tree_root: ``Token`` or tree node to evaluate.
        :param interactive: when True, an unpopulated cell reference triggers
            ``interactive_shell``.
        :return: ``(next_cell, status, return_val, text)`` — the cell to
            continue at (set by RUN/GOTO or propagated from recursive calls,
            otherwise None), an ``EvalStatus`` member, the evaluated value and
            a textual rendering of the node.
        """
        next_cell = None
        status = EvalStatus.NotImplemented
        text = None
        return_val = None

        if type(parse_tree_root) is Token:
            # A bare token evaluates to its own text.
            text = parse_tree_root.value
            status = EvalStatus.FullEvaluation
            return_val = text
        elif parse_tree_root.data == 'function_call':
            function_name = parse_tree_root.children[0]
            function_arguments = parse_tree_root.children[1]
            size = self.get_argument_length(function_arguments)

            if function_name == 'RUN':
                if size == 1:
                    next_sheet, next_col, next_row = self.get_cell(
                        current_cell,
                        function_arguments.children[0].children[0])
                    if next_sheet is not None and next_sheet in self.xlm_wrapper.get_macrosheets(
                    ):
                        # Continue at the first formula cell at or below the target.
                        next_cell = self.get_formula_cell(
                            self.xlm_wrapper.get_macrosheets()[next_sheet],
                            next_col, next_row)
                        text = 'RUN({}!{}{})'.format(next_sheet, next_col,
                                                     next_row)
                        status = EvalStatus.FullEvaluation
                    else:
                        status = EvalStatus.Error
                        text = self.tree_reconstructor.reconstruct(
                            parse_tree_root)
                    return_val = 0
                elif size == 2:
                    text = 'RUN(reference, step)'
                    status = EvalStatus.NotImplemented
                else:
                    text = 'RUN() is incorrect'
                    status = EvalStatus.Error

            elif function_name == 'CHAR':
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, function_arguments.children[0], interactive)
                if status == EvalStatus.FullEvaluation:
                    # Convert the code point and store the character as the value
                    # of the formula cell found from the current position.
                    text = chr(int(text))
                    cell = self.get_formula_cell(current_cell.sheet,
                                                 current_cell.column,
                                                 current_cell.row)
                    cell.value = text
                    return_val = text

            elif function_name == 'FORMULA':
                first_arg = function_arguments.children[0]
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, first_arg, interactive)
                second_arg = function_arguments.children[1].children[0]
                dst_sheet, dst_col, dst_row = self.get_cell(
                    current_cell, second_arg)
                # The destination cell is written only when the first argument
                # evaluated fully.
                if status == EvalStatus.FullEvaluation:
                    self.set_cell(dst_sheet, dst_col, dst_row, text)
                text = "FORMULA({},{})".format(
                    text, '{}!{}{}'.format(dst_sheet, dst_col, dst_row))
                return_val = 0

            elif function_name == 'CALL':
                arguments = []
                status = EvalStatus.FullEvaluation
                for argument in function_arguments.children:
                    next_cell, tmp_status, return_val, text = self.evaluate_parse_tree(
                        current_cell, argument, interactive)
                    if tmp_status == EvalStatus.FullEvaluation:
                        if text is not None:
                            arguments.append(text)
                        else:
                            arguments.append(' ')
                    else:
                        # Any argument that is not fully evaluated downgrades the
                        # status of the whole CALL.
                        status = tmp_status
                        arguments.append('not evaluated')
                text = 'CALL({})'.format(','.join(arguments))
                return_val = 0

            elif function_name in ('HALT', 'CLOSE'):
                # NOTE(review): next_row/next_col/next_sheet are assigned but not
                # read afterwards in this method.
                next_row = None
                next_col = None
                next_sheet = None
                text = self.tree_reconstructor.reconstruct(parse_tree_root)
                status = EvalStatus.End

            elif function_name == 'GOTO':
                next_sheet, next_col, next_row = self.get_cell(
                    current_cell, function_arguments.children[0].children[0])
                if next_sheet is not None and next_sheet in self.xlm_wrapper.get_macrosheets(
                ):
                    next_cell = self.get_formula_cell(
                        self.xlm_wrapper.get_macrosheets()[next_sheet],
                        next_col, next_row)
                    status = EvalStatus.FullEvaluation
                else:
                    status = EvalStatus.Error
                text = self.tree_reconstructor.reconstruct(parse_tree_root)

            elif function_name.lower() in self.defined_names:
                cell_text = self.defined_names[function_name.lower()]
                # NOTE(review): parse_cell_address is not defined in the visible
                # part of this class — confirm it exists. The parsed address is
                # not used afterwards in this method.
                next_sheet, next_col, next_row = self.parse_cell_address(
                    cell_text)
                text = 'Label ' + function_name
                status = EvalStatus.FullEvaluation

            elif function_name == 'ERROR':
                text = 'ERROR'
                status = EvalStatus.FullEvaluation

            elif function_name == 'IF':
                if size == 3:
                    # Only the second argument is evaluated; the result is always
                    # reported as a partial evaluation.
                    second_arg = function_arguments.children[1]
                    next_cell, status, return_val, text = self.evaluate_parse_tree(
                        current_cell, second_arg, interactive)
                    if status == EvalStatus.FullEvaluation:
                        # NOTE(review): third_arg is assigned but never used.
                        third_arg = function_arguments.children[2]
                    status = EvalStatus.PartialEvaluation
                else:
                    status = EvalStatus.FullEvaluation
                text = self.tree_reconstructor.reconstruct(parse_tree_root)

            elif function_name == 'NOW':
                # Here `text` holds a datetime object rather than a string; the
                # DAY branch below checks for exactly that type.
                text = datetime.datetime.now()
                status = EvalStatus.FullEvaluation

            elif function_name == 'DAY':
                first_arg = function_arguments.children[0]
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, first_arg, interactive)
                if status == EvalStatus.FullEvaluation:
                    if type(text) is datetime.datetime:
                        text = str(text.day)
                        status = EvalStatus.FullEvaluation
                    elif self.is_float(text):
                        text = 'DAY(Serial Date)'
                        status = EvalStatus.NotImplemented

            else:
                # Unknown function: report its reconstructed text unevaluated.
                # args_str =''
                # for argument in function_arguments.children:
                #     next_cell, status, return_val, text = self.evaluate_parse_tree(current_cell, argument, interactive)
                #     args_str += str(return_val) +','
                # args_str.strip(',')
                # text = '{}({})'.format(function_name, args_str)
                text = self.tree_reconstructor.reconstruct(parse_tree_root)
                status = EvalStatus.NotImplemented

        elif parse_tree_root.data == 'method_call':
            text = self.tree_reconstructor.reconstruct(parse_tree_root)
            status = EvalStatus.NotImplemented

        elif parse_tree_root.data == 'cell':
            sheet_name, col, row = self.get_cell(current_cell, parse_tree_root)
            cell_addr = col + str(row)
            sheet = self.xlm_wrapper.get_macrosheets()[sheet_name]
            # NOTE(review): `missing` is written here and below but never read.
            missing = True
            if cell_addr not in sheet.cells or sheet.cells[
                    cell_addr].value is None:
                # Give the user a chance to populate the cell before it is read.
                if interactive:
                    self.interactive_shell(
                        current_cell,
                        '{} is not populated, what should be its value?'.
                        format(cell_addr))

            if cell_addr in sheet.cells:
                cell = sheet.cells[cell_addr]
                if cell.value is not None:
                    text = cell.value
                    status = EvalStatus.FullEvaluation
                    return_val = text
                    missing = False

                else:
                    # Still unpopulated: fall back to the address itself.
                    text = "{}".format(cell_addr)
            else:
                text = "{}".format(cell_addr)

        elif parse_tree_root.data == 'binary_expression':
            left_arg = parse_tree_root.children[0]
            next_cell, l_status, return_val, text_left = self.evaluate_parse_tree(
                current_cell, left_arg, interactive)
            operator = str(parse_tree_root.children[1].children[0])
            right_arg = parse_tree_root.children[2]
            next_cell, r_status, return_val, text_right = self.evaluate_parse_tree(
                current_cell, right_arg, interactive)
            if l_status == EvalStatus.FullEvaluation and r_status == EvalStatus.FullEvaluation:
                status = EvalStatus.FullEvaluation
                if operator == '&':
                    # '&' concatenates the two operand texts.
                    text = text_left + text_right
                elif self.is_int(text_left) and self.is_int(text_right):
                    # Integer arithmetic is done only for '-', '+' and '*'.
                    if operator == '-':
                        text = str(int(text_left) - int(text_right))
                    elif operator == '+':
                        text = str(int(text_left) + int(text_right))
                    elif operator == '*':
                        text = str(int(text_left) * int(text_right))
                    else:
                        text = 'Operator ' + operator
                        status = EvalStatus.NotImplemented
                else:
                    text = self.tree_reconstructor.reconstruct(parse_tree_root)
                    status = EvalStatus.PartialEvaluation
            else:
                status = EvalStatus.PartialEvaluation
                text = '{}{}{}'.format(text_left, operator, text_right)
            return_val = text
        else:
            # Any other node: evaluate the children in order. The status is the
            # last non-full child status, if any; next_cell, return_val and text
            # come from the last child evaluated.
            status = EvalStatus.FullEvaluation
            for child_node in parse_tree_root.children:
                if child_node is not None:
                    next_cell, tmp_status, return_val, text = self.evaluate_parse_tree(
                        current_cell, child_node, interactive)
                    if tmp_status != EvalStatus.FullEvaluation:
                        status = tmp_status

        return next_cell, status, return_val, text

    def interactive_shell(self, current_cell, message):
        """Prompt the user for XLM macros and evaluate them until told to stop.

        Prints the local address and formula of ``current_cell`` followed by
        ``message``, then reads lines from standard input. Each non-empty
        line is stripped of surrounding whitespace and double quotes, prefixed
        with ``=``, parsed with ``self.xlm_parser`` and evaluated with
        ``self.evaluate_parse_tree`` (``interactive=False``); the returned
        value is printed.

        The loop ends when the user submits an empty line, when standard
        input is exhausted, or when an evaluation reports ``EvalStatus.End``.

        Args:
            current_cell: cell being evaluated when the interruption happened;
                it must provide ``get_local_address()`` and ``formula``.
            message: explanation shown to the user before the prompt.
        """
        print('\nProcess Interruption:')
        print('CELL:{:10}{}'.format(current_cell.get_local_address(),
                                    current_cell.formula))
        print(message)
        print('Enter XLM macro:')
        print('Tip: CLOSE() or HALT() to exit')

        while True:
            try:
                user_input = input()
            except EOFError:
                # No more input available (e.g. stdin closed): leave the shell.
                break

            # Check for emptiness BEFORE adding the '=' prefix; otherwise the
            # line is never empty and a blank entry cannot end the session.
            user_input = user_input.strip().strip('"')
            if not user_input:
                break

            line = '=' + user_input
            try:
                parse_tree = self.xlm_parser.parse(line)
                next_cell, status, return_val, text = self.evaluate_parse_tree(
                    current_cell, parse_tree, interactive=False)
                print(return_val)
                if status == EvalStatus.End:
                    break
            except ParseError:
                print("Invalid XLM macro")

    def deobfuscate_macro(self, interactive):
        """Walk the macro formulas from each 'auto_open' label, yielding results.

        Looks up the defined names matching ``'auto_open'`` (non-exact match)
        through ``self.xlm_wrapper``. If there are none, nothing is yielded.
        Otherwise, for every label the cell address in ``label[1]`` is parsed
        and evaluation starts from the corresponding formula cell.

        Each visited cell's formula is parsed and evaluated. A non-``None``
        return value is stored on the cell as a string. When the evaluation
        does not name a next cell and the status is not ``EvalStatus.Error``,
        the walk continues with the formula cell one row further down in the
        same sheet and column.

        Args:
            interactive: forwarded unchanged to ``evaluate_parse_tree``.

        Yields:
            ``(cell, status, text)`` for every cell that was evaluated.
        """
        entry_labels = self.xlm_wrapper.get_defined_name('auto_open',
                                                         full_match=False)
        if entry_labels is None or len(entry_labels) == 0:
            return

        macrosheets = self.xlm_wrapper.get_macrosheets()
        print('[Starting Deobfuscation]')

        for label in entry_labels:
            sheet_name, col, row = Cell.parse_cell_addr(label[1])
            cell = self.get_formula_cell(macrosheets[sheet_name], col, row)
            # Branch bookkeeping is reset for every entry point.
            self.branches = []

            while cell is not None:
                tree = self.xlm_parser.parse(cell.formula)
                following, status, value, text = self.evaluate_parse_tree(
                    cell, tree, interactive)

                if value is not None:
                    cell.value = str(value)

                # No explicit jump and no error: fall through to the next row.
                if following is None and status != EvalStatus.Error:
                    following = self.get_formula_cell(
                        cell.sheet, cell.column, str(int(cell.row) + 1))

                yield (cell, status, text)

                # A None successor terminates the walk for this entry point.
                cell = following
Ejemplo n.º 23
0
    string : ESCAPED_STRING

    COMMENT: "/*" /(.|\\n)+?/ "*/"
           | /(#|\\/\\/)[^\\n]*/
    TRAILING_COMMA: ","

    %import common.ESCAPED_STRING
    %import common.SIGNED_NUMBER
    %import common.WS
    %ignore WS
    %ignore COMMENT
''',
              maybe_placeholders=False,
              parser='lalr')

# Module-level Reconstructor built around `parser`; presumably used to turn
# parse trees back into source text -- confirm against its call sites.
serializer = Reconstructor(parser)


def detect_encoding(b):
    '''
    Taken from `json` package in CPython 3.7.

    Source can be found at https://bit.ly/2OHqCIK.
    '''

    bstartswith = b.startswith
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
Ejemplo n.º 24
0
 def reconstruct(self):
     """Return the result of reconstructing ``self.tree`` with ``self.parser``.

     A new ``Reconstructor`` is created from ``self.parser`` on every call.
     """
     rebuilder = Reconstructor(self.parser)
     return rebuilder.reconstruct(self.tree)