def getEquationComponents(self, equation_str):
    """Break one equation string into name, subscripts, expression and kind.

    The string is whitespace-normalised, parsed with a small PEG grammar
    and the resulting tree is handed to ``ComponentParser``.

    Returns a dict with the keys ``'real_name'``, ``'subs'``, ``'expr'``
    and ``'kind'``, read from the corresponding attributes of the
    ``ComponentParser`` instance.
    """
    component_structure_grammar = r"""
    entry = component / subscript_definition / lookup_definition
    component = name _ subscriptlist? _ "=" _ expression
    subscript_definition = name _ ":" _ subscript _ ("," _ subscript)*
    lookup_definition = name _ &"(" _ expression # uses lookahead assertion to capture whole group

    name = basic_id / escape_group
    subscriptlist = '[' _ subscript _ ("," _ subscript)* _ ']'
    expression = ~r".*" # expression could be anything, at this point.

    subscript = basic_id / escape_group

    basic_id = ~r"[a-zA-Z][a-zA-Z0-9_\s]*"
    escape_group = "\"" ( "\\\"" / ~r"[^\"]" )* "\""
    _ = ~r"[\s\\]*" # whitespace character
    """

    # Literal backslash-t sequences become a space first, then every run of
    # whitespace is squeezed down to a single space.
    normalised = re.sub(r"\s+", ' ', equation_str.replace('\\t', ' '))

    syntax_tree = parsimonious.Grammar(component_structure_grammar).parse(normalised)
    components = ComponentParser(syntax_tree)

    result = {}
    result['real_name'] = components.real_name
    result['subs'] = components.subscripts
    result['expr'] = components.expression
    result['kind'] = components.kind
    return result
def __init__(self, model_namespace=None, subscript_dict=None):
    """Store the namespaces and compile the expression grammar.

    ``model_namespace`` and ``subscript_dict`` default to fresh empty
    dicts. ``extended_model_namespace`` holds every namespace key twice
    (once with spaces replaced by underscores, once verbatim) plus the
    time aliases ``dt``, ``starttime`` and ``endtime``. The grammar
    template is loaded from the ``pysd`` package and its placeholders are
    filled with word lists built by ``format_word_list``.
    """
    self.model_namespace = {} if model_namespace is None else model_namespace
    self.subscript_dict = {} if subscript_dict is None else subscript_dict

    # Underscored spellings go in first; the verbatim names then overwrite
    # any colliding entries.
    namespace = {}
    for real_name, py_name in self.model_namespace.items():
        namespace[real_name.replace(' ', '_')] = py_name
    namespace.update(self.model_namespace)

    # ===
    # 3.5.5 Time Functions
    # http://docs.oasis-open.org/xmile/xmile/v1.0/csprd01/xmile-v1.0-csprd01.html#_Toc398039984
    # ===
    namespace.update({
        'dt': 'time_step',
        'starttime': 'initial_time',
        'endtime': 'final_time',
    })
    self.extended_model_namespace = namespace

    template_bytes = pkg_resources.resource_string(
        "pysd", "py_backend/xmile/smile.grammar")
    template = template_bytes.decode('ascii')
    grammar_text = template.format(
        funcs=format_word_list(functions.keys()),
        in_ops=format_word_list(infix_operators.keys()),
        pre_ops=format_word_list(prefix_operators.keys()),
        identifiers=format_word_list(self.extended_model_namespace.keys()),
        build_keywords=format_word_list(builders.keys()),
    )
    self.grammar = parsimonious.Grammar(grammar_text)
def get_host_dicts(full_text):
    """
    Parse vagrant host output into a list of per-host dictionaries.

    Each dictionary is keyed by the first word of a line, with the rest of
    that line as the value, e.g.
    `{ "Host": "control", "Hostname": "127.0.0.1", "Port": 2200, ... }`.

    Fields with a natural native type are converted; in particular "Port"
    is meant to come back as an int (the conversion itself is performed by
    ``Walker``, which is defined outside this function).
    """
    # Parsimonious grammar, similar to BNF:
    # https://github.com/erikrose/parsimonious
    host_grammar = parsimonious.Grammar(r"""
        output = block+
        block = newline* first_line line+ last_line?
        line = port_line / arbitrary_line
        # Only the first line is unindented
        first_line = key whitespace1 hostname newline
        port_line = whitespace2 key whitespace1 port_number newline
        arbitrary_line = whitespace2 key whitespace1 value newline
        # last_line only gets called when the last line has no trailing newline,
        # e.g. it won't be called if there's another block following.
        last_line = whitespace2 key whitespace1 value
        key = ~"[A-z]+"
        # Values are arbitrary and can be paths, ints, etc.
        value = ~".+"
        newline = ~"\n"
        whitespace1 = ~"\s"
        whitespace2 = ~"\s\s"
        # https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
        hostname = ~"[A-z0-9-]+"i
        # Port numbers are in the range 1~65000
        port_number = ~"[0-9]{1,5}"
    """)
    walker = Walker(host_grammar)
    return walker.match(full_text)
def parse_lookup_expression(element, subscript_dict):
    """
    Translate a lookup that is defined as its own model element.

    ``element['expr']`` is parsed either as a regular lookup (a list of
    ``(x, y)`` pairs with an optional leading range) or as a
    GET XLS/DIRECT LOOKUPS call.

    Returns a tuple ``(translation, new_structure)`` where ``translation``
    is ``{'py_expr': ..., 'arguments': 'x'}`` and ``new_structure`` is the
    list of extra elements produced by the external-lookup builder (empty
    for regular lookups).
    """
    lookup_grammar = r"""
    lookup = _ "(" _ (regularLookup / excelLookup) _ ")"
    regularLookup = range? _ ( "(" _ number _ "," _ number _ ")" _ ","? _ )+
    excelLookup = ~"GET( |_)(XLS|DIRECT)( |_)LOOKUPS"I _ "(" _ args (_ "," _ args)* _ ")"
    args = ~r"[^,()]*"
    number = ("+"/"-")? ~r"\d+\.?\d*(e[+-]\d+)?"
    _ = ~r"[\s\\]*" #~r"[\ \t\n]*" #~r"[\s\\]*" # whitespace character
    range = _ "[" ~r"[^\]]*" "]" _ ","
    """
    tree = parsimonious.Grammar(lookup_grammar).parse(element['expr'])

    class LookupParser(parsimonious.NodeVisitor):
        """Walks the lookup tree and records the python translation."""

        def __init__(self, ast):
            self.translation = ""
            self.new_structure = []
            self.visit(ast)

        def visit__(self, node, children):
            # whitespace contributes nothing to the translation
            return ''

        def visit_regularLookup(self, node, children):
            # the longest child is the text holding all the (x, y) pairs
            longest = max(children, key=len)
            values = longest.replace('(', '').replace(')', '').split(',')
            template = "functions.lookup(x, [%(xs)s], [%(ys)s])"
            self.translation = template % {
                'xs': ','.join(values[::2]),    # even positions: x values
                'ys': ','.join(values[1::2]),   # odd positions: y values
            }

        def visit_excelLookup(self, node, children):
            source = children[4]
            # the first field of the remaining arguments is discarded
            _, name, col, cell = map(str.strip, children[5].split(','))
            build = builders["get xls lookups"]
            trans, structure = build(element, subscript_dict,
                                     [source, name, col, cell])
            self.translation = trans
            self.new_structure.extend(structure)

        def generic_visit(self, node, children):
            joined = ''.join(child for child in children if child)
            return joined if joined else node.text

    parsed = LookupParser(tree)
    translation = {'py_expr': parsed.translation, 'arguments': 'x'}
    return (translation, parsed.new_structure)
def format_templates(format_str: str, templates: Dict[str, str]) -> str:
    """Expand *format_str* using the values in *templates*.

    Syntax understood by the grammar below:

    - ``%name%`` is replaced with ``templates.get(name, '')``.
    - ``[ ... ]`` is a group: it renders as the concatenation of its parts
      only when every flattened part is truthy, otherwise as ``''``.
    - ``a|b|c`` is an alternative: it renders as its first truthy part, or
      ``''`` when there is none.
    - Any other run of characters (excluding ``%``, ``[``, ``]``, ``|``) is
      copied through unchanged.

    Parameters:
        format_str: the format string to expand.
        templates: mapping from variable name to replacement text.

    Returns:
        The expanded string.

    Raises:
        FormatError: if *format_str* does not match the grammar. The
            underlying parser error is attached as ``__cause__``.
    """
    grammar = r'''
        expression = (token/alternative)*
        token = group/raw_text/variable
        group = group_start (alternative/token)* group_end
        alternative = token("|"token)+
        raw_text = word
        variable = "%"word"%"
        word = ~"[^%\[\]\|]+"i
        group_start = "["
        group_end = "]"
    '''

    class EntryParser(parsimonious.nodes.NodeVisitor):
        """Renders the parsed format string bottom-up."""

        def __init__(self, templates):
            self.templates = templates

        def visit_expression(self, _node, visited_children):
            flattened = flatten(visited_children)
            return ''.join(child for child in flattened if child)

        def visit_group(self, _node, visited_children):
            # A group is all-or-nothing: one empty part blanks the group.
            flattened = flatten(visited_children)
            return (''.join(flattened)
                    if all(flattened) else '')

        def visit_alternative(self, _node, visited_children):
            # First non-empty option wins.
            flattened = flatten(visited_children)
            for child in flattened:
                if child:
                    return child
            return ''

        def visit_variable(self, node, _visited_children):
            # children are: "%", word, "%"
            var_name = node.children[1].text
            return self.templates.get(var_name, '')

        def visit_raw_text(self, node, _visited_children):
            return node.text

        def visit_word(self, node, _visited_children):
            return node.text

        def generic_visit(self, _node, visited_children):
            return visited_children

    try:
        ast = parsimonious.Grammar(grammar).parse(format_str)
        return EntryParser(templates).visit(ast)
    except (parsimonious.exceptions.ParseError,
            parsimonious.exceptions.IncompleteParseError) as exc:
        # Chain the parser error so the failing position is not lost.
        raise FormatError('Bad format string') from exc
def getModelElements(self, model_str):
    """Split a model string into its entries.

    The text is parsed with a grammar that recognises ``~``-separated
    entries and sections terminated by ``|``, followed by an optional
    trailing sketch. The tree is passed to ``ModelParser`` and its
    ``entries`` attribute is returned.
    """
    model_structure_grammar = r"""
    model = (entry / section)+ sketch?
    entry = element "~" element "~" element ("~" element)? "|"
    section = element "~" element "|"
    sketch = ~r".*" #anything

    # Either an escape group, or a character that is not tilde or pipe
    element = (escape_group / ~r"[^~|]")*

    # between quotes, either escaped quote or character that is not a quote
    escape_group = "\"" ( "\\\"" / ~r"[^\"]" )* "\""
    """
    model_tree = parsimonious.Grammar(model_structure_grammar).parse(model_str)
    model_parser = ModelParser(model_tree)
    return model_parser.entries
def to_ast(s):
    """Parse the term *s* and return the parsimonious parse tree.

    The grammar accepts booleans, ``fun ... end`` functions, atoms, lists
    (``[...]``), tuples (``{...}``), numbers, bitstrings (``<<...>>``) and
    double-quoted strings; this looks like Erlang term syntax, though that
    is not stated in the code.

    Parameters:
        s: the text to parse; it must match the ``term`` rule in full.

    Returns:
        The root ``parsimonious`` node for the parsed term.

    Raises:
        parsimonious.exceptions.ParseError: if *s* does not match.
    """
    # The grammar lives in a normal (non-raw) string, so every backslash
    # meant for the regex engine is written doubled. This yields exactly the
    # same grammar text as the former single-backslash spelling, without
    # relying on Python's deprecated pass-through of invalid escapes.
    grammar = parsimonious.Grammar("""\
    term = boolean / function / atom / list / tuple / number / bitstring / string
    _ = ~"\\s*"
    list = ("[" _ term (_ "," _ term)* _ "]") / ("[" _ "]")
    tuple = ("{" _ term (_ "," _ term)* _ "}") / ("{" _ "}")
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    number = ~"\\-?[0-9]+(\\.[0-9]+)?(e\\-?[0-9]+)?"
    boolean = "true" / "false"
    bitstring = ("<<" _ (byte _ "," _)* byte _ ">>") /
                ("<<" _ (byte _ "," _)* bitexp _ ">>") /
                ("<<" bitstr ">>")
    bitexp = byte ":" byte
    byte = ~"[0-9]+"
    string = '"' ~r'(\\\\"|[^"])*' '"'
    bitstr = '"' ~r'(\\\\"|[^"])*' '"'
    function = "fun" point (_ ";" _ point)* _ "end"
    point = ("(" _ (term (_ "," _ term)*)? _ ")" _ "->" _ term) /
            ("(" _ any (_ "," _ any)* ")" _ "->" _ term)
    any = "_"
    """)
    return grammar.parse(s)
def getFileSections(self, file_str):
    """Split a model file into macro sections and main sections.

    The text is parsed with a grammar that allows an optional ``{...}``
    encoding header followed by one or more ``:MACRO:`` ...
    ``:END OF MACRO:`` blocks or main-body blocks. The tree is passed to
    ``FileParser`` and its ``entries`` attribute is returned.
    """
    # the leading 'r' for 'raw' in this string is important for handling backslashes properly
    file_structure_grammar = r"""
    file = encoding? (macro / main)+
    macro = ":MACRO:" _ name _ "(" _ (name _ ","? _)+ _ ":"? _ (name _ ","? _)* _ ")" ~r".+?(?=:END OF MACRO:)" ":END OF MACRO:"
    main = !":MACRO:" ~r".+(?!:MACRO:)"

    name = basic_id / escape_group
    basic_id = ~r"[a-zA-Z][a-zA-Z0-9_\s]*"

    # between quotes, either escaped quote or character that is not a quote
    escape_group = "\"" ( "\\\"" / ~r"[^\"]" )* "\""
    encoding = ~r"\{[^\}]*\}"
    _ = ~r"[\s\\]*" # whitespace character
    """
    file_tree = parsimonious.Grammar(file_structure_grammar).parse(file_str)
    sections = FileParser(file_tree)
    return sections.entries
def parse_lookup_expression(element):
    """
    Translate a lookup that is defined as its own model element.

    ``element['expr']`` must be a parenthesised list of ``(x, y)`` number
    pairs, optionally preceded by a bracketed range. Returns
    ``{'py_expr': ..., 'arguments': 'x'}`` where ``py_expr`` is a
    ``functions.lookup(x, [xs], [ys])`` call.
    """
    lookup_grammar = r"""
    lookup = _ "(" range? _ ( "(" _ number _ "," _ number _ ")" _ ","? _ )+ ")"
    number = ("+"/"-")? ~r"\d+\.?\d*(e[+-]\d+)?"
    _ = ~r"[\s\\]*" # whitespace character
    range = _ "[" ~r"[^\]]*" "]" _ ","
    """
    tree = parsimonious.Grammar(lookup_grammar).parse(element['expr'])

    class LookupParser(parsimonious.NodeVisitor):
        """Walks the lookup tree and records the python translation."""

        def __init__(self, ast):
            self.translation = ""
            self.new_structure = []
            self.visit(ast)

        def visit__(self, node, children):
            # whitespace contributes nothing to the translation
            return ''

        def visit_lookup(self, node, children):
            # the longest child is the text holding all the (x, y) pairs
            longest = max(children, key=len)
            values = longest.replace('(', '').replace(')', '').split(',')
            template = "functions.lookup(x, [%(xs)s], [%(ys)s])"
            self.translation = template % {
                'xs': ','.join(values[::2]),    # even positions: x values
                'ys': ','.join(values[1::2]),   # odd positions: y values
            }

        def generic_visit(self, node, children):
            joined = ''.join(child for child in children if child)
            return joined if joined else node.text

    parsed = LookupParser(tree)
    return {'py_expr': parsed.translation, 'arguments': 'x'}
# USAGE_PATTERN = "[ FILE <another-argument> ]"

#------------------------------------------------------------------------------

def to_str(node):
    """Return the source text covered by *node*.

    Leaf nodes contribute their own ``text``; for inner nodes the text of
    all children is concatenated recursively.
    """
    if node.children:
        return ''.join([to_str(child) for child in node])
    else:
        return node.text

#------------------------------------------------------------------------------

def slurp(fname):
    """Return the whole content of the text file *fname*.

    The file is opened in a ``with`` block so the handle is released even
    when reading raises.
    """
    with open(fname, 'r') as f:
        return f.read()

# Developer toggle: build the grammar from GRAMMAR_FILE (default) or fall
# back to the pre-built grammar shipped in the ``bootstrap`` module.
if True:
    grammar = slurp(GRAMMAR_FILE)
    g = parsimonious.Grammar(grammar)
else:
    import bootstrap
    g = bootstrap.docopt_grammar

AST = g.parse(USAGE_PATTERN)
# print( ' ' + str(eval(to_str(AST))) )
# print( ' ' + to_str(AST) )
print(AST)

#------------------------------------------------------------------------------
ParseError,
)


# PEG grammar for type strings.
#
# * A ``type`` is either a tuple type or a basic type.
# * A tuple type is a parenthesised, comma-separated list of types (or the
#   empty tuple ``()``), optionally followed by array suffixes.
# * A basic type is an alphabetic base name, an optional size suffix
#   (``<digits>`` or ``<digits>x<digits>``) and optional array suffixes.
# * An array suffix is ``[<digits>]`` (fixed length) or ``[]`` (dynamic), and
#   may be repeated.
# * ``digits`` never starts with a zero.
#
# NOTE(review): this looks like a grammar for ABI-style type strings such as
# ``uint256[2][]`` or ``(int,bytes32)[]`` -- confirm against the callers.
grammar = parsimonious.Grammar(r"""
type = tuple_type / basic_type

tuple_type = components arrlist?
components = non_zero_tuple / zero_tuple

non_zero_tuple = "(" type next_type* ")"
next_type = "," type

zero_tuple = "()"

basic_type = base sub? arrlist?

base = alphas

sub = two_size / digits
two_size = (digits "x" digits)

arrlist = (const_arr / dynam_arr)+
const_arr = "[" digits "]"
dynam_arr = "[]"

alphas = ~"[A-Za-z]+"
digits = ~"[1-9][0-9]*"
""")


class NodeVisitor(parsimonious.NodeVisitor):
def __init__(self):
    """Compile the checker's PEG grammar and seed the set of known names.

    Both inputs are read from class-level constants on
    ``CPPTypeNameChecker``.
    """
    checker = CPPTypeNameChecker
    self._grammar = parsimonious.Grammar(checker.PEG_GRAMMAR)
    self._known_names = set(checker.KNOWN_BASIC_TYPE_NAMES)
class _PtsNodeVisitor(_AsyncNodeVisitor):
    """Asynchronous visitor that evaluates a parsed time expression.

    Every ``visit_*`` coroutine receives the parse node and the already
    visited children and, for time-producing rules, returns a ``_Time``.
    ``api`` gives access to subtitles, playback, video, audio and the GUI;
    ``origin`` is an optional reference value used for unary (relative)
    operations and by the dialog.

    NOTE(review): the plain ``_Time(value)`` form is presumably expressed in
    milliseconds (``visit_seconds`` multiplies by 1000, ``visit_milliseconds``
    does not) -- confirm against ``_Time``.
    """

    # Presumably tells the base visitor to let these exceptions propagate
    # unwrapped -- verify against _AsyncNodeVisitor.
    unwrapped_exceptions = (CommandError, )
    grammar = parsimonious.Grammar(GRAMMAR)

    def __init__(self, api: Api, origin: Optional[int]) -> None:
        """Remember the API facade and the optional origin value."""
        self._api = api
        self._origin = origin

    # --- grammar features ---

    async def generic_visit(self, node: Any, visited: list[Any]) -> Any:
        """Fallback: anonymous leaf nodes yield their text, others their flattened children."""
        if not node.expr_name and not node.children:
            return node.text
        return _flatten(visited)

    async def visit_unary_operation(self, node: Any, visited: list[Any]) -> Any:
        """Evaluate ``<operator><time>``.

        With an origin set, the expression is treated as
        ``origin <operator> time``; otherwise a ``-`` simply negates the
        time value in place.
        """
        operator, time = _flatten(visited)
        if self._origin is not None:
            return await self.visit_binary_operation(
                node, [_Time(self._origin), operator, time])
        if operator == "-":
            time.value *= -1
        return time

    async def visit_binary_operation(self, node: Any, visited: list[Any]) -> Any:
        """Evaluate ``<time1> <operator> <time2>`` for ``+`` and ``-``.

        Raises NotImplementedError for any other operator.
        """
        time1, operator, time2 = _flatten(visited)
        try:
            func = {"+": _Time.add, "-": _Time.sub}[operator]
        except LookupError as ex:
            raise NotImplementedError(f"unknown operator: {operator}") from ex
        return func(time1, time2, self._api)

    async def visit_line(self, node: Any, visited: list[Any]) -> Any:
        """Top-level rule: unpack the first flattened child via ``unpack(api)``."""
        return _flatten(visited)[0].unpack(self._api)

    # --- basic tokens ---

    async def visit_integer(self, node: Any, visited: list[Any]) -> Any:
        return int(node.text)

    async def visit_decimal(self, node: Any, visited: list[Any]) -> Any:
        return float(node.text)

    async def visit_rel(self, node: Any, visited: list[Any]) -> Any:
        """Map a one-letter relation (c/p/n/f/l) to its ``_Token`` constant."""
        try:
            return {
                "c": _Token.CURRENT,
                "p": _Token.PREVIOUS,
                "n": _Token.NEXT,
                "f": _Token.FIRST,
                "l": _Token.LAST,
            }[node.text]
        except LookupError as ex:
            raise NotImplementedError(f"unknown relation: {node.text}") from ex

    async def visit_start(self, node: Any, visited: list[Any]) -> Any:
        return _Token.START

    async def visit_end(self, node: Any, visited: list[Any]) -> Any:
        return _Token.END

    # --- times ---

    async def visit_milliseconds(self, node: Any, visited: list[Any]) -> Any:
        return _Time(_flatten(visited)[0])

    async def visit_seconds(self, node: Any, visited: list[Any]) -> Any:
        # seconds are scaled by 1000 and truncated to an int
        return _Time(int(_flatten(visited)[0] * 1000))

    async def visit_minutes(self, node: Any, visited: list[Any]) -> Any:
        """Combine minutes with an optional seconds part (third flattened child)."""
        visited = _flatten(visited)
        minutes = visited[0]
        seconds = visited[2] if len(visited) >= 3 and visited[2] else 0
        seconds += minutes * 60
        return _Time(int(seconds * 1000))

    async def visit_colon_time(self, node: Any, visited: list[Any]) -> Any:
        """Evaluate a time matched with the named regex groups h, m, s and ms.

        Missing groups count as zero; the ``ms`` group is read as the digits
        after the decimal point.
        """
        value = float("0." + (node.match.group("ms") or "0"))
        value += int(node.match.group("s") or "0")
        value += int(node.match.group("m") or "0") * 60
        value += int(node.match.group("h") or "0") * 3600
        return _Time(int(value * 1000))

    async def visit_subtitle(self, node: Any, visited: list[Any]) -> Any:
        """Start or end time of the subtitle with the given 1-based number.

        The number is clamped to ``[1, number of events]``; with no events
        at all the lookup fails and the result is ``_Time(0)``.
        """
        _, num, boundary = _flatten(visited)
        idx = max(1, min(num, len(self._api.subs.events))) - 1
        try:
            sub = self._api.subs.events[idx]
        except IndexError:
            sub = None
        return _Time(_Token.start_end(sub, boundary) if sub else 0)

    async def visit_frame(self, node: Any, visited: list[Any]) -> Any:
        num, _ = _flatten(visited)
        return _Time(num, _TimeUnit.FRAME)

    async def visit_keyframe(self, node: Any, visited: list[Any]) -> Any:
        num, _ = _flatten(visited)
        return _Time(num, _TimeUnit.KEYFRAME)

    async def visit_rel_subtitle(self, node: Any, visited: list[Any]) -> Any:
        """Start or end time of the first, last, or selection-relative subtitle.

        For relations other than FIRST/LAST the first selected event is
        taken and passed through ``_Token.prev_next``. Any lookup failure
        (e.g. empty list) yields ``_Time(0)``.
        """
        direction, _, boundary = _flatten(visited)
        sub: Optional[AssEvent]
        try:
            if direction == _Token.FIRST:
                sub = self._api.subs.events[0]
            elif direction == _Token.LAST:
                sub = self._api.subs.events[-1]
            else:
                sub = self._api.subs.selected_events[0]
                sub = _Token.prev_next(sub, direction)
        except LookupError:
            sub = None
        return _Time(_Token.start_end(sub, boundary) if sub else 0)

    async def visit_rel_frame(self, node: Any, visited: list[Any]) -> Any:
        """Frame relative to the current playback position.

        FIRST is frame 1, LAST is the number of timecodes of the current
        video stream (CommandError if unavailable), CURRENT is the current
        pts itself; any other direction steps from the current pts by the
        delta derived from the direction.
        """
        direction, _ = _flatten(visited)
        origin = self._api.playback.current_pts
        if direction == _Token.FIRST:
            return _Time(1, _TimeUnit.FRAME)
        if direction == _Token.LAST:
            current_stream = self._api.video.current_stream
            if not current_stream or not current_stream.timecodes:
                raise CommandError("timecode information is not available")
            return _Time(len(current_stream.timecodes), _TimeUnit.FRAME)
        if direction == _Token.CURRENT:
            return _Time(origin)
        delta = _Token.delta_from_direction(direction)
        return _Time(_apply_frame(self._api, origin, delta))

    async def visit_rel_keyframe(self, node: Any, visited: list[Any]) -> Any:
        """Keyframe relative to the current playback position.

        FIRST is keyframe 1, LAST is the number of keyframes of the current
        video stream (CommandError if unavailable); any other direction
        steps from the current pts by the delta derived from the direction.
        """
        direction, _ = _flatten(visited)
        origin = self._api.playback.current_pts
        if direction == _Token.FIRST:
            return _Time(1, _TimeUnit.KEYFRAME)
        if direction == _Token.LAST:
            current_stream = self._api.video.current_stream
            if not current_stream or not current_stream.keyframes:
                # NOTE(review): message mentions timecodes although keyframes
                # are what is missing here -- confirm whether intentional.
                raise CommandError("timecode information is not available")
            return _Time(
                len(current_stream.keyframes),
                _TimeUnit.KEYFRAME,
            )
        delta = _Token.delta_from_direction(direction)
        return _Time(_apply_keyframe(self._api, origin, delta))

    async def visit_audio_selection(self, node: Any, visited: list[Any]) -> Any:
        """Start or end of the current audio selection."""
        _, boundary = _flatten(visited)
        if boundary == _Token.START:
            return _Time(self._api.audio.view.selection_start)
        if boundary == _Token.END:
            return _Time(self._api.audio.view.selection_end)
        raise NotImplementedError(f'unknown boundary: "{boundary}"')

    async def visit_audio_view(self, node: Any, visited: list[Any]) -> Any:
        """Start or end of the currently visible audio view."""
        _, boundary = _flatten(visited)
        if boundary == _Token.START:
            return _Time(self._api.audio.view.view_start)
        if boundary == _Token.END:
            return _Time(self._api.audio.view.view_end)
        raise NotImplementedError(f'unknown boundary: "{boundary}"')

    async def visit_default_duration(self, node: Any, visited: list[Any]) -> Any:
        return _Time(self._api.cfg.opt["subs"]["default_duration"])

    async def visit_min(self, node: Any, visited: list[Any]) -> Any:
        return _Time(0)

    async def visit_max(self, node: Any, visited: list[Any]) -> Any:
        return _Time(self._api.playback.max_pts)

    async def visit_dialog(self, node: Any, visited: list[Any]) -> Any:
        """Ask the user for a time through the time-jump dialog.

        Raises CommandCanceled when the dialog returns ``None``. A relative
        answer is added to the origin (which must then be set).
        """
        ret = await self._api.gui.exec(
            time_jump_dialog,
            relative_checked=False,
            show_radio=self._origin is not None,
            value=self._api.playback.current_pts,
        )
        if ret is None:
            raise CommandCanceled

        value, is_relative = ret
        if is_relative:
            assert self._origin is not None
            return _Time(self._origin + value)
        return _Time(value)
def test_json(self):
    """The JSON grammar parses a small document containing an object and a list."""
    document = '{"x":1, "y":[0,1,2]}'
    res = parsimonious.Grammar(JSON).parse(document)
    # parse() raises when the input is not fully matched; additionally pin
    # that the returned root node covers the entire document.
    assert res.text == document
def __init__(self, grammar, infilename):
    """Prepare the output model, compile *grammar* and parse *infilename*.

    The output filename is *infilename* with its last four characters
    replaced by ``.py``. A new model is started for that file through
    ``builder.new_model`` before the input file is parsed.
    """
    stem = infilename[:-4]  # drops the trailing four characters
    self.filename = stem + '.py'
    builder.new_model(self.filename)

    compiled = parsimonious.Grammar(grammar)
    self.grammar = compiled
    self.parse(infilename)
def get_equation_components(equation_str):
    """
    Split the equation part of a model element into its components and
    classify what type of element it is.

    Parameters
    ----------
    equation_str : basestring
        the first section in each model element - the full equation.

    Returns
    -------
    Returns a dictionary containing the following:

    real_name: basestring
        The name of the element as given in the original vensim file

    subs: list of strings
        list of subscripts or subscript elements

    expr: basestring

    kind: basestring
        What type of equation have we found?
        - *component* - normal model expression or constant
        - *lookup* - a lookup table
        - *subdef* - a subscript definition

    Examples
    --------
    >>> get_equation_components(r'constant = 25')
    {'expr': '25', 'kind': 'component', 'subs': [], 'real_name': 'constant'}

    Notes
    -----
    Real names are kept here instead of python identifiers, so that
    namespace conflicts can be resolved later, once all elements are
    brought back together.
    """
    component_structure_grammar = _include_common_grammar(r"""
    entry = component / subscript_definition / lookup_definition
    component = name _ subscriptlist? _ "=" _ expression
    subscript_definition = name _ ":" _ subscript _ ("," _ subscript)*
    lookup_definition = name _ &"(" _ expression # uses lookahead assertion to capture whole group

    name = basic_id / escape_group
    subscriptlist = '[' _ subscript _ ("," _ subscript)* _ ']'
    expression = ~r".*" # expression could be anything, at this point.

    subscript = basic_id / escape_group
    """)

    # Literal backslash-t sequences become a space first, then every run of
    # whitespace is squeezed down to a single space.
    normalised = re.sub(r"\s+", ' ', equation_str.replace('\\t', ' '))
    tree = parsimonious.Grammar(component_structure_grammar).parse(normalised)

    class ComponentParser(parsimonious.NodeVisitor):
        """Collects name, subscripts, expression and kind while visiting."""

        def __init__(self, ast):
            self.subscripts = []
            self.real_name = None
            self.expression = None
            self.kind = None
            self.visit(ast)

        def visit_subscript_definition(self, node, children):
            self.kind = 'subdef'

        def visit_lookup_definition(self, node, children):
            self.kind = 'lookup'

        def visit_component(self, node, children):
            self.kind = 'component'

        def visit_name(self, node, children):
            only_child, = children
            self.real_name = only_child.strip()

        def visit_subscript(self, node, children):
            only_child, = children
            self.subscripts.append(only_child.strip())

        def visit_expression(self, node, children):
            self.expression = node.text.strip()

        def generic_visit(self, node, children):
            joined = ''.join(child for child in children if child)
            return joined if joined else node.text

        def visit__(self, node, children):
            return ' '

    parsed = ComponentParser(tree)

    components = {}
    components['real_name'] = parsed.real_name
    components['subs'] = parsed.subscripts
    components['expr'] = parsed.expression
    components['kind'] = parsed.kind
    return components
def parse_general_expression(element, namespace=None, subscript_dict=None, macro_list=None,
                             elements_subs_dict=None):
    """
    Parses a normal expression
    # it's annoying that we have to construct and compile the grammar every time...

    The grammar is generated on every call because it embeds the known
    identifiers, subscript names/elements, functions, builders and macro
    names as regular-expression alternations.

    Parameters
    ----------
    element: dictionary
        Must contain 'expr'; 'subs' is read when translating arrays, and
        the whole element is forwarded to builder functions.

    namespace : dictionary
        Maps real names to python names (used to translate identifiers).

    subscript_dict : dictionary

    macro_list: list of dictionaries
        [{'name': 'M', 'py_name':'m', 'filename':'path/to/file', 'args':['arg1', 'arg2']}]

    elements_subs_dict : dictionary
        The dictionary with element python names as keys
        and their merged subscripts as values.

    Returns
    -------
    translation
        Dictionary with the keys 'py_expr', 'kind' and 'arguments'.

    new_elements: list of dictionaries
        If the expression contains builder functions, those builders will create new elements
        to add to our running list (that will eventually be output to a file) such as stock
        initialization and derivative funcs, etc.


    Examples
    --------
    >>> parse_general_expression({'expr': 'INTEG (FlowA, -10)',
    ...                           'py_name':'test_stock',
    ...                           'subs':None},
    ...                          {'FlowA': 'flowa'}),
    ({'kind': 'component', 'py_expr': "_state['test_stock']"},
     [{'kind': 'implicit',
       'subs': None,
       'doc': 'Provides initial conditions for test_stock function',
       'py_name': 'init_test_stock',
       'real_name': None,
       'unit': 'See docs for test_stock',
       'py_expr': '-10'},
      {'py_name': 'dtest_stock_dt',
       'kind': 'implicit',
       'py_expr': 'flowa',
       'real_name': None}])

    """
    if namespace is None:
        namespace = {}
    if subscript_dict is None:
        subscript_dict = {}

    # infix operators: vensim spelling -> python spelling
    in_ops = {
        "+": "+", "-": "-", "*": "*", "/": "/", "^": "**", "=": "==", "<=": "<=", "<>": "!=",
        "<": "<", ">=": ">=", ">": ">",
        ":and:": " and ", ":or:": " or "}  # spaces important for word-based operators

    # prefix operators: vensim spelling -> python spelling
    pre_ops = {
        "-": "-", ":not:": " not ",  # spaces important for word-based operators
        "+": " "  # space is important, so that and empty string doesn't slip through generic
    }

    # in the following, if lists are empty use non-printable character
    # everything needs to be escaped before going into the grammar, in case it includes quotes
    sub_names_list = [re.escape(x) for x in subscript_dict.keys()] or ['\\a']
    sub_elems_list = [re.escape(y).replace('"', "") for x in subscript_dict.values() for y in
                      x] or ['\\a']
    # NOTE(review): ids_list is computed but not used below.
    ids_list = [re.escape(x) for x in namespace.keys()] or ['\\a']
    in_ops_list = [re.escape(x) for x in in_ops.keys()]
    pre_ops_list = [re.escape(x) for x in pre_ops.keys()]
    if macro_list is not None and len(macro_list) > 0:
        macro_names_list = [re.escape(x['name']) for x in macro_list]
    else:
        macro_names_list = ['\\a']

    expression_grammar = _include_common_grammar(r"""
    expr_type = array / expr / empty
    expr = _ pre_oper? _ (lookup_def / build_call / macro_call / call / lookup_call / parens / number / string / reference) _ (in_oper _ expr)?

    lookup_def = lookup_with_def / lookup_regular_def
    lookup_regular_def = range? _ ( "(" _ number _ "," _ number _ ")" _ ","? _ )+
    number = ("+"/"-")? ~r"\d+\.?\d*(e[+-]\d+)?"
    range = _ "[" ~r"[^\]]*" "]" _ ","
    lookup_with_def = ~r"(WITH\ LOOKUP)"I _ "(" _ expr _ "," _ "(" _ ("[" ~r"[^\]]*" "]" _ ",")? ( "(" _ expr _ "," _ expr _ ")" _ ","? _ )+ _ ")" _ ")"

    lookup_call = lookup_call_subs _ "(" _ (expr _ ","? _)* ")" # these don't need their args parsed...
    lookup_call_subs = id _ subscript_list?
    param = (expr)+
    call = func _ "(" _ (param _ ","? _)* ")" # these don't need their args parsed...
    build_call = builder _ "(" _ arguments _ ")"
    macro_call = macro _ "(" _ arguments _ ")"
    parens = "(" _ expr _ ")"

    arguments = (expr _ ","? _)*

    reference = id _ subscript_list?
    subscript_list = "[" _ ~"\""? _ ((sub_name / sub_element) _ ~"\""? _ "!"? _ ","? _)+ _ "]"

    array = (number _ ("," / ";")? _)+ !~r"." # negative lookahead for anything other than an array
    string = "\'" ( "\\\'" / ~r"[^\']"IU )* "\'"

    id = ( basic_id / escape_group )

    sub_name = ~r"(%(sub_names)s)"IU # subscript names (if none, use non-printable character)
    sub_element = ~r"(%(sub_elems)s)"IU # subscript elements (if none, use non-printable character)

    func = ~r"(%(funcs)s)"IU # functions (case insensitive)
    in_oper = ~r"(%(in_ops)s)"IU # infix operators (case insensitive)
    pre_oper = ~r"(%(pre_ops)s)"IU # prefix operators (case insensitive)
    builder = ~r"(%(builders)s)"IU # builder functions (case insensitive)
    macro = ~r"(%(macros)s)"IU # macros from model file (if none, use non-printable character)

    empty = "" # empty string
    """ % {
        # In the following, we have to sort keywords in decreasing order of length so that the
        # peg parser doesn't quit early when finding a partial keyword
        'sub_names': '|'.join(reversed(sorted(sub_names_list, key=len))),
        'sub_elems': '|'.join(reversed(sorted(sub_elems_list, key=len))),
        'funcs': '|'.join(reversed(sorted(functions.keys(), key=len))),
        'in_ops': '|'.join(reversed(sorted(in_ops_list, key=len))),
        'pre_ops': '|'.join(reversed(sorted(pre_ops_list, key=len))),
        'builders': '|'.join(reversed(sorted(builders.keys(), key=len))),
        'macros': '|'.join(reversed(sorted(macro_names_list, key=len)))
    })

    parser = parsimonious.Grammar(expression_grammar)

    class ExpressionParser(parsimonious.NodeVisitor):
        # TODO: at some point, we could make the 'kind' identification
        # recursive on expression, so that if an expression is passed into
        # a builder function, the information
        # about whether it is a constant, or calls another function, goes with it.

        def __init__(self, ast):
            self.translation = ""
            self.subs = None  # the subscript list if given
            self.lookup_subs = []  # the subscript list if given
            self.apply_dim = set()  # the dimensions with ! if given
            self.kind = 'constant'  # change if we reference anything else
            self.new_structure = []
            self.arguments = None
            self.in_oper = None
            self.args = []
            self.visit(ast)

        def visit_expr_type(self, n, vc):
            s = ''.join(filter(None, vc)).strip()
            self.translation = s

        def visit_expr(self, n, vc):
            s = ''.join(filter(None, vc)).strip()
            self.translation = s
            return s

        def visit_param(self, n, vc):
            s = ''.join(filter(None, vc)).strip()
            self.translation = s
            self.args.append(s)
            return s

        def visit_call(self, n, vc):
            self.kind = 'component'

            # remove dimensions info (produced by !)
            function_name = vc[0].lower()
            arguments = [e.strip() for e in vc[4].split(",")]

            # add dimensions as last argument
            if self.apply_dim and function_name in vectorial_funcs:
                arguments += ["dim="+str(tuple(self.apply_dim))]
                self.apply_dim = set()

            return builder.build_function_call(functions[function_name], arguments)

        def visit_in_oper(self, n, vc):
            return in_ops[n.text.lower()]

        def visit_pre_oper(self, n, vc):
            return pre_ops[n.text.lower()]

        def visit_reference(self, n, vc):
            self.kind = 'component'

            vc[0] += '()'

            # raw string: the pattern matches a bracketed, non-empty
            # subscript list at the start of the last child
            if re.match(r"\[.+\]", vc[-1]):
                # sometimes the subscript list are not consumed
                # this is because visit_lookup_call_subs is not visited (fix?)
                py_expr = "".join(vc[:-1])
            else:
                py_expr = "".join(vc)

            if self.subs:
                # py_expr ends with '()'; strip it to get the python name
                if elements_subs_dict[py_expr[:-2]] == self.subs:
                    self.subs = None
                    return py_expr
                # NOTE(review): coords is not used afterwards; the call is
                # kept in case make_coord_dict is relied on for validation.
                coords = utils.make_coord_dict(self.subs, subscript_dict, terse=False)
                dims = [utils.find_subscript_name(subscript_dict, sub) for sub in self.subs]
                self.subs = None
                return builder.build_function_call(
                    functions_utils["rearrange"],
                    [py_expr, repr(dims), "_subscript_dict"])

            return py_expr

        def visit_lookup_call_subs(self, n, vc):
            # needed to avoid doing the rearrange in the lookup arguments
            # lookup_subs list makes possible to work with
            # lookups inside lookups

            # TODO: this is not visited by lookups call when having subs
            # instead visit_reference is called, need to fix that
            if self.subs:
                # NOTE(review): self.subs is cleared *before* being appended,
                # so None is what ends up in lookup_subs on this branch too.
                # Left untouched pending confirmation of the intended
                # behaviour (see TODO above).
                self.subs = None
                self.lookup_subs.append(self.subs)
                self.subs = None
            else:
                self.lookup_subs.append(None)

            return vc[0]

        def visit_lookup_call(self, n, vc):
            self.kind = 'lookup'

            py_expr = ''.join([x.strip(',') for x in vc])

            lookup_subs = self.lookup_subs.pop()
            if lookup_subs and elements_subs_dict[py_expr] != lookup_subs:
                # NOTE(review): coords is not used afterwards.
                coords = utils.make_coord_dict(lookup_subs, subscript_dict, terse=False)
                dims = [utils.find_subscript_name(subscript_dict, sub) for sub in lookup_subs]
                return builder.build_function_call(
                    functions_utils["rearrange"],
                    [py_expr, repr(dims), "_subscript_dict"])

            return py_expr

        def visit_id(self, n, vc):
            return namespace[n.text.strip()]

        def visit_lookup_regular_def(self, n, vc):
            # the longest child is the text holding all the (x, y) pairs
            pairs = max(vc, key=len)
            mixed_list = pairs.replace('(', '').replace(')', '').split(',')
            xs = mixed_list[::2]
            ys = mixed_list[1::2]
            self.arguments = 'x'
            arguments = [
                'x',
                '['+','.join(xs)+']',
                '['+','.join(ys)+']'
            ]
            return builder.build_function_call(functions_utils['lookup'], arguments)

        def visit_lookup_with_def(self, n, vc):
            """ This exists because vensim has multiple ways of doing lookups.
            Which is frustrating."""
            x_val = vc[4]
            pairs = vc[11]
            mixed_list = pairs.replace('(', '').replace(')', '').split(',')
            xs = mixed_list[::2]
            ys = mixed_list[1::2]
            arguments = [
                x_val,
                '['+','.join(xs)+']',
                '['+','.join(ys)+']'
            ]
            return builder.build_function_call(functions_utils['lookup'], arguments)

        def visit_array(self, n, vc):
            if 'subs' in element and element['subs']:  # first test handles when subs is not defined
                coords = utils.make_coord_dict(element['subs'], subscript_dict, terse=False)
                shape = utils.compute_shape(coords)
                if ';' in n.text or ',' in n.text:
                    # ';' and ',' both act as value separators
                    text = n.text.strip(';').replace(' ', '').replace(';', ',')
                    data = np.array([float(s) for s in text.split(',')])
                    data = data.reshape(shape)
                else:
                    # a single number is broadcast to the full shape
                    data = np.tile(float(n.text), shape)
                datastr = np.array2string(data, separator=',').replace('\n', '').replace(' ', '')
                return builder.build_function_call(
                    functions_utils["DataArray"],
                    [datastr, repr(coords), repr(list(coords))])
            else:
                return n.text.replace(' ', '')

        def visit_subscript_list(self, n, vc):
            refs = vc[4]
            subs = [x.strip() for x in refs.split(',')]
            coordinates = utils.make_coord_dict(subs, subscript_dict)

            # Implements basic "!" subscript functionality in Vensim.
            # Does NOT work for matrix diagonals in
            # FUNC(variable[sub1!,sub1!]) functions
            self.apply_dim.update(["%s" % s.strip('!') for s in subs if s[-1] == '!'])

            if len(coordinates):
                return ".loc[%s].squeeze().reset_coords(%s, drop=True)"\
                       % (repr(coordinates), repr(list(coordinates)))

            self.subs = ["%s" % s.strip('!') for s in subs]

            return ""

        def visit_build_call(self, n, vc):
            call = vc[0]
            arglist = vc[4]
            self.kind = 'component'
            builder_name = call.strip().lower()
            name, structure = builders[builder_name](element, subscript_dict, arglist)
            self.new_structure += structure

            if builder_name in ['get xls lookups', 'get direct lookups']:
                self.arguments = 'x'
                self.kind = 'lookup'

            # External constants
            if builder_name in ['get xls constants', 'get direct constants']:
                self.kind = 'constant'

            # External data
            if builder_name in ['get xls data', 'get direct data']:
                self.kind = 'component_ext_data'

            if builder_name == 'delay fixed':
                warnings.warn("Delay fixed only approximates solution,"
                              " may not give the same result as vensim")

            return name

        def visit_macro_call(self, n, vc):
            call = vc[0]
            arglist = vc[4]
            self.kind = 'component'
            py_name = utils.make_python_identifier(call)[0]
            # NOTE(review): the macro dicts are read with the keys
            # 'file_name' and 'params', while the docstring example shows
            # 'filename' and 'args' -- confirm which is current.
            macro = [x for x in macro_list if x['py_name'] == py_name][0]  # should match once
            name, structure = builder.add_macro(macro['py_name'], macro['file_name'],
                                                macro['params'], arglist)
            self.new_structure += structure
            return name

        def visit_arguments(self, n, vc):
            arglist = [x.strip(',') for x in vc]
            return arglist

        def visit__(self, n, vc):
            """ Handles whitespace characters"""
            return ''

        def visit_empty(self, n, vc):
            return 'None'

        def generic_visit(self, n, vc):
            return ''.join(filter(None, vc)) or n.text

    tree = parser.parse(element['expr'])
    parse_object = ExpressionParser(tree)

    return ({'py_expr': parse_object.translation,
             'kind': parse_object.kind,
             'arguments': parse_object.arguments or ''},
            parse_object.new_structure)
import parsimonious import quantumsim.circuit as ct import numpy as np import functools qasm_grammar = parsimonious.Grammar(r""" program = nl* (qubit_spec)* nl (circuit_spec)+ qubit_spec = "qubit " id nl circuit_spec = initall nl (gatelist)* meas initall = "init_all" nl gatelist = gate (more_gates)* nl more_gates = ("|" gate) gate = !meas ws* (two_qubit_gate / single_qubit_gate) ws* single_qubit_gate = gate_name ws arg two_qubit_gate = gate_name ws arg ws arg arg = ~"[A-Za-z0-9]+" gate_name = id meas = "RO " arg nl ws = " "+ nl = (comment / " " / "\n" / "\r")* comment = "#" ~".*" text = (id / "|" / " " / "\t")* id = ~"[A-Za-z0-9]+" """) sgl_qubit_gate_map = { "i": None, "my90": functools.partial(ct.RotateY, angle=-np.pi / 2), "y90": functools.partial(ct.RotateY, angle=np.pi / 2),
__author__ = 'kranonetka'

from pathlib import Path

import parsimonious

# Compile the PEG grammar once, at import time.  The grammar text is read
# from ``message.grammar``, located in the same directory as this module,
# and is decoded as UTF-8.
with (Path(__file__).parent / 'message.grammar').open('r', encoding='utf-8') as fp:
    message_grammar = parsimonious.Grammar(fp.read())
class Parser(cls, parsimonious.NodeVisitor):
    """Node visitor that mixes in ``cls`` and carries its own compiled grammar.

    ``cls``, ``self`` and ``extra_rules`` are not defined in this class; they
    are taken from the enclosing scope at the time the class body executes.
    """

    # Grammar source: the enclosing object's rules followed by the extra
    # rules, one rule per line.
    rules = "\n".join(self.rules + extra_rules)
    grammar = parsimonious.Grammar(rules)

    def rule(self, name, text):
        """Parse ``text`` with the grammar rule called ``name`` and visit the result."""
        expression = self.grammar[name]
        parse_tree = expression.parse(text)
        return self.visit(parse_tree)
def __init__(self, grammar, filename, text, dictofsubs):
    """Set up the builder and grammar, then parse ``text`` straight away.

    Parameters
    ----------
    grammar : str
        Grammar source handed to ``parsimonious.Grammar``.
    filename
        Stored on the instance and passed to ``builder.Builder``.
    text
        Passed to ``self.parse`` once the instance is configured.
    dictofsubs
        Stored on the instance and passed to ``builder.Builder``.
    """
    self.filename = filename
    self.builder = builder.Builder(filename, dictofsubs)
    compiled_grammar = parsimonious.Grammar(grammar)
    self.grammar = compiled_grammar
    self.dictofsubs = dictofsubs
    # Parsing is triggered from the constructor, after all attributes exist.
    self.parse(text)
def compileStr(self, grammarText: str, target=None, fileName: Path = None) -> "parsimonious.grammar.Grammar":
    """Compile ``grammarText`` into a parsimonious grammar object.

    ``target`` and ``fileName`` are accepted but not used by this
    implementation.
    """
    compiled = parsimonious.Grammar(grammarText)
    return compiled
def get_file_sections(file_str):
    """
    Split the text of a model file into its macro sections and its main body.

    Macro syntax follows the Vensim documentation at:
    https://www.vensim.com/documentation/index.html?macros.htm

    Parameters
    ----------
    file_str : string
        Text of the model file.

    Returns
    -------
    entries : list of dictionaries
        One dictionary per section, appended as the parse tree is visited.
        Each dictionary has the keys:

        - name: string
            the macro name, or '_main_' for the main body of the model
        - params: list of strings
            stripped names from the first name group of a macro header
            (empty for the main body)
        - returns: list of strings
            stripped names from the second name group of a macro header,
            the one after the optional ':' (empty for the main body)
        - string: string
            stripped text of the section

    Examples
    --------
    >>> get_file_sections(r'a~b~c| d~e~f| g~h~i|')
    [{'name': '_main_', 'params': [], 'returns': [], 'string': 'a~b~c| d~e~f| g~h~i|'}]

    """

    # the leading 'r' for 'raw' in this string is important for handling backslashes properly
    file_structure_grammar = _include_common_grammar(r"""
    file = encoding? (macro / main)+
    macro = ":MACRO:" _ name _ "(" _ (name _ ","? _)+ _ ":"? _ (name _ ","? _)* _ ")" ~r".+?(?=:END OF MACRO:)" ":END OF MACRO:"
    main = !":MACRO:" ~r".+(?!:MACRO:)"
    encoding = ~r"\{[^\}]*\}"
    """)

    class FileParser(parsimonious.NodeVisitor):
        """Accumulates one entry for every ``main`` or ``macro`` node visited."""

        def __init__(self, ast):
            self.entries = []
            self.visit(ast)

        def visit_main(self, node, children):
            section = dict(name='_main_',
                           params=[],
                           returns=[],
                           string=node.text.strip())
            self.entries.append(section)

        def visit_macro(self, node, children):
            # Positions within the ``macro`` sequence: 2 is the macro name,
            # 6 and 10 are the two name groups, 13 is the macro body.
            macro_name = children[2]
            param_text = children[6]
            return_text = children[10]
            body_text = children[13]

            def split_names(joined):
                if not joined:
                    return []
                return [piece.strip() for piece in joined.split(',')]

            section = dict(name=macro_name,
                           params=split_names(param_text),
                           returns=split_names(return_text),
                           string=body_text.strip())
            self.entries.append(section)

        def generic_visit(self, node, children):
            # Concatenate whatever the children produced; fall back to the
            # node's own text, and finally to an empty string.
            combined = ''.join(filter(None, children))
            return combined or node.text or ''

    syntax_tree = parsimonious.Grammar(file_structure_grammar).parse(file_str)
    return FileParser(syntax_tree).entries
def parse_general_expression(element,
                             namespace=None,
                             subscript_dict=None,
                             macro_list=None):
    """
    Translate the expression stored in ``element['expr']`` into a python
    expression string.

    The grammar is assembled and compiled on every call, because parts of it
    (subscript names, subscript elements, macro names) depend on the
    arguments.

    Parameters
    ----------
    element: dictionary
        Must contain 'expr' (the text to parse). If it contains a truthy
        'subs' entry, array literals are translated to ``xr.DataArray(...)``
        source text. The whole dictionary is also handed to builder functions.

    namespace : dictionary
        Maps identifiers as they appear in the expression to the string that
        replaces them in the translation. Defaults to an empty dictionary.

    subscript_dict : dictionary
        Keys are subscript names, values are iterables of subscript elements.
        Defaults to an empty dictionary.

    macro_list: list of dictionaries
        Each dictionary is read with the keys 'name', 'py_name', 'file_name'
        and 'params', e.g.
        [{'name': 'M', 'py_name':'m', 'file_name':'path/to/file', 'params':['arg1', 'arg2']}]

    Returns
    -------
    translation : dictionary
        {'py_expr': translated expression,
         'kind': 'constant' or 'component',
         'arguments': ''}

    new_elements: list of dictionaries
        If the expression contains builder functions, those builders will create new elements
        to add to our running list (that will eventually be output to a file) such as stock
        initialization and derivative funcs, etc.

    Examples
    --------
    Illustrative only: the exact output depends on the builder functions, and
    the returned translation dictionary additionally carries the 'arguments'
    key.

    >>> parse_general_expression({'expr': 'INTEG (FlowA, -10)',
    ...                           'py_name':'test_stock',
    ...                           'subs':None},
    ...                          {'FlowA': 'flowa'})
    ({'kind': 'component', 'py_expr': "_state['test_stock']"},
     [{'kind': 'implicit',
       'subs': None,
       'doc': 'Provides initial conditions for test_stock function',
       'py_name': 'init_test_stock',
       'real_name': None,
       'unit': 'See docs for test_stock',
       'py_expr': '-10'},
      {'py_name': 'dtest_stock_dt',
       'kind': 'implicit',
       'py_expr': 'flowa',
       'real_name': None}])

    """
    if namespace is None:
        namespace = {}
    if subscript_dict is None:
        subscript_dict = {}

    # infix operators: text in the expression -> python replacement
    in_ops = {
        "+": "+",
        "-": "-",
        "*": "*",
        "/": "/",
        "^": "**",
        "=": "==",
        "<=": "<=",
        "<>": "!=",
        "<": "<",
        ">=": ">=",
        ">": ">",
        ":and:": " and ",
        ":or:": " or "
    }  # spaces important for word-based operators

    # prefix operators: text in the expression -> python replacement
    pre_ops = {
        "-": "-",
        ":not:": " not ",  # spaces important for word-based operators
        "+": " "  # space is important, so that an empty string doesn't slip through generic
    }

    # in the following, if lists are empty use non-printable character
    # everything needs to be escaped before going into the grammar, in case it includes quotes
    sub_names_list = [re.escape(x) for x in subscript_dict.keys()] or ['\\a']
    sub_elems_list = [
        re.escape(y) for x in subscript_dict.values() for y in x
    ] or ['\\a']
    # NOTE(review): ids_list is not referenced again in this function.
    ids_list = [re.escape(x) for x in namespace.keys()] or ['\\a']
    in_ops_list = [re.escape(x) for x in in_ops.keys()]
    pre_ops_list = [re.escape(x) for x in pre_ops.keys()]
    if macro_list is not None and len(macro_list) > 0:
        macro_names_list = [re.escape(x['name']) for x in macro_list]
    else:
        macro_names_list = ['\\a']

    # The %(...)s placeholders are filled with '|'-joined alternatives below.
    expression_grammar = r"""
    expr_type = array / expr / empty
    expr = _ pre_oper? _ (lookup_def / build_call / macro_call / call / lookup_call / parens / number / reference) _ (in_oper _ expr)?

    lookup_def = ~r"(WITH\ LOOKUP)"I _ "(" _ expr _ "," _ "(" _ ("[" ~r"[^\]]*" "]" _ ",")? ( "(" _ expr _ "," _ expr _ ")" _ ","? _ )+ _ ")" _ ")"
    lookup_call = id _ "(" _ (expr _ ","? _)* ")" # these don't need their args parsed...
    call = func _ "(" _ (expr _ ","? _)* ")" # these don't need their args parsed...
    build_call = builder _ "(" _ arguments _ ")"
    macro_call = macro _ "(" _ arguments _ ")"
    parens = "(" _ expr _ ")"

    arguments = (expr _ ","? _)*

    reference = id _ subscript_list?
    subscript_list = "[" _ ((sub_name / sub_element) _ ","? _)+ "]"

    array = (number _ ("," / ";")? _)+ !~r"." # negative lookahead for anything other than an array
    number = ~r"\d+\.?\d*(e[+-]\d+)?"

    id = ( basic_id / escape_group )
    basic_id = ~r"\w[\w\d_\s\']*"IU
    escape_group = "\"" ( "\\\"" / ~r"[^\"]"IU )* "\""

    sub_name = ~r"(%(sub_names)s)"IU # subscript names (if none, use non-printable character)
    sub_element = ~r"(%(sub_elems)s)"IU # subscript elements (if none, use non-printable character)

    func = ~r"(%(funcs)s)"IU # functions (case insensitive)
    in_oper = ~r"(%(in_ops)s)"IU # infix operators (case insensitive)
    pre_oper = ~r"(%(pre_ops)s)"IU # prefix operators (case insensitive)
    builder = ~r"(%(builders)s)"IU # builder functions (case insensitive)
    macro = ~r"(%(macros)s)"IU # macros from model file (if none, use non-printable character)

    _ = ~r"[\s\\]*" # whitespace character
    empty = "" # empty string
    """ % {
        # In the following, we have to sort keywords in decreasing order of length so that the
        # peg parser doesn't quit early when finding a partial keyword
        'sub_names': '|'.join(reversed(sorted(sub_names_list, key=len))),
        'sub_elems': '|'.join(reversed(sorted(sub_elems_list, key=len))),
        'funcs': '|'.join(reversed(sorted(functions.keys(), key=len))),
        'in_ops': '|'.join(reversed(sorted(in_ops_list, key=len))),
        'pre_ops': '|'.join(reversed(sorted(pre_ops_list, key=len))),
        'builders': '|'.join(reversed(sorted(builders.keys(), key=len))),
        'macros': '|'.join(reversed(sorted(macro_names_list, key=len)))
    }

    class ExpressionParser(parsimonious.NodeVisitor):
        """Walks the parse tree and assembles the python translation.

        After construction, ``translation`` holds the translated text,
        ``kind`` is 'constant' or 'component', and ``new_structure`` holds
        the elements contributed by builder and macro calls.
        """

        # Todo: at some point, we could make the 'kind' identification recursive on expression,
        # so that if an expression is passed into a builder function, the information
        # about whether it is a constant, or calls another function, goes with it.
        def __init__(self, ast):
            self.translation = ""
            self.kind = 'constant'  # change if we reference anything else
            self.new_structure = []
            self.visit(ast)

        def visit_expr_type(self, n, vc):
            # Top-level rule: record the final translation.
            s = ''.join(filter(None, vc)).strip()
            self.translation = s

        def visit_expr(self, n, vc):
            s = ''.join(filter(None, vc)).strip()
            self.translation = s
            return s

        def visit_call(self, n, vc):
            self.kind = 'component'
            function_name = vc[0].lower()
            # vc[4] is the already-translated argument text; split it on commas
            arguments = [e.strip() for e in vc[4].split(",")]
            return builder.build_function_call(functions[function_name],
                                               arguments)

        def visit_in_oper(self, n, vc):
            return in_ops[n.text.lower()]

        def visit_pre_oper(self, n, vc):
            return pre_ops[n.text.lower()]

        def visit_reference(self, n, vc):
            self.kind = 'component'
            id_str = vc[0]
            # a reference is translated to a call of the translated identifier
            return id_str + '()'

        def visit_id(self, n, vc):
            # identifiers are looked up in the namespace passed to the outer function
            return namespace[n.text.strip()]

        def visit_lookup_def(self, n, vc):
            """ Translate an inline ``WITH LOOKUP(x, (...pairs...))`` definition.

            This exists because vensim has multiple ways of doing lookups.
            """
            x_val = vc[4]  # translated input expression
            pairs = vc[11]  # translated text of all "(x, y)" pairs
            # strip the parentheses, leaving a flat x0,y0,x1,y1,... sequence
            mixed_list = pairs.replace('(', '').replace(')', '').split(',')
            xs = mixed_list[::2]
            ys = mixed_list[1::2]
            string = "functions.lookup(%(x)s, [%(xs)s], [%(ys)s])" % {
                'x': x_val,
                'xs': ','.join(xs),
                'ys': ','.join(ys)
            }
            return string

        def visit_array(self, n, vc):
            if 'subs' in element and element[
                    'subs']:  # first test handles when subs is not defined
                coords = utils.make_coord_dict(element['subs'],
                                               subscript_dict,
                                               terse=False)
                dims = [
                    utils.find_subscript_name(subscript_dict, sub)
                    for sub in element['subs']
                ]
                shape = [len(coords[dim]) for dim in dims]
                if ';' in n.text or ',' in n.text:
                    # several values: ';' and ',' are both treated as separators
                    text = n.text.strip(';').replace(' ', '').replace(';', ',')
                    data = np.array([float(s) for s in text.split(',')]).reshape(shape)
                else:
                    # a single value is repeated to fill the whole shape
                    data = np.tile(float(n.text), shape)
                datastr = np.array2string(data, separator=',').replace(
                    '\n', '').replace(' ', '')
                return textwrap.dedent("""\
                    xr.DataArray(data=%(datastr)s,
                                 coords=%(coords)s,
                                 dims=%(dims)s )""" % {
                    'datastr': datastr,
                    'coords': repr(coords),
                    'dims': repr(dims)
                })
            else:
                # no subscripts on the element: return the text without spaces
                return n.text.replace(' ', '')

        def visit_subscript_list(self, n, vc):
            refs = vc[2]  # text between the square brackets
            subs = [x.strip() for x in refs.split(',')]
            coordinates = utils.make_coord_dict(subs, subscript_dict)
            if len(coordinates):
                return '.loc[%s]' % repr(coordinates)
            else:
                return ' '

        def visit_build_call(self, n, vc):
            call = vc[0]
            arglist = vc[4]
            self.kind = 'component'
            builder_name = call.strip().lower()
            name, structure = builders[builder_name](element, subscript_dict,
                                                     arglist)
            # elements created by the builder are collected for the caller
            self.new_structure += structure

            if builder_name == 'delay fixed':
                warnings.warn(
                    "Delay fixed only approximates solution, may not give the same "
                    "result as vensim")

            return name

        def visit_macro_call(self, n, vc):
            call = vc[0]
            arglist = vc[4]
            self.kind = 'component'
            py_name = utils.make_python_identifier(call)[0]
            macro = [x for x in macro_list
                     if x['py_name'] == py_name][0]  # should match once
            name, structure = builder.add_macro(macro['py_name'],
                                                macro['file_name'],
                                                macro['params'], arglist)
            self.new_structure += structure
            return name

        def visit_arguments(self, n, vc):
            # returns a list (one entry per argument), not a joined string
            arglist = [x.strip(',') for x in vc]
            return arglist

        def visit__(self, n, vc):
            """ Handles whitespace characters: they are dropped from the translation"""
            return ''

        def visit_empty(self, n, vc):
            return 'None'

        def generic_visit(self, n, vc):
            # join the children's translations, or fall back to the raw text
            return ''.join(filter(None, vc)) or n.text

    parser = parsimonious.Grammar(expression_grammar)
    tree = parser.parse(element['expr'])
    parse_object = ExpressionParser(tree)

    return ({
        'py_expr': parse_object.translation,
        'kind': parse_object.kind,
        'arguments': ''
    }, parse_object.new_structure)
def get_equation_components(equation_str, root_path=None):
    """
    Split the equation part of a model element into its name, subscripts
    and expression, and classify which sort of definition it is.

    Parameters
    ----------
    equation_str : basestring
        the first section in each model element - the full equation.

    root_path: basestring
        the root path of the vensim file (necessary to resolve external
        data file paths); forwarded to `get_external_data` for imported
        subscripts.

    Returns
    -------
    A dictionary with the following keys:

    real_name: basestring
        The name of the element as given in the original vensim file

    subs: list of strings
        list of subscripts or subscript elements

    expr: basestring
        stripped text of the expression, or None if no expression was found

    kind: basestring
        What type of equation have we found?
        - *component* - normal model expression or constant
        - *lookup* - a lookup table
        - *subdef* - a subscript definition
        - *data* - a data variable
        - *test* - a definition introduced by a keyword

    keyword: basestring or None
        the keyword text including its colons, when one is present

    Notes
    -----
    Python identifiers are not created here; real names are returned, so
    that potential namespace conflicts can be managed once everything
    comes back together.
    """

    component_structure_grammar = _include_common_grammar(r"""
    entry = component / data_definition / test_definition / subscript_definition / lookup_definition
    component = name _ subscriptlist? _ "=" "="? _ expression
    subscript_definition = name _ ":" _ (imported_subscript / literal_subscript)
    data_definition = name _ subscriptlist? _ keyword? _ ":=" _ expression
    lookup_definition = name _ subscriptlist? &"(" _ expression # uses lookahead assertion to capture whole group
    test_definition = name _ subscriptlist? _ &keyword _ expression

    name = basic_id / escape_group
    literal_subscript = subscript _ ("," _ subscript _)*
    imported_subscript = func _ "(" _ (string _ ","? _)* ")"
    subscriptlist = '[' _ subscript _ ("," _ subscript _)* _ ']'
    expression = ~r".*" # expression could be anything, at this point.
    keyword = ":" _ basic_id _ ":"

    subscript = basic_id / escape_group
    func = basic_id
    string = "\'" ( "\\\'" / ~r"[^\']"IU )* "\'"
    """)

    class ComponentParser(parsimonious.NodeVisitor):
        """Records name, subscripts, expression, kind and keyword while visiting."""

        def __init__(self, ast):
            self.subscripts = []
            self.real_name = None
            self.expression = None
            self.kind = None
            self.keyword = None
            self.visit(ast)

        # --- the matched alternative of ``entry`` decides the kind ---

        def visit_component(self, node, children):
            self.kind = 'component'

        def visit_data_definition(self, node, children):
            self.kind = 'data'

        def visit_test_definition(self, node, children):
            self.kind = 'test'

        def visit_subscript_definition(self, node, children):
            self.kind = 'subdef'

        def visit_lookup_definition(self, node, children):
            self.kind = 'lookup'

        # --- pieces of the definition ---

        def visit_name(self, node, children):
            [raw_name] = children
            self.real_name = raw_name.strip()

        def visit_subscript(self, node, children):
            [raw_subscript] = children
            self.subscripts.append(raw_subscript.strip())

        def visit_imported_subscript(self, node, children):
            function_text = children[0]
            arguments_text = children[4]  # todo: make this less fragile?
            self.subscripts += get_external_data(function_text,
                                                 arguments_text,
                                                 root_path)

        def visit_keyword(self, node, children):
            self.keyword = node.text.strip()

        def visit_expression(self, node, children):
            self.expression = node.text.strip()

        # --- fallbacks ---

        def visit__(self, node, children):
            return ' '

        def generic_visit(self, node, children):
            joined = ''.join(filter(None, children))
            return joined or node.text

    # replace any amount of whitespace with a single space
    normalized = equation_str.replace('\\t', ' ')
    normalized = re.sub(r"\s+", ' ', normalized)

    syntax_tree = parsimonious.Grammar(component_structure_grammar).parse(normalized)
    result = ComponentParser(syntax_tree)

    return {
        'real_name': result.real_name,
        'subs': result.subscripts,
        'expr': result.expression,
        'kind': result.kind,
        'keyword': result.keyword,
    }
JMPInstruction, CMPInstruction, SLTInstruction, DATInstruction) REDCODE_GRAMMAR = parsimonious.Grammar(""" line = ws instruction? ws comment? ws comment = ";" ~".*" sep = ws "," ws ws = ~"\\s*" instruction = MOV / ADD / JMP / CMP / SLT / DAT MOV = "MOV" ws param sep param ADD = "ADD" ws param sep param JMP = "JMP" ws param CMP = "CMP" ws param sep param SLT = "SLT" ws param sep param DAT = "DAT" ws param sep param param = direct / immediate / b_indirect direct = "$"? number immediate = "#" number b_indirect = "@" number number = ~"-?[0-9]+" """) class CompilerVisitor(NodeVisitor):
def get_model_elements(model_str):
    """
    Split the text of a model into its individual elements.

    The text is read as a sequence of entries and sections, optionally
    followed by trailing sketch text:

    - an entry is ``equation ~ units ~ doc |``, with an optional fourth
      ``~``-separated field before the pipe, e.g. ``a~b~c|``
    - a section is ``title ~ text |``, e.g. ``****.Control***~ text |``
    - whatever follows the last entry or section is matched by the
      ``sketch`` rule and produces no output

    A field is any run of escape groups and characters other than tilde
    and pipe. ``escape_group`` is not defined in this function; it is
    expected to come from `_include_common_grammar`.

    Parameters
    ----------
    model_str : string
        The model text.

    Returns
    -------
    entries : list of dictionaries
        One dictionary per element, each with the keys:

        - eqn: stripped first field ('' for a section)
        - unit: units as returned by `parse_units` for the stripped second
          field ('' for a section)
        - lims: ``str()`` of the limits returned by `parse_units`
          ('' for a section)
        - doc: stripped third field of an entry, or stripped second field
          of a section
        - kind: 'entry' or 'section'

        A section whose stripped text equals "Simulation Control Parameters"
        is left out of the result.

    Notes
    -----
    - Tildes and pipes are not allowed in element docstrings, but we should
      still handle them there
    """

    model_structure_grammar = _include_common_grammar(r"""
    model = (entry / section)+ sketch?
    entry = element "~" element "~" element ("~" element)? "|"
    section = element "~" element "|"
    sketch = ~r".*" #anything

    # Either an escape group, or a character that is not tilde or pipe
    element = ( escape_group / ~r"[^~|]")*
    """)

    class ModelParser(parsimonious.NodeVisitor):
        """Collects a dictionary for every entry and every retained section."""

        def __init__(self, ast):
            self.entries = []
            self.visit(ast)

        def visit_entry(self, node, children):
            # children alternate field, "~", field, "~", field, ...
            units, lims = parse_units(children[2].strip())
            record = {
                'eqn': children[0].strip(),
                'unit': units,
                'lims': str(lims),
                'doc': children[4].strip(),
                'kind': 'entry',
            }
            self.entries.append(record)

        def visit_section(self, node, children):
            section_text = children[2].strip()
            if section_text == "Simulation Control Parameters":
                # the control-section header carries no model information
                return
            record = {
                'eqn': '',
                'unit': '',
                'lims': '',
                'doc': section_text,
                'kind': 'section',
            }
            self.entries.append(record)

        def generic_visit(self, node, children):
            combined = ''.join(filter(None, children))
            return combined or node.text or ''

    syntax_tree = parsimonious.Grammar(model_structure_grammar).parse(model_str)
    return ModelParser(syntax_tree).entries
def __init__(self, grammar, infilename):
    """Configure the translator for ``infilename`` and parse it immediately.

    Parameters
    ----------
    grammar : str
        Grammar source handed to ``parsimonious.Grammar``.
    infilename : str
        Path of the input file. The output filename stored in
        ``self.filename`` is the same path with its extension replaced
        by ``.py``.
    """
    # Function-scope import keeps this block self-contained.
    import os

    # Strip whatever extension is present rather than a fixed four
    # characters: ``model.mdl`` still maps to ``model.py``, and names such
    # as ``model.xmile`` or ``model`` no longer lose part of their stem.
    self.filename = os.path.splitext(infilename)[0] + '.py'
    self.builder = builder.Builder(self.filename)
    self.grammar = parsimonious.Grammar(grammar)
    # Parsing is triggered from the constructor, after all attributes exist.
    self.parse(infilename)
def _get_grammar() -> parsimonious.Grammar:
    """Read ``grammar.parsimonious`` from this module's directory and compile it."""
    module_dir = os.path.dirname(__file__)
    grammar_path = os.path.join(module_dir, "grammar.parsimonious")
    with open(grammar_path) as _grammar_file:
        grammar_source = _grammar_file.read()
    return parsimonious.Grammar(grammar_source)  # type: ignore
grammar = parsimonious.Grammar(r""" type = types optional? types = basic_type / alias_type / container_type / tuple_type / array_type container_type = container_types optional? arrlist? container_types = zero_container / non_zero_container tuple_type = type const_arr optional? array_type = type dynam_arr optional? non_zero_container = "{" type next_type* "}" next_type = "," type zero_container = "{}" optional = "?" basic_type = basic_types optional? arrlist? basic_types = integer_types / bit_type bit_type = "bit" integer_types = base_integer_type bit_size bit_size = ~"[1-9][0-9]*" base_integer_type = "uint" / "scalar" alias_type = alias_types optional? arrlist? alias_types = bool_type / bytesN_type / bytes_type / byte_type bytesN_type = bytes_type digits bool_type = "bool" bytes_type = "bytes" byte_type = "byte" arrlist = dynam_arr / const_arr dynam_arr = dynam_arr_comp any_arr_comp* const_arr = const_arr_comp any_arr_comp* any_arr_comp = (const_arr_comp / dynam_arr_comp)* dynam_arr_comp = "[]" const_arr_comp = "[" digits "]" digits = ~"[1-9][0-9]*" """)