def _as_float(tokens):
    """Parse action: convert the single combined numeric token to ``float``."""
    return float(tokens[0])


# A number with an optional fractional part, delivered as a Python float.
float_number = Combine(integer + Optional(point + Optional(number)))
float_number.setParseAction(_as_float)

# PV names according to app developer guide and tech-talk email thread at:
# https://epics.anl.gov/tech-talk/2019/msg01429.php
_pv_record = Word(alphanums + '_-+:[]<>;{}')
_pv_field = Combine('.') + Word(printables)
pv_name = Combine(_pv_record + Optional(_pv_field))

# A value is tried as a float first; anything else is kept as a raw word.
pv_value = float_number | Word(printables)
pv_assignment = pv_name + pv_value

# '#' followed by the remainder of the line.
comment = Literal("#") + Regex(r".*")

# NAME=value pairs, separated by runs of ';' and/or ','.
_equals = Literal("=").suppress()
_macro_separator = Word(";,").suppress()
macro = Group(Word(alphas) + _equals + pv_name)
macros = Optional(macro + ZeroOrMore(_macro_separator + macro))

# 'file <name>' where the name may be wrapped in ignored quotes, optionally
# followed by an ignored comma and a macro list.
_file_reference = file_name | ignored_quote + file_name + ignored_quote
file_include = (
    Literal("file")
    + _file_reference
    + Optional(ignored_comma)
    + macros
)


def line(contents):
    """Wrap *contents* so that it is matched as one whole line.

    The result starts at a line start, holds zero or more grouped
    occurrences of *contents*, and ends at a (suppressed) line end.
    """
    grouped_items = ZeroOrMore(Group(contents))
    return LineStart() + grouped_items + LineEnd().suppress()


# One line of a request file: an include, a (dropped) comment or a PV name.
req_line = line(file_include | comment.suppress() | pv_name)
# A request file is one or more such lines up to the end of the input.
req_file = OneOrMore(req_line) + StringEnd().suppress()
#Grab the '&', '*' or '**' type bit in (const QString & foo, int ** bar) pointer_or_reference = oneOf('* & &&') #The '=QString()' or '=false' bit in (int foo = 4, bool bar = false) default_value = Literal('=') + OneOrMore(number | quotedString | input_type | parentheses_pair | angle_bracket_pair | square_bracket_pair | brace_pair | Word('|&^')) #A combination building up the interesting bit -- the argument type, e.g. 'const QString &', 'int' or 'char*' argument_type = Optional(qualifier, default='')("qualifier") + \ input_type("input_type") + \ Optional(pointer_or_reference, default='')("pointer_or_reference1") + \ Optional('const')('const_pointer_or_reference') + \ Optional(pointer_or_reference, default='')("pointer_or_reference2") + \ Optional('...')('variadic_parameter_pack') #Argument + variable name + default argument = Group(argument_type('argument_type') + Optional(input_name) + Optional(default_value)) #List of arguments in parentheses with an optional 'const' on the end arglist = LPAR + Optional(delimitedList(argument)('arg_list') + Optional(COMMA + '...')('var_args')) + RPAR def normalise(symbol): """ Takes a c++ symbol or funtion and splits it into symbol and a normalised argument list. :Parameters: symbol : string A C++ symbol or function definition like ``PolyVox::Volume``, ``Volume::printAll() const`` :return: a tuple consisting of two strings: ``(qualified function name or symbol, normalised argument list)`` """
def usfmToken(key):
    """Build a grouped matcher for the marker *key*.

    The match consists of the (suppressed) ``backslash`` expression, the
    literal text *key*, and trailing whitespace (also suppressed), so the
    resulting group holds only the marker text itself.
    """
    marker = Suppress(backslash) + Literal(key)
    trailing_space = Suppress(White())
    return Group(marker + trailing_space)
class ExplicitStateUpdater(StateUpdateMethod):
    '''
    An object that can be used for defining state updaters via a simple
    description (see below). Resulting instances can be passed to the
    ``method`` argument of the `NeuronGroup` constructor. As other state
    updater functions the `ExplicitStateUpdater` objects are callable,
    returning abstract code when called with an `Equations` object.

    A description of an explicit state updater consists of a (multi-line)
    string, containing assignments to variables and a final "x_new = ...",
    stating the integration result for a single timestep. The assignments
    can be used to define an arbitrary number of intermediate results and
    can refer to ``f(x, t)`` (the function being integrated, as a function of
    ``x``, the previous value of the state variable and ``t``, the time) and
    ``dt``, the size of the timestep.

    For example, to define a Runge-Kutta 4 integrator (already provided as
    `rk4`), use::

            k1 = dt*f(x,t)
            k2 = dt*f(x+k1/2,t+dt/2)
            k3 = dt*f(x+k2/2,t+dt/2)
            k4 = dt*f(x+k3,t+dt)
            x_new = x+(k1+2*k2+2*k3+k4)/6

    Note that for stochastic equations, the function `f` only corresponds to
    the non-stochastic part of the equation. The additional function `g`
    corresponds to the stochastic part that has to be multiplied with the
    stochastic variable xi (a standard normal random variable -- if the
    algorithm needs a random variable with a different variance/mean you have
    to multiply/add it accordingly). Equations with more than one
    stochastic variable do not have to be treated differently, the part
    referring to ``g`` is repeated for all stochastic variables automatically.

    Stochastic integrators can also make reference to ``dW`` (a normal
    distributed random number with variance ``dt``) and ``g(x, t)``, the
    stochastic part of an equation. A stochastic state updater could therefore
    use a description like::

        x_new = x + dt*f(x,t) + g(x, t) * dW

    For simplicity, the same syntax is used for state updaters that only support
    additive noise, even though ``g(x, t)`` does not depend on ``x`` or ``t``
    in that case.

    There are some restrictions on the complexity of the expressions (but most
    can be worked around by using intermediate results as in the above Runge-
    Kutta example): Every statement can only contain the functions ``f`` and
    ``g`` once; The expressions have to be linear in the functions, e.g. you
    can use ``dt*f(x, t)`` but not ``f(x, t)**2``.

    Parameters
    ----------
    description : str
        A state updater description (see above).
    stochastic : {None, 'additive', 'multiplicative'}
        What kind of stochastic equations this state updater supports: ``None``
        means no support of stochastic equations, ``'additive'`` means only
        equations with additive noise and ``'multiplicative'`` means
        supporting arbitrary stochastic equations.
    custom_check : callable, optional
        If given (and truthy), it is called as ``custom_check(eqs, variables)``
        at the start of every call of the state updater, after the built-in
        checks for stochastic equations.

    Raises
    ------
    SyntaxError
        If the parsing of the description failed. The ``text``, ``offset``
        and ``lineno`` attributes are filled in from the parser's error.

    Notes
    -----
    Since clocks are updated *after* the state update, the time ``t`` used
    in the state update step is still at its previous value. Enumerating the
    states and discrete times, ``x_new = x + dt*f(x, t)`` is therefore
    understood as :math:`x_{i+1} = x_i + dt f(x_i, t_i)`, yielding the correct
    forward Euler integration. If the integrator has to refer to the time at
    the end of the timestep, simply use ``t + dt`` instead of ``t``.

    See also
    --------
    euler, rk2, rk4, milstein
    '''

    #===========================================================================
    # Parsing definitions
    #===========================================================================
    #: Legal names for temporary variables (any identifier except ``x_new``)
    TEMP_VAR = ~Literal('x_new') + Word(
        string.ascii_letters + '_',
        string.ascii_letters + string.digits + '_').setResultsName('identifier')

    #: A single expression (everything up to the end of the line)
    EXPRESSION = restOfLine.setResultsName('expression')

    #: An assignment statement
    STATEMENT = Group(TEMP_VAR + Suppress('=') +
                      EXPRESSION).setResultsName('statement')

    #: The last line of a state updater description
    OUTPUT = Group(Suppress(Literal('x_new')) + Suppress('=') +
                   EXPRESSION).setResultsName('output')

    #: A complete state updater description
    DESCRIPTION = ZeroOrMore(STATEMENT) + OUTPUT

    def __init__(self, description, stochastic=None, custom_check=None):
        '''
        Parse `description` and store its statements in sympy form.

        After construction, ``self.statements`` is a list of
        ``(name, expression)`` tuples for the intermediate assignments (names
        prefixed with ``'__'``), ``self.output`` is the expression of the
        ``x_new = ...`` line and ``self.symbols`` maps the prefixed names to
        the symbols used in those expressions.

        Raises
        ------
        SyntaxError
            If `description` does not match `DESCRIPTION`.
        '''
        self._description = description
        self.stochastic = stochastic
        self.custom_check = custom_check

        try:
            parsed = ExplicitStateUpdater.DESCRIPTION.parseString(
                description, parseAll=True)
        except ParseException as p_exc:
            # Re-package the parser error as a SyntaxError carrying the
            # offending line and position
            ex = SyntaxError('Parsing failed: ' + str(p_exc.msg))
            ex.text = str(p_exc.line)
            ex.offset = p_exc.column
            ex.lineno = p_exc.lineno
            raise ex

        self.statements = []
        self.symbols = SYMBOLS.copy()
        for element in parsed:
            expression = str_to_sympy(element.expression)
            # Replace all symbols used in state updater expressions by unique
            # names that cannot clash with user-defined variables or functions
            expression = expression.subs(sympy.Function('f'),
                                         self.symbols['__f'])
            expression = expression.subs(sympy.Function('g'),
                                         self.symbols['__g'])
            symbols = list(expression.atoms(sympy.Symbol))
            unique_symbols = []
            for symbol in symbols:
                # 'dt' is the only symbol that keeps its name
                if symbol.name == 'dt':
                    unique_symbols.append(symbol)
                else:
                    unique_symbols.append(_symbol('__' + symbol.name))
            for symbol, unique_symbol in zip(symbols, unique_symbols):
                expression = expression.subs(symbol, unique_symbol)

            # Remember the (renamed) symbols so that later steps can look
            # them up by name
            self.symbols.update(
                dict(((symbol.name, symbol)
                      for symbol in unique_symbols)))
            if element.getName() == 'statement':
                self.statements.append(('__' + element.identifier, expression))
            elif element.getName() == 'output':
                self.output = expression
            else:
                raise AssertionError('Unknown element name: %s' %
                                     element.getName())

    def __repr__(self):
        '''
        Return a constructor-like string built from the stored statements.

        Only the description and ``stochastic`` are included;
        ``custom_check`` is not part of the representation.
        '''
        # recreate a description string
        description = '\n'.join(
            ['%s = %s' % (var, expr) for var, expr in self.statements])
        if len(description):
            description += '\n'
        description += 'x_new = ' + str(self.output)
        r = "{classname}('''{description}''', stochastic={stochastic})"
        return r.format(classname=self.__class__.__name__,
                        description=description,
                        stochastic=repr(self.stochastic))

    def __str__(self):
        '''
        Return a multi-line summary: the class name, the intermediate
        statements (if any) and the output expression.
        '''
        s = '%s\n' % self.__class__.__name__

        if len(self.statements) > 0:
            s += 'Intermediate statements:\n'
            s += '\n'.join([(var + ' = ' + sympy_to_str(expr))
                            for var, expr in self.statements])
            s += '\n'

        s += 'Output:\n'
        s += sympy_to_str(self.output)
        return s

    def _latex(self, *args):
        '''
        Return a LaTeX ``equation`` environment with one line per
        intermediate statement and a final line for ``x_{t+1}``.
        '''
        from sympy import latex, Symbol
        s = [r'\begin{equation}']
        for var, expr in self.statements:
            expr = expr.subs(Symbol('x'), Symbol('x_t'))
            s.append(latex(Symbol(var)) + ' = ' + latex(expr) + r'\\')
        expr = self.output.subs(Symbol('x'), 'x_t')
        s.append(r'x_{t+1} = ' + latex(expr))
        s.append(r'\end{equation}')
        return '\n'.join(s)

    def _repr_latex_(self):
        '''Return the same string as `_latex`.'''
        return self._latex()

    def replace_func(self, x, t, expr, temp_vars, eq_symbols,
                     stochastic_variable=None):
        '''
        Used to replace a single occurrence of ``f(x, t)`` or ``g(x, t)``:
        `expr` is the non-stochastic (in the case of ``f``) or stochastic
        part (``g``) of the expression defining the right-hand-side of the
        differential equation describing `var`. It replaces the variable
        `var` with the value given as `x` and `t` by the value given for
        `t`. Intermediate variables will be replaced with the appropriate
        replacements as well.

        For example, in the `rk2` integrator, the second step involves the
        calculation of ``f(k/2 + x, dt/2 + t)``. If `var` is ``v`` and
        `expr` is ``-v / tau``, this will result in ``-(_k_v/2 + v)/tau``.

        Note that the replacement is applied for every variable in
        `eq_symbols` in turn, not only for a single state variable.

        Raises
        ------
        ValueError
            If `expr` cannot be converted into a sympy expression.
        '''

        try:
            s_expr = str_to_sympy(str(expr))
        except SympifyError as ex:
            raise ValueError('Error parsing the expression "%s": %s' %
                             (expr, str(ex)))

        for var in eq_symbols:
            # Generate specific temporary variables for the state variable,
            # e.g. '_k_v' for the state variable 'v' and the temporary
            # variable 'k'.
            if stochastic_variable is None:
                temp_var_replacements = dict(
                    ((self.symbols[temp_var], _symbol(temp_var + '_' + var))
                     for temp_var in temp_vars))
            else:
                temp_var_replacements = dict(
                    ((self.symbols[temp_var],
                      _symbol(temp_var + '_' + var + '_' + stochastic_variable))
                     for temp_var in temp_vars))
            # In the expression given as 'x', replace 'x' by the variable
            # 'var' and all the temporary variables by their
            # variable-specific counterparts.
            x_replacement = x.subs(self.symbols['__x'], eq_symbols[var])
            x_replacement = x_replacement.subs(temp_var_replacements)

            # Replace the variable `var` in the expression by the new `x`
            # expression
            s_expr = s_expr.subs(eq_symbols[var], x_replacement)

        # If the expression given for t in the state updater description
        # is not just "t" (or rather "__t"), then replace t in the
        # equations by it, and replace "__t" by "t" afterwards.
        if t != self.symbols['__t']:
            s_expr = s_expr.subs(SYMBOLS['t'], t)
            s_expr = s_expr.replace(self.symbols['__t'], SYMBOLS['t'])

        return s_expr

    def _non_stochastic_part(self, eq_symbols, non_stochastic,
                             non_stochastic_expr, stochastic_variable,
                             temp_vars, var):
        '''
        Build the non-stochastic contribution for the state variable `var`.

        ``f(x, t)`` in `non_stochastic_expr` is replaced via `replace_func`
        using `non_stochastic`, ``x`` is replaced by the symbol of `var` and
        the temporary variables are replaced by variable-specific names.
        The naming of the temporaries depends on `stochastic_variable`:

        * ``None`` or empty: ``<temp>_<var>``
        * a single string: ``<temp>_<var>_<stochastic_variable>``
        * any other iterable: the sum of ``<temp>_<var>_<xi>`` over all
          its elements

        Returns a list containing the single resulting expression.
        '''
        non_stochastic_results = []
        if stochastic_variable is None or len(stochastic_variable) == 0:
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var], _symbol(temp_var + '_' + var))
                for temp_var in temp_vars)
            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)
        elif isinstance(stochastic_variable, basestring):
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols,
                                                       stochastic_variable)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 _symbol(temp_var + '_' + var + '_' + stochastic_variable))
                for temp_var in temp_vars)

            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)
        else:
            # Replace the f(x, t) part
            replace_f = lambda x, t: self.replace_func(x, t, non_stochastic,
                                                       temp_vars, eq_symbols)
            non_stochastic_result = non_stochastic_expr.replace(
                self.symbols['__f'], replace_f)
            # Replace x by the respective variable
            non_stochastic_result = non_stochastic_result.subs(
                self.symbols['__x'], eq_symbols[var])
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 reduce(operator.add, [
                     _symbol(temp_var + '_' + var + '_' + xi)
                     for xi in stochastic_variable
                 ])) for temp_var in temp_vars)

            non_stochastic_result = non_stochastic_result.subs(
                temp_var_replacements)
            non_stochastic_results.append(non_stochastic_result)

        return non_stochastic_results

    def _stochastic_part(self, eq_symbols, stochastic, stochastic_expr,
                         stochastic_variable, temp_vars, var):
        '''
        Build the stochastic contribution(s) for the state variable `var`.

        For each stochastic variable, ``g(x, t)`` in `stochastic_expr` is
        replaced via `replace_func` using the matching entry of `stochastic`
        (``0`` if there is none), ``x`` is replaced by the symbol of `var`,
        ``dW`` by the stochastic variable and the temporary variables by
        ``<temp>_<var>_<xi>``.

        Returns a list with one expression if `stochastic_variable` is a
        single string, otherwise one expression per element of
        `stochastic_variable`.
        '''
        stochastic_results = []
        if isinstance(stochastic_variable, basestring):
            # Replace the g(x, t) part
            replace_f = lambda x, t: self.replace_func(
                x, t, stochastic.get(stochastic_variable, 0), temp_vars,
                eq_symbols, stochastic_variable)
            stochastic_result = stochastic_expr.replace(
                self.symbols['__g'], replace_f)
            # Replace x by the respective variable
            stochastic_result = stochastic_result.subs(self.symbols['__x'],
                                                       eq_symbols[var])
            # Replace dW by the respective variable
            stochastic_result = stochastic_result.subs(self.symbols['__dW'],
                                                       stochastic_variable)
            # Replace intermediate variables
            temp_var_replacements = dict(
                (self.symbols[temp_var],
                 _symbol(temp_var + '_' + var + '_' + stochastic_variable))
                for temp_var in temp_vars)

            stochastic_result = stochastic_result.subs(temp_var_replacements)
            stochastic_results.append(stochastic_result)
        else:
            for xi in stochastic_variable:
                # Replace the g(x, t) part
                # (the lambda is consumed by the `replace` call right below,
                # i.e. within the same loop iteration)
                replace_f = lambda x, t: self.replace_func(
                    x, t, stochastic.get(xi, 0), temp_vars, eq_symbols, xi)
                stochastic_result = stochastic_expr.replace(
                    self.symbols['__g'], replace_f)
                # Replace x by the respective variable
                stochastic_result = stochastic_result.subs(
                    self.symbols['__x'], eq_symbols[var])

                # Replace dW by the respective variable
                stochastic_result = stochastic_result.subs(
                    self.symbols['__dW'], xi)

                # Replace intermediate variables
                temp_var_replacements = dict(
                    (self.symbols[temp_var],
                     _symbol(temp_var + '_' + var + '_' + xi))
                    for temp_var in temp_vars)

                stochastic_result = stochastic_result.subs(
                    temp_var_replacements)
                stochastic_results.append(stochastic_result)
        return stochastic_results

    def _generate_RHS(self, eqs, var, eq_symbols, temp_vars, expr,
                      non_stochastic_expr, stochastic_expr,
                      stochastic_variable=()):
        '''
        Helper function used in `__call__`. Generates the right hand side of
        an abstract code statement by appropriately replacing f, g and t.
        For example, given a differential equation ``dv/dt = (-v + I) / tau``
        (i.e. `var` is ``v`` and `expr` is ``(-v + I) / tau``) together with
        the `rk2` step ``x_new = x + dt*f(x + k/2, t + dt/2)``
        (i.e. `non_stochastic_expr` is
        ``x + dt*f(x + k/2, t + dt/2)`` and `stochastic_expr` is ``None``),
        produces ``v + dt*(-v - _k_v/2 + I + _k_I/2)/tau``.

        The result is returned as a string (via `sympy_to_str`).
        '''

        # Note: in the following we are silently ignoring the case that a
        # state updater does not care about either the non-stochastic or the
        # stochastic part of an equation. We do trust state updaters to
        # correctly specify their own abilities (i.e. they do not claim to
        # support stochastic equations but actually just ignore the stochastic
        # part). We can't really check the issue here, as we are only dealing
        # with one line of the state updater description. It is perfectly valid
        # to write the euler update as:
        #     non_stochastic = dt * f(x, t)
        #     stochastic = dt**.5 * g(x, t) * xi
        #     x_new = x + non_stochastic + stochastic
        #
        # In the above case, we'll deal with lines which do not define either
        # the stochastic or the non-stochastic part.

        non_stochastic, stochastic = expr.split_stochastic()

        if non_stochastic_expr is not None:
            # We do have a non-stochastic part in the state updater description
            non_stochastic_results = self._non_stochastic_part(
                eq_symbols, non_stochastic, non_stochastic_expr,
                stochastic_variable, temp_vars, var)
        else:
            non_stochastic_results = []

        if not (stochastic is None or stochastic_expr is None):
            # We do have a stochastic part in the state
            # updater description
            stochastic_results = self._stochastic_part(eq_symbols,
                                                       stochastic,
                                                       stochastic_expr,
                                                       stochastic_variable,
                                                       temp_vars, var)
        else:
            stochastic_results = []

        RHS = sympy.Number(0)
        # All the parts (one non-stochastic and potentially more than one
        # stochastic part) are combined with addition
        for non_stochastic_result in non_stochastic_results:
            RHS += non_stochastic_result
        for stochastic_result in stochastic_results:
            RHS += stochastic_result

        return sympy_to_str(RHS)

    def __call__(self, eqs, variables=None, method_options=None):
        '''
        Apply a state updater description to model equations.

        Parameters
        ----------
        eqs : `Equations`
            The equations describing the model
        variables : dict-like, optional
            The `Variable` objects for the model. Passed on to
            ``custom_check`` (if one was given) and to
            ``eqs.get_substituted_expressions``.
        method_options : dict, optional
            Additional options to the state updater. They are passed through
            `extract_method_options` with an empty dictionary of defaults;
            the result is not used any further here.

        Returns
        -------
        code : str
            The generated statements, one per line.

        Raises
        ------
        UnsupportedEquationsException
            If `eqs` are stochastic and this state updater does not support
            stochastic equations, or if they have multiplicative noise and
            this state updater does not support multiplicative noise.

        Examples
        --------
        >>> from brian2 import *
        >>> eqs = Equations('dv/dt = -v / tau : volt')
        >>> print(euler(eqs))
        _v = -dt*v/tau + v
        v = _v
        >>> print(rk4(eqs))
        __k_1_v = -dt*v/tau
        __k_2_v = -dt*(0.5*__k_1_v + v)/tau
        __k_3_v = -dt*(0.5*__k_2_v + v)/tau
        __k_4_v = -dt*(__k_3_v + v)/tau
        _v = 0.166666666666667*__k_1_v + 0.333333333333333*__k_2_v + 0.333333333333333*__k_3_v + 0.166666666666667*__k_4_v + v
        v = _v
        '''
        method_options = extract_method_options(method_options, {})
        # Non-stochastic numerical integrators should work for all equations,
        # except for stochastic equations
        if eqs.is_stochastic:
            if self.stochastic is None:
                raise UnsupportedEquationsException(
                    'Cannot integrate '
                    'stochastic equations with '
                    'this state updater.')
            if (self.stochastic != 'multiplicative' and
                    eqs.stochastic_type == 'multiplicative'):
                raise UnsupportedEquationsException(
                    'Cannot integrate '
                    'equations with '
                    'multiplicative noise with '
                    'this state updater.')

        if self.custom_check:
            self.custom_check(eqs, variables)
        # The final list of statements
        statements = []

        stochastic_variables = eqs.stochastic_variables

        # The variables for the intermediate results in the state updater
        # description, e.g. the variable k in rk2
        intermediate_vars = [var for var, expr in self.statements]

        # A dictionary mapping all the variables in the equations to their
        # sympy representations
        eq_variables = dict(((var, _symbol(var)) for var in eqs.eq_names))

        # Generate the random numbers for the stochastic variables
        for stochastic_variable in stochastic_variables:
            statements.append(stochastic_variable + ' = ' + 'dt**.5 * randn()')

        substituted_expressions = eqs.get_substituted_expressions(variables)

        # Process the intermediate statements in the stateupdater description
        for intermediate_var, intermediate_expr in self.statements:

            # Split the expression into a non-stochastic and a stochastic part
            non_stochastic_expr, stochastic_expr = split_expression(
                intermediate_expr)

            # Execute the statement by appropriately replacing the functions f
            # and g and the variable x for every equation in the model.
            # We use the model equations where the subexpressions have
            # already been substituted into the model equations.
            for var, expr in substituted_expressions:
                # One intermediate statement per stochastic variable ...
                for xi in stochastic_variables:
                    RHS = self._generate_RHS(eqs, var, eq_variables,
                                             intermediate_vars, expr,
                                             non_stochastic_expr,
                                             stochastic_expr, xi)
                    statements.append(intermediate_var + '_' + var + '_' + xi +
                                      ' = ' + RHS)
                # ... or a single one if there are none
                if not stochastic_variables:  # no stochastic variables
                    RHS = self._generate_RHS(eqs, var, eq_variables,
                                             intermediate_vars, expr,
                                             non_stochastic_expr,
                                             stochastic_expr)
                    statements.append(intermediate_var + '_' + var + ' = ' +
                                      RHS)

        # Process the "x_new = ..." line of the stateupdater description
        non_stochastic_expr, stochastic_expr = split_expression(self.output)

        # Assign a value to all the model variables described by differential
        # equations
        for var, expr in substituted_expressions:
            RHS = self._generate_RHS(eqs, var, eq_variables, intermediate_vars,
                                     expr, non_stochastic_expr,
                                     stochastic_expr, stochastic_variables)
            statements.append('_' + var + ' = ' + RHS)

        # Assign everything to the final variables
        for var, expr in substituted_expressions:
            statements.append(var + ' = ' + '_' + var)

        return '\n'.join(statements)
def _parse_folder_spec(spec, groups, sort_key): """Parse the folder specification into a nested list. Args: spec (str): folder specification groups (dict): map of group name to list of folders in group sort_key (callable): map of folder name to sortable object. Returns: list: list of parsed tokens Raises: ValueError: if `spec` cannot be parsed. """ group_names = list(groups.keys()) def convert_to_slice(parse_string, loc, tokens): """Convert SliceSpec tokens to slice instance.""" parts = "".join(tokens[1:-1]).split(':') if len(parts) == 1: i = int(parts[0]) if i == -1: return slice(i, None, None) else: return slice(i, i + 1, None) else: parts += [''] * (3 - len(parts)) # pad to length 3 start, stop, step = (int(v) if len(v) > 0 else None for v in parts) return slice(start, stop, step) def convert_to_callable_filter(parse_string, loc, tokens): """Convert ConditionSpec to a callable filter. The returned filter takes a single argument `folder` and return True if the `folder` passes the filter. 
""" op, arg = tokens[0], tokens[1] def _filter(folder, _op, _list): folder = parse_version(folder) _list = [parse_version(v) for v in _list] if _op == 'in': return folder in _list elif _op == 'not in': return folder not in _list elif _op == '<=': return all([folder <= v for v in _list]) elif _op == '<': return all([folder < v for v in _list]) elif _op == '==': return all([folder == v for v in _list]) elif _op == '!=': return all([folder != v for v in _list]) elif _op == '>=': return all([folder >= v for v in _list]) elif _op == '>': return all([folder > v for v in _list]) else: # pragma: nocover raise ValueError("Unknown operator: %r" % _op) if isinstance(arg, str): _list = [arg] else: _list = _resolve_folder_spec([arg.asList()], groups, sort_key=sort_key) return partial(_filter, _op=op, _list=_list) Int = Word(nums + "-", nums) Colon = Literal(':') SliceSpec = ("[" + Optional(Int) + Optional(Colon + Optional(Int)) + Optional(Colon + Optional(Int)) + "]").setParseAction(convert_to_slice) LogicalOperator = (Literal('in') | Literal('not in') | Literal('<=') | Literal('<') | Literal('==') | Literal('!=') | Literal('>=') | Literal('>')) GroupName = Group("<" + oneOf(group_names, caseless=True) + ">") FolderName = Word(alphanums, alphanums + ".-_+") ParenthesizedListSpec = Forward() ConditionSpec = Forward() ParenthesizedListSpec <<= Group("(" + delimitedList(GroupName | FolderName | ParenthesizedListSpec) + ConditionSpec[...] + ")" + Optional(SliceSpec)) ConditionSpec <<= LogicalOperator + (FolderName | GroupName | ParenthesizedListSpec) ConditionSpec = ConditionSpec.setParseAction(convert_to_callable_filter) ListSpec = delimitedList(GroupName | FolderName | ParenthesizedListSpec) Spec = ListSpec | ParenthesizedListSpec if spec.strip() == '': return [] try: return Spec.parseString(spec, parseAll=True).asList() except ParseException as exc: raise ValueError("Invalid specification (marked '*'): %r" % exc.markInputline('*'))
print("tokens.tables =", tokens.tables) print("tokens.where =", tokens.where) except ParseException as err: print(" "*err.loc + "^") print(err) print('') # define SQL tokens selectStmt = Forward() selectToken = Keyword("select", caseless=True) fromToken = Keyword("from", caseless=True) ident = Word( alphas, alphanums + "_$" ).setName("identifier") columnName = ( delimitedList( ident, ".", combine=True ) ).addParseAction(upcaseTokens) columnNameList = Group( delimitedList( columnName ) ) tableName = ( delimitedList( ident, ".", combine=True ) ).addParseAction(upcaseTokens) tableNameList = Group( delimitedList( tableName ) ) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-",exact=1) realNum = Combine( Optional(arithSign) + ( Word( nums ) + "." + Optional( Word(nums) ) | ( "." + Word(nums) ) ) + Optional( E + Optional(arithSign) + Word(nums) ) ) intNum = Combine( Optional(arithSign) + Word( nums ) +
def _parse_formula(text):
    """Parse a logical formula given as a string.

    The accepted syntax is:

    * terms: a symbol (letters, ``_``, ``?`` or ``.`` first; digits and
      ``-`` allowed afterwards), optionally applied to a comma-separated
      argument list of terms, e.g. ``a`` or ``f(a)``;
    * literals: a predicate symbol (or ``=`` / ``<``) applied to terms,
      optionally preceded by ``~``, e.g. ``p(a,b)``, ``~p(a)``, ``=(a,b)``;
    * quantifiers: ``forall x, y: <formula>`` and ``exists x: <formula>``;
    * connectives, from tightest to loosest binding: ``~``, ``&``, ``|``,
      ``%``, ``->``.

    Example inputs: ``"p(a,b)"``, ``"~p(a) | p(b)"``,
    ``"p(f(a)) | p(b)"``, ``"p(a) | p(b) | p(c)"``.

    The whole of `text` has to match. The return value is the first element
    of the parse result converted with ``asList()``, i.e. plain (nested)
    lists of strings.
    """
    opening = Suppress("(")
    closing = Suppress(")")
    colon = Suppress(":")

    def argument_list():
        # Parenthesized, comma-separated terms; a new group for every use.
        return Group(opening + delimitedList(term) + closing)

    # Quantifier keywords and connectives
    exists = Keyword("exists")
    forall = Keyword("forall")
    implication = Literal("->")
    disjunction = Literal("|")
    conjunction = Literal("&")
    negation = Literal("~")
    equivalence = Literal("%")

    # Terms are recursive: the arguments of a function are terms themselves
    symbol = Word(alphas + "_?.", alphanums + "_?.-")
    term = Forward()
    term << (Group(symbol + argument_list()) | symbol)

    # Predicate names may not contain '?', but '=' and '<' are predicates too
    pred_symbol = (Word(alphas + "_.", alphanums + "_.-")
                   | Literal("=")
                   | Literal("<"))

    positive_literal = Group(pred_symbol + argument_list())
    negative_literal = Group(negation + pred_symbol + argument_list())
    literal = Forward()
    literal << (positive_literal | negative_literal)

    formula = Forward()
    forall_expression = Group(forall + delimitedList(symbol) + colon + formula)
    exists_expression = Group(exists + delimitedList(symbol) + colon + formula)
    operand = forall_expression | exists_expression | literal

    # Listed from highest to lowest precedence
    connectives = [
        (negation, 1, opAssoc.RIGHT),
        (conjunction, 2, opAssoc.LEFT),
        (disjunction, 2, opAssoc.LEFT),
        (equivalence, 2, opAssoc.RIGHT),
        (implication, 2, opAssoc.RIGHT),
    ]
    formula << operatorPrecedence(operand, connectives)

    parsed = formula.parseString(text, parseAll=True)
    return parsed.asList()[0]
def _make_default_parser():
    """Build the query grammar and return its ``parseString`` method.

    The grammar understands plain words (with backslash escapes), wildcard
    words containing ``*`` / ``?``, ranges such as ``[a TO b]`` or
    ``{a TO b}`` (either end may be left open), quoted phrases,
    parenthesized groups, ``^<number>`` boosts, ``field:`` prefixes, a
    case-insensitive ``not`` prefix and the upper-case binary operators
    ``AND``, ``OR`` and ``ANDNOT``. Matches are wrapped in groups carrying
    the results names ``Word``, ``Wildcard``, ``Range``, ``Quotes``,
    ``Group``, ``Boost``, ``Field``, ``Not``, ``And``, ``Or``, ``AndNot``
    and ``Toplevel``. The complete input has to match.
    """
    def gap():
        # Mandatory whitespace that leaves no token behind.
        return Suppress(White())

    def named_group(expr, name):
        # Group *expr* and attach the results name *name*.
        return Group(expr).setResultsName(name)

    def binary(left, keyword, right, name):
        # "<left> KEYWORD <right>" with the keyword and the blanks dropped.
        operator_token = Keyword(keyword, caseless=False)
        return named_group(
            left + gap() + Suppress(operator_token) + gap() + right, name)

    backslash = "\\"

    # Ordinary characters, or any single character protected by a backslash
    ordinary = CharsNotIn('\\*?^():"{}[] ')
    escaped = Suppress(backslash) + (Word(printables, exact=1)
                                     | White(exact=1))
    wordtoken = Combine(OneOrMore(ordinary | escaped))

    # A plain old word.
    plainWord = named_group(wordtoken, "Word")

    # A wildcard word containing * or ?.
    wildchars = Word("?*")
    # Word characters first, wildcards mixed in afterwards
    wordFirst = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Wildcards first, followed by a mixture of both or by the end of the
    # token
    tokenEnd = FollowedBy(White() | StringEnd())
    wildFirst = wildchars + (OneOrMore(wordtoken + Optional(wildchars))
                             | tokenEnd)
    wildcard = named_group(Combine(wordFirst | wildFirst), "Wildcard")

    # A range of terms
    opener = Literal("[") | Literal("{")
    closer = Literal("]") | Literal("}")
    endpoint = QuotedString('"') | wordtoken
    noLowerBound = (Group(Empty())
                    + Suppress(Keyword("TO") + White())
                    + Group(endpoint))
    noUpperBound = (Group(endpoint)
                    + Suppress(White() + Keyword("TO"))
                    + Group(Empty()))
    bothBounds = (Group(endpoint)
                  + Suppress(White() + Keyword("TO") + White())
                  + Group(endpoint))
    rangeExpr = named_group(
        opener + (bothBounds | noLowerBound | noUpperBound) + closer,
        "Range")

    # A word-like thing
    generalWord = rangeExpr | wildcard | plainWord

    # A quoted phrase
    quotedPhrase = named_group(QuotedString('"'), "Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = named_group(
        (Suppress("(") + expression + Suppress(")")), "Group")

    # Words and phrases may carry a numeric boost after '^'
    boostableUnit = generalWord | quotedPhrase
    boostFactor = Word("0123456789", ".0123456789")
    boostedUnit = named_group(
        boostableUnit + Suppress("^") + boostFactor, "Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn
    # is the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldName = Word(alphanums + "_")
    fieldedUnit = named_group(
        fieldName + Suppress(':') + fieldableUnit, "Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = named_group(
        Suppress(Keyword("not", caseless=True)) + gap() + unit, "Not")
    generalUnit = operatorNot | unit

    # Binary operators; ANDNOT only joins two plain units
    operatorAnd = binary(generalUnit, "AND", expression, "And")
    operatorOr = binary(generalUnit, "OR", expression, "Or")
    operatorAndNot = binary(unit, "ANDNOT", unit, "AndNot")

    anyPiece = (operatorAnd | operatorOr | operatorAndNot | generalUnit
                | gap())
    expression << (OneOrMore(anyPiece) | Empty())

    toplevel = named_group(expression, "Toplevel") + StringEnd()
    return toplevel.parseString
def input_from_blif(blif, block=None, merge_io_vectors=True, clock_name='clk'):
    """ Read an open blif file or string as input, updating the block appropriately

    Assumes the blif has been flattened and there is only a single module.
    Assumes that there is only one single shared clock and reset
    Assumes that output is generated by Yosys with formals in a particular order
    Ignores reset signal (which it assumes is input only to the flip flops)

    :param blif: an open file (anything with ``read()``) or a string holding
        the BLIF text
    :param block: block to add the hardware to; passed through
        ``working_block``
    :param merge_io_vectors: if True, per-bit inputs/outputs such as ``a[0]``,
        ``a[1]`` are merged into a single multi-bit ``Input``/``Output``
    :param clock_name: name of the input treated as the clock (it is recorded
        but no ``Input`` wire is created for it)
    :raises PyrtlError: if ``blif`` is neither a file nor a string, if a
        command or logic cover is not recognised, or if a latch is
        initialised to 1
    """
    import pyparsing
    import six
    from pyparsing import (Word, Literal, OneOrMore, ZeroOrMore,
                           Suppress, Group, Keyword, Optional, oneOf)

    block = working_block(block)

    try:
        blif_string = blif.read()
    except AttributeError:
        if isinstance(blif, six.string_types):
            blif_string = blif
        else:
            raise PyrtlError('input_blif expecting either open file or string')

    def SKeyword(x):
        """ suppressed (matched but not returned) keyword """
        return Suppress(Keyword(x))

    def SLiteral(x):
        """ suppressed (matched but not returned) literal """
        return Suppress(Literal(x))

    def twire(x):
        """ find or make wire named x and return it """
        s = block.get_wirevector_by_name(x)
        if s is None:
            s = WireVector(bitwidth=1, name=x)
        if isinstance(s, Output) and (merge_io_vectors or len(x) == 1):
            # To allow an output wire to be used as an argument (legal in BLIF),
            # use the intermediate wire that was created in its place.
            # extract_outputs() creates this intermediate wire.
            s = block.get_wirevector_by_name(x + '[0]')
        return s

    # Begin BLIF language definition
    signal_start = pyparsing.alphas + r'$:[]_<>\\\/?'
    signal_middle = pyparsing.alphas + pyparsing.nums + r'$:[]_<>\\\/.?'
    signal_id = Word(signal_start, signal_middle)
    header = SKeyword('.model') + signal_id('model_name')
    input_list = Group(SKeyword('.inputs') + OneOrMore(signal_id))('input_list')
    output_list = Group(SKeyword('.outputs') + OneOrMore(signal_id))('output_list')

    cover_atom = Word('01-')
    cover_list = Group(ZeroOrMore(cover_atom))('cover_list')
    namesignal_list = Group(OneOrMore(signal_id))('namesignal_list')
    name_def = Group(SKeyword('.names') + namesignal_list + cover_list)('name_def')

    # asynchronous Flip-flop
    dffas_formal = (SLiteral('C=') + signal_id('C')
                    + SLiteral('R=') + signal_id('R')
                    + SLiteral('D=') + signal_id('D')
                    + SLiteral('Q=') + signal_id('Q'))
    dffas_keyword = SKeyword('$_DFF_PN0_') | SKeyword('$_DFF_PP0_')
    dffas_def = Group(SKeyword('.subckt') + dffas_keyword + dffas_formal)('dffas_def')

    # synchronous Flip-flop
    # The default must be the plain string "0" so that the token stored under
    # 'I' has the same type whether or not an init value is present in the
    # file (previously a Literal parser object was stored as the default).
    dffs_init_val = Optional(oneOf("0 1 2 3"), default="0")
    # TODO I think <type> and <control> ('re' and 'C') below are technically optional too
    dffs_def = Group(SKeyword('.latch')
                     + signal_id('D')
                     + signal_id('Q')
                     + SLiteral('re')
                     + signal_id('C')
                     + dffs_init_val('I'))('dffs_def')
    command_def = name_def | dffas_def | dffs_def
    command_list = Group(OneOrMore(command_def))('command_list')

    footer = SKeyword('.end')
    model_def = Group(header + input_list + output_list + command_list + footer)
    model_list = OneOrMore(model_def)
    parser = model_list.ignore(pyparsing.pythonStyleComment)

    # Begin actually reading and parsing the BLIF file
    result = parser.parseString(blif_string, parseAll=True)
    # Blif file with multiple models (currently only handles one flattened models)
    assert(len(result) == 1)
    clk_set = set()
    ff_clk_set = set()

    def extract_inputs(model):
        """ create Input wires, merging 'name[i]' bits when requested """
        start_names = [re.sub(r'\[([0-9]+)\]$', '', x) for x in model['input_list']]
        name_counts = collections.Counter(start_names)
        for input_name, bitwidth in name_counts.items():
            if input_name == clock_name:
                clk_set.add(input_name)
            elif not merge_io_vectors or bitwidth == 1:
                block.add_wirevector(Input(bitwidth=1, name=input_name))
            else:
                wire_in = Input(bitwidth=bitwidth, name=input_name, block=block)
                for i in range(bitwidth):
                    bit_name = input_name + '[' + str(i) + ']'
                    bit_wire = WireVector(bitwidth=1, name=bit_name, block=block)
                    bit_wire <<= wire_in[i]

    def extract_outputs(model):
        """ create Output wires, merging 'name[i]' bits when requested """
        start_names = [re.sub(r'\[([0-9]+)\]$', '', x) for x in model['output_list']]
        name_counts = collections.Counter(start_names)
        for output_name, bitwidth in name_counts.items():
            if not merge_io_vectors or bitwidth == 1:
                # To allow an output wire to be used as an argument (legal in BLIF),
                # create an intermediate wire that will be used in its place.
                # twire() checks for this and uses the intermediate wire when needed
                w = WireVector(bitwidth=1, name=output_name + '[0]')
                out = Output(bitwidth=1, name=output_name)
                out <<= w
            else:
                wire_out = Output(bitwidth=bitwidth, name=output_name, block=block)
                bit_list = []
                for i in range(bitwidth):
                    bit_name = output_name + '[' + str(i) + ']'
                    bit_wire = WireVector(bitwidth=1, name=bit_name, block=block)
                    bit_list.append(bit_wire)
                wire_out <<= concat_list(bit_list)

    def extract_commands(model):
        """ dispatch every command of the model to the matching extractor """
        # for each "command" (dff or net) in the model
        for command in model['command_list']:
            # if it is a net (specified as a cover)
            if command.getName() == 'name_def':
                extract_cover(command)
            # else if the command is a d flop flop
            elif command.getName() == 'dffas_def' or command.getName() == 'dffs_def':
                extract_flop(command)
            else:
                raise PyrtlError('unknown command type')

    def extract_cover(command):
        """ build the gate described by a '.names' cover (hard-coded patterns) """
        # pylint: disable=invalid-unary-operand-type
        netio = command['namesignal_list']
        cover = command['cover_list'].asList()
        if not cover:
            output_wire = twire(netio[0])
            output_wire <<= Const(0, bitwidth=1, block=block)  # const "FALSE"
        elif cover == ['1']:
            output_wire = twire(netio[0])
            output_wire <<= Const(1, bitwidth=1, block=block)  # const "TRUE"
        elif cover == ['1', '1']:
            # Populate clock list if one input is already a clock
            if netio[1] in clk_set:
                clk_set.add(netio[0])
            elif netio[0] in clk_set:
                clk_set.add(netio[1])
            else:
                output_wire = twire(netio[1])
                output_wire <<= twire(netio[0])  # simple wire
        elif cover == ['0', '1']:
            output_wire = twire(netio[1])
            output_wire <<= ~ twire(netio[0])  # not gate
        elif cover == ['11', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) & twire(netio[1])  # and gate
        elif cover == ['00', '1']:
            output_wire = twire(netio[2])
            output_wire <<= ~ (twire(netio[0]) | twire(netio[1]))  # nor gate
        elif cover == ['1-', '1', '-1', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) | twire(netio[1])  # or gate
        elif cover == ['10', '1', '01', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) ^ twire(netio[1])  # xor gate
        elif cover == ['1-0', '1', '-11', '1']:
            output_wire = twire(netio[3])
            output_wire <<= (twire(netio[0]) & ~ twire(netio[2])) \
                | (twire(netio[1]) & twire(netio[2]))   # mux
        elif cover == ['-00', '1', '0-0', '1']:
            output_wire = twire(netio[3])
            output_wire <<= (~twire(netio[1]) & ~twire(netio[2])) \
                | (~twire(netio[0]) & ~twire(netio[2]))
        else:
            raise PyrtlError('Blif file with unknown logic cover set "%s"'
                             '(currently gates are hard coded)' % command['cover_list'])

    def extract_flop(command):
        """ create a 1-bit Register for a '.latch' or DFF '.subckt' command """
        ff_clk_set.add(command['C'])

        # Create register and assign next state to D and output to Q
        regname = command['Q'] + '_reg'
        flop = Register(bitwidth=1, name=regname)
        flop.next <<= twire(command['D'])
        flop_output = twire(command['Q'])
        init_val = command['I']
        if init_val == "1":
            # e.g. in Verilog: `initial reg <= 1;`
            raise PyrtlError("Initializing latches to 1 is not supported. "
                             "Acceptable values are: 0, 2 (don't care), and 3 (unknown); "
                             "in any case, PyRTL will ensure all stateful elements come up 0. "
                             "For finer control over the initial value, use specialized reset "
                             "logic.")
        flop_output <<= flop

    for model in result:
        extract_inputs(model)
        extract_outputs(model)
        extract_commands(model)
# Grammar for a small graph-query statement of the form
#   match (src) [<trOp> [relVar:relName] <trOp> (dst)] [where a.b <op> val] return x[.prop]
# The three Forward placeholders are filled in with `<<` further below.
matchStmt = Forward()
relExpr = Forward()
whereExpr = Forward()

# Keywords are matched case-insensitively.
matchToken = Keyword("match", caseless=True)
returnToken = Keyword("return", caseless=True)
pathToken = Keyword("path", caseless=True)
whereToken = Keyword("where", caseless=True)

# Identifiers and property names: a letter followed by letters/digits.
ident = Word(alphas, alphanums).setName("identifier")
prop = Word(alphas, alphanums).setName("property")

# Comparison operators usable in a where-condition.
binOp = oneOf("== != >= <= > < =~")
# Traversal (edge direction) operators.
# NOTE(review): "->" and "<-" are each listed twice in this string — the
# duplicates look unintentional; confirm before cleaning up.
trOp = oneOf("- -> -> <- <-")
intNum = Word(nums)

# Right-hand side of a condition: a quoted string or an unsigned integer.
whereVal = quotedString | intNum

# A single condition: <ident>.<prop> <binOp> <value>
whereCondition = Group((ident) + '.' + prop + binOp + whereVal)

# "where" followed by exactly one condition, exposed as "whereCond".
whereExpr << whereToken + whereCondition.setResultsName("whereCond")

# Relationship pattern: '[' [relVar] ':' relName ']'
relExpr << '[' + Optional(
    (ident ).setResultsName("relVar")) + ':' + ident.setResultsName("relName") + ']'

# Full statement.  The traversal part (operators, optional relationship and
# destination node) and the where clause are both optional; the return
# clause is mandatory.
matchStmt << (matchToken + '(' + (ident).setResultsName("srcNode") + ')' + Optional(
    trOp.setResultsName("firstTrOp") + Optional(relExpr) +
    trOp.setResultsName("secondTrOp") +
    '(' + ident.setResultsName("dstNode") + ')') +
    Optional(whereExpr) +
    returnToken + ((ident) + Optional('.' + prop)).setResultsName("returnData"))
#
# A partial implementation of a parser of Excel formula expressions.
#
from pyparsing import (CaselessKeyword, Suppress, Word, alphas,
    alphanums, nums, Optional, Group, oneOf, Forward, Regex,
    infixNotation, opAssoc, dblQuotedString, delimitedList,
    Combine, Literal, QuotedString, ParserElement)
ParserElement.enablePackrat()

# Suppressed punctuation.  '!' is deliberately not in this list: EXCL is
# needed as a (non-suppressed) Literal below so that it is kept inside the
# combined cell reference text.
EQ, LPAR, RPAR, COLON, COMMA = map(Suppress, '=():,')
EXCL, DOLLAR = map(Literal, "!$")

# Sheet name: a bare identifier, or a single-quoted name in which '' stands
# for a literal quote.
sheetRef = Word(alphas, alphanums) | QuotedString("'", escQuote="''")
# Column (one or two letters) and row (digits), each optionally prefixed
# with '$'.
colRef = Optional(DOLLAR) + Word(alphas, max=2)
rowRef = Optional(DOLLAR) + Word(nums)
# A cell reference with optional "sheet!" prefix, combined into one token.
cellRef = Combine(
    Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row")))

# A range "start:end", a single cell, or a bare name.
cellRange = (Group(cellRef("start") + COLON + cellRef("end"))("range")
             | cellRef | Word(alphas, alphanums))

expr = Forward()

COMPARISON_OP = oneOf("< = > >= <= != <>")
condExpr = expr + COMPARISON_OP + expr

# IF(condition, if_true, if_false); the function name is case-insensitive.
ifFunc = (CaselessKeyword("if")
          + LPAR
          + Group(condExpr)("condition")
          + COMMA + Group(expr)("if_true")
          + COMMA + Group(expr)("if_false")
          + RPAR)


def statFunc(name):
    """Return a parser for ``name(expr, expr, ...)``.

    *name* is matched as a case-insensitive keyword; the comma-separated
    argument expressions are collected in a nested group.
    """
    return Group(
        CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR))


sumFunc = statFunc("sum")
def create_bnf(stack):
    """Build the region-selection expression grammar.

    The grammar accepts atoms such as ``all``, ``r.<name>`` (optionally
    preceded by ``copy``), ``vertices of surface``, ``vertices in
    <relation>``, ``vertices by <function>``, ``vertex <i>[, <j>...]``,
    ``cells of group <n>``, ... combined with the set operators
    ``-``, ``+``, ``*`` suffixed by one of ``v e f c s``.

    :param stack: object handed to ``to_stack()``; the parse actions created
        by that helper are attached to every atom and every
        ``operator expression`` pair (presumably they push onto *stack* -
        confirm against ``to_stack``).
    :returns: the complete expression, anchored at string start and end.
    """
    # Numbers.
    dot = Literal(".")
    exp_mark = CaselessLiteral("E")
    int_number = Word(nums)
    float_number = Combine(
        Word("+-" + nums, nums)
        + Optional(dot + Optional(Word(nums)))
        + Optional(exp_mark + Word("+-" + nums, nums)))

    # Plain keywords.
    kw_of = Literal('of')
    kw_in = Literal('in')
    kw_by = Literal('by')
    kw_copy = Literal('copy')

    # Set operators: '-v', '-e', ... '*s', tried in this order, each
    # replaced by its 'OA_<Operation><KIND>' name.
    region_op = None
    for sign, operation in (('-', 'Sub'), ('+', 'Add'), ('*', 'Intersect')):
        for kind in 'vefcs':
            op_literal = Literal(sign + kind).setParseAction(
                replace('OA_' + operation + kind.upper()))
            region_op = op_literal if region_op is None \
                else region_op | op_literal

    open_paren = Literal("(").suppress()
    close_paren = Literal(")").suppress()

    kw_all = Literal('all').setParseAction(replace('KW_All'))
    kw_vertex = Literal('vertex')
    kw_vertices = Literal('vertices')
    kw_cell = Literal('cell')
    kw_cells = Literal('cells')
    kw_group = Literal('group')
    kw_set = Literal('set')
    kw_surface = Literal('surface')

    identifier = Word(alphas + '_.', alphanums + '_.')
    set_name = Word(nums) | identifier

    function = Group(
        Word(alphas + '_', alphanums + '_')).setParseAction(join_tokens)

    # Region reference 'r.<name>'; 'nocopy' is supplied when 'copy' is absent.
    region_name = Combine(
        Literal('r.') + Word(alphas + '_', '_' + alphas + nums + '.'))
    region = Group(Optional(kw_copy, default='nocopy') + region_name)
    region.setParseAction(replace('KW_Region', keep=True))

    # Coordinate relations, e.g. "(x < 0.5) & (y >= 1)".
    coordinate = oneOf('x y z')
    bool_op = oneOf('& |')
    rel_op = oneOf('< > <= >= != ==')
    bool_term = (ZeroOrMore('(') + (coordinate | float_number) + rel_op
                 + (coordinate | float_number) + ZeroOrMore(')'))
    relation_body = Forward()
    relation_body << (ZeroOrMore('(')
                      + bool_term
                      + ZeroOrMore(bool_op + relation_body)
                      + ZeroOrMore(')'))
    relation = Group(relation_body).setParseAction(join_tokens)

    # Entity selectors.
    vertices_of_surface = Group(
        kw_vertices + kw_of + kw_surface).setParseAction(replace('E_VOS'))
    vertices_in_relation = Group(
        kw_vertices + kw_in + relation).setParseAction(
        replace('E_VIR', keep=True))
    vertices_by_function = Group(
        kw_vertices + kw_by + function).setParseAction(
        replace('E_VBF', keep=True))
    cells_by_function = Group(
        kw_cells + kw_by + function).setParseAction(
        replace('E_CBF', keep=True))
    cells_of_group = Group(
        kw_cells + kw_of + kw_group + Word(nums)).setParseAction(
        replace('E_COG', keep=True))
    vertices_of_group = Group(
        kw_vertices + kw_of + kw_group + Word(nums)).setParseAction(
        replace('E_VOG', keep=True))
    vertex_in_region = Group(
        kw_vertex + kw_in + region).setParseAction(
        replace_with_region('E_OVIR', 2))
    vertex_by_index = Group(
        kw_vertex + delimitedList(int_number)).setParseAction(
        replace('E_VI', keep=True))
    cell_by_index = Group(
        kw_cell + delimitedList(int_number)).setParseAction(
        replace('E_CI', keep=True))
    vertices_of_set = Group(
        kw_vertices + kw_of + kw_set + set_name).setParseAction(
        replace('E_VOSET', keep=True))
    cells_of_set = Group(
        kw_cells + kw_of + kw_set + set_name).setParseAction(
        replace('E_COSET', keep=True))

    # Expression: atom (operator expression)*, with parenthesised
    # sub-expressions whose tokens are suppressed.
    inner_expression = Forward()

    plain_atom = (kw_all | region | vertex_by_index | vertex_in_region
                  | vertices_of_surface | vertices_in_relation
                  | vertices_by_function | cell_by_index | cells_by_function
                  | cells_of_group | vertices_of_group | vertices_of_set
                  | cells_of_set)
    plain_atom.setParseAction(to_stack(stack))
    paren_atom = (open_paren + inner_expression.suppress() + close_paren)
    atom = (plain_atom | paren_atom)

    operation_tail = (region_op + inner_expression)
    operation_tail.setParseAction(to_stack(stack))

    inner_expression << atom + ZeroOrMore(operation_tail)

    return StringStart() + inner_expression + StringEnd()
def parse(self):
    """
    Run the log-line grammar over :attr:`line_contents`.

    The grammar is a fixed sequence of whitespace-separated fields; each
    field is stored under the results name given in the final expression
    (``bucket_owner``, ``bucket``, ``request_dtime``, ...).  The
    ``request_dtime`` field is post-processed by
    :meth:`_action_dtime_parse`.

    :rtype: pyparsing.ParseResults
    :returns: The parse results for the line; fields can be looked up by
        name, much like a dict.
    """
    dquote = '"'

    # Basic building blocks.
    digits = Word(nums)
    alnum_word = Word(alphanums)
    letters_or_dash = Word(alphas + "-")
    digits_or_dash = Word(nums + '-')

    # S3 bucket key name. Restrictions apply.
    bucket_name = Word(alphanums + '-_.')

    # Timestamp, e.g. [04/Aug/2006:22:34:02 +0000]
    tz_offset = Word("+-", nums)          # IE: +400 or -400
    month_abbrev = Word(alphas, exact=3)  # Jan, Feb, etc.
    date_part = digits + "/" + month_abbrev + "/" + digits
    date_and_time = date_part + ":" + digits + ":" + digits + ":" + digits
    timestamp = Group(
        Suppress("[") + Combine(date_and_time) + tz_offset + Suppress("]"))

    # Dotted address, e.g. 72.21.206.5, returned as one combined token.
    remote_address = delimitedList(digits, ".", combine=True)
    # 314159b66967d86f031c7249d1d9a80 or -
    requester_id = Word(alphanums + "-")
    # IE: SOAP.CreateBucket or REST.PUT.OBJECT
    operation_name = Word(alphas + "._")
    # S3 key: /photos/2006/08/puppy.jpg
    object_key = Word(alphanums + "/-_.?=%&:+<>#~[]")

    # Request line: "GET /mybucket/photos/2006/08/ HTTP/1.1", or a dash.
    method = Word(alphas)
    protocol = Word(alphanums + "/.")
    request_line = Suppress(dquote) + \
        method('request_method') + \
        object_key("request_uri") + \
        protocol('http_version') + \
        Suppress(dquote)
    request_or_dash = ("-" | request_line)

    # Two adjacent double quotes (an empty quoted field).
    empty_quotes = Suppress(dquote) + Suppress(dquote)

    # Referrer: "http://www.amazon.com/webservices", a dash, or empty quotes.
    quoted_referrer = Suppress(dquote) + object_key + Suppress(dquote)
    referrer_field = quoted_referrer | "-" | empty_quotes

    # User agent: "curl/7.15.1", a dash, or empty quotes.
    agent_chars = alphanums + "/-_.?=%&:(); ,+$@!^<>~[]'{}#*`"
    quoted_agent = Suppress(dquote) + Word(agent_chars) + Suppress(dquote)
    agent_field = quoted_agent | "-" | empty_quotes

    # The string given for each field is the name used to access its value
    # in the parse results.
    grammar = (
        alnum_word("bucket_owner") +
        bucket_name("bucket") +
        timestamp("request_dtime").setParseAction(
            self._action_dtime_parse) +
        remote_address("remote_ip") +
        requester_id("requester") +
        alnum_word("request_id") +
        operation_name("operation") +
        object_key("key") +
        request_or_dash("request_uri") +
        digits('http_status') +
        letters_or_dash('error_code') +
        digits_or_dash('bytes_sent') +
        digits_or_dash('object_size') +
        digits_or_dash('total_time') +
        digits_or_dash('turnaround_time') +
        referrer_field('referrer') +
        agent_field('user_agent') +
        letters_or_dash('version_id'))

    return grammar.parseString(self.line_contents)
def line(contents):
    """Return a parser for one line holding zero or more *contents* items.

    Each occurrence of *contents* is wrapped in its own ``Group``; the match
    must begin at a line start, and the line end is matched but suppressed
    from the results.
    """
    grouped_items = ZeroOrMore(Group(contents))
    end_of_line = LineEnd().suppress()
    return LineStart() + grouped_items + end_of_line
def SPICE_BNF():
    """Return the protocol-definition grammar, building it on first use.

    The grammar is stored in the module-level ``bnf``; it is only constructed
    when ``bnf`` is falsy, so later calls return the same object.  The
    top-level rule is zero or more definitions (typedef, struct, enum, flags,
    message, channel) followed by one protocol definition and end of input.
    ``//`` line comments and C-style comments are ignored.

    Most rules carry a parse action that converts the matched tokens into a
    ``ptypes`` object; those actions index the grouped tokens positionally,
    so the order of elements inside each ``Group`` matters.
    """
    global bnf

    if not bnf:

        # punctuation (all suppressed from the results)
        colon = Literal(":").suppress()
        lbrace = Literal("{").suppress()
        rbrace = Literal("}").suppress()
        lbrack = Literal("[").suppress()
        rbrack = Literal("]").suppress()
        lparen = Literal("(").suppress()
        rparen = Literal(")").suppress()
        equals = Literal("=").suppress()
        comma = Literal(",").suppress()
        semi = Literal(";").suppress()

        # primitive types (each keyword is replaced by the ptypes object)
        int8_ = Keyword("int8").setParseAction(replaceWith(ptypes.int8))
        uint8_ = Keyword("uint8").setParseAction(replaceWith(ptypes.uint8))
        int16_ = Keyword("int16").setParseAction(replaceWith(ptypes.int16))
        uint16_ = Keyword("uint16").setParseAction(replaceWith(ptypes.uint16))
        int32_ = Keyword("int32").setParseAction(replaceWith(ptypes.int32))
        uint32_ = Keyword("uint32").setParseAction(replaceWith(ptypes.uint32))
        int64_ = Keyword("int64").setParseAction(replaceWith(ptypes.int64))
        uint64_ = Keyword("uint64").setParseAction(replaceWith(ptypes.uint64))

        # keywords
        # (enum/flags keywords are replaced by their bit width)
        channel_ = Keyword("channel")
        enum32_ = Keyword("enum32").setParseAction(replaceWith(32))
        enum16_ = Keyword("enum16").setParseAction(replaceWith(16))
        enum8_ = Keyword("enum8").setParseAction(replaceWith(8))
        flags32_ = Keyword("flags32").setParseAction(replaceWith(32))
        flags16_ = Keyword("flags16").setParseAction(replaceWith(16))
        flags8_ = Keyword("flags8").setParseAction(replaceWith(8))
        # NOTE(review): channel_ is assigned a second time here with an
        # identical definition - looks redundant.
        channel_ = Keyword("channel")
        server_ = Keyword("server")
        client_ = Keyword("client")
        protocol_ = Keyword("protocol")
        typedef_ = Keyword("typedef")
        struct_ = Keyword("struct")
        message_ = Keyword("message")
        image_size_ = Keyword("image_size")
        bytes_ = Keyword("bytes")
        cstring_ = Keyword("cstring")
        switch_ = Keyword("switch")
        default_ = Keyword("default")
        case_ = Keyword("case")

        identifier = Word( alphas, alphanums + "_" )
        # enum member names may start with a digit, unlike identifiers
        enumname = Word( alphanums + "_" )

        # hexadecimal ("0x...") or optionally signed decimal, converted by cvtInt
        integer = ( Combine( CaselessLiteral("0x") + Word( nums+"abcdefABCDEF" ) ) |
                    Word( nums+"+-", nums ) ).setName("int").setParseAction(cvtInt)

        # a reference to a named type; copy() so the action is not attached
        # to `identifier` itself
        typename = identifier.copy().setParseAction(lambda toks : ptypes.TypeRef(str(toks[0])))

        # This is just normal "types", i.e. not channels or messages
        typeSpec = Forward()

        # "@name" or "@name(value, ...)" annotations
        attributeValue = integer ^ identifier
        attribute = Group(Combine ("@" + identifier) + Optional(lparen + delimitedList(attributeValue) + rparen))
        attributes = Group(ZeroOrMore(attribute))

        # array size forms allowed between '[' and ']'; an empty "[]" yields ""
        arraySizeSpecImage = Group(image_size_ + lparen + integer + comma + identifier + comma + identifier + rparen)
        arraySizeSpecBytes = Group(bytes_ + lparen + identifier + comma + identifier + rparen)
        arraySizeSpecCString = Group(cstring_ + lparen + rparen)
        arraySizeSpec = lbrack + Optional(identifier ^ integer ^ arraySizeSpecImage ^ arraySizeSpecBytes ^arraySizeSpecCString, default="") + rbrack

        # type ['*'] name ['[' size ']'] attributes ';'
        # Missing '*' and missing array spec are recorded as None so the
        # token positions seen by parseVariableDef stay fixed.
        variableDef = Group(typeSpec + Optional("*", default=None) + identifier + Optional(arraySizeSpec, default=None) + attributes - semi) \
            .setParseAction(parseVariableDef)

        # one or more "default:" / "case [!]name:" labels followed by a variable
        # ("default" is replaced by None)
        switchCase = Group(Group(OneOrMore(default_.setParseAction(replaceWith(None)) + colon | Group(case_.suppress() + Optional("!", default="") + identifier) + colon)) + variableDef) \
            .setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1]))
        # switch (dotted.name) { cases } name attributes ;
        switchBody = Group(switch_ + lparen + delimitedList(identifier,delim='.', combine=True) + rparen + lbrace + Group(OneOrMore(switchCase)) + rbrace + identifier + attributes - semi) \
            .setParseAction(lambda toks: ptypes.Switch(toks[0][1], toks[0][2], toks[0][3], toks[0][4]))
        # message and struct bodies share one rule object
        messageBody = structBody = Group(lbrace + ZeroOrMore(variableDef | switchBody) + rbrace)
        structSpec = Group(struct_ + identifier + structBody + attributes).setParseAction(lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3]))

        # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "channel_type"
        typeSpec << ( structSpec ^ int8_ ^ uint8_ ^ int16_ ^ uint16_ ^
                     int32_ ^ uint32_ ^ int64_ ^ uint64_ ^
                     typename).setName("type")

        # { name [= int], ... [,] } - enum and flags bodies share one rule object
        flagsBody = enumBody = Group(lbrace + delimitedList(Group (enumname + Optional(equals + integer))) + Optional(comma) + rbrace)

        # inline anonymous message (name is None) or a reference to a named type
        messageSpec = Group(message_ + messageBody + attributes).setParseAction(lambda toks: ptypes.MessageType(None, toks[0][1], toks[0][2])) | typename

        # optional ": parent" after the channel name; None when absent
        channelParent = Optional(colon + typename, default=None)
        channelMessage = Group(messageSpec + identifier + Optional(equals + integer, default=None) + semi) \
            .setParseAction(lambda toks: ptypes.ChannelMember(toks[0][1], toks[0][0], toks[0][2]))
        # body mixes "server:" / "client:" section markers with message members
        channelBody = channelParent + Group(lbrace + ZeroOrMore( server_ + colon | client_ + colon | channelMessage) + rbrace)

        enum_ = (enum32_ | enum16_ | enum8_)
        flags_ = (flags32_ | flags16_ | flags8_)

        # top-level definitions, each terminated by ';'
        enumDef = Group(enum_ + identifier + enumBody + attributes - semi).setParseAction(lambda toks: ptypes.EnumType(toks[0][0], toks[0][1], toks[0][2], toks[0][3]))
        flagsDef = Group(flags_ + identifier + flagsBody + attributes - semi).setParseAction(lambda toks: ptypes.FlagsType(toks[0][0], toks[0][1], toks[0][2], toks[0][3]))
        messageDef = Group(message_ + identifier + messageBody + attributes - semi).setParseAction(lambda toks: ptypes.MessageType(toks[0][1], toks[0][2], toks[0][3]))
        channelDef = Group(channel_ + identifier + channelBody - semi).setParseAction(lambda toks: ptypes.ChannelType(toks[0][1], toks[0][2], toks[0][3]))
        structDef = Group(struct_ + identifier + structBody + attributes - semi).setParseAction(lambda toks: ptypes.StructType(toks[0][1], toks[0][2], toks[0][3]))
        typedefDef = Group(typedef_ + identifier + typeSpec + attributes - semi).setParseAction(lambda toks: ptypes.TypeAlias(toks[0][1], toks[0][2], toks[0][3]))

        definitions = typedefDef | structDef | enumDef | flagsDef | messageDef | channelDef

        # "<channel type> <name> [= int];" inside the protocol body
        protocolChannel = Group(typename + identifier + Optional(equals + integer, default=None) + semi) \
            .setParseAction(lambda toks: ptypes.ProtocolMember(toks[0][1], toks[0][0], toks[0][2]))
        protocolDef = Group(protocol_ + identifier + Group(lbrace + ZeroOrMore(protocolChannel) + rbrace) + semi) \
            .setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1], toks[0][2]))

        bnf = ZeroOrMore (definitions) + protocolDef + StringEnd()

        # comments may appear anywhere in the input
        singleLineComment = "//" + restOfLine
        bnf.ignore( singleLineComment )
        bnf.ignore( cStyleComment )

    return bnf
# could be extended to include where clauses etc. # # Copyright (c) 2003,2016, Paul McGuire # from pyparsing import Word, delimitedList, Optional, \ Group, alphas, alphanums, Forward, oneOf, quotedString, \ ZeroOrMore, restOfLine, CaselessKeyword, pyparsing_common # define SQL tokens selectStmt = Forward() SELECT, FROM, WHERE = map(CaselessKeyword, "select from where".split()) ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = delimitedList(ident, ".", combine=True).setName("column name") columnName.addParseAction(pyparsing_common.upcaseTokens) columnNameList = Group(delimitedList(columnName)) tableName = delimitedList(ident, ".", combine=True).setName("table name") tableName.addParseAction(pyparsing_common.upcaseTokens) tableNameList = Group(delimitedList(tableName)) whereExpression = Forward() and_, or_, in_ = map(CaselessKeyword, "and or in".split()) binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) realNum = pyparsing_common.real() intNum = pyparsing_common.signed_integer() columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group((columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | (columnName + in_ + "(" + selectStmt + ")")
# Boolean / range keywords, matched case-insensitively.
and_, or_, not_, to_ = map(CaselessKeyword, "AND OR NOT TO".split())
keyword = and_ | or_ | not_ | to_

expression = Forward()

# A bare word: plain word characters, an escaped backslash, or a backslash
# followed by one of the special characters / the "||" or "&&" operators.
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+'
                   ).setName("word")
# Unescape the word: protect escaped backslashes with a placeholder
# (chr(127)), drop the remaining (escaping) backslashes, then turn each
# placeholder back into a single backslash.
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))

# A double-quoted phrase.
string = QuotedString('"')

# Leading "+" / "-" modifiers, exposed as "required" / "prohibit".
required_modifier = Literal("+")("required")
prohibit_modifier = Literal("-")("prohibit")
# Unsigned integer converted to int.
integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
# "~<int>" after a phrase.  TILDE (like LBRACK, RBRACK, LBRACE, RBRACE and
# CARAT below) is defined outside this fragment.
proximity_modifier = Group(TILDE + integer("proximity"))
number = pyparsing_common.fnumber()
# "~[<number>]" after a word; 0.5 is used when no number is given.
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
# valid_word() makes a copy so the name applies to field names only.
field_name = valid_word().setName("fieldname")
# "[lower TO upper]" and "{lower TO upper}" range searches.
incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
range_search = incl_range_search("incl_range") | excl_range_search(
    "excl_range")
# "^<number>" boost suffix.
boost = (CARAT + number("boost"))

# A phrase with optional proximity, and a word with optional fuzziness; the
# modified form is tried first and is grouped.
string_expr = Group(string + proximity_modifier) | string
word_expr = Group(valid_word + fuzzy_modifier) | valid_word
def parser(text):
    """Parse brace-structured configuration *text* into a ``GlobDict``.

    The input is a sequence of object entries, one per line: one or more
    identifiers (optionally quoted) optionally followed by a ``{ ... }``
    dictionary body, or a ``rule`` / ``ltm rule`` blob whose braces content
    is kept verbatim as a ``RawString``.  Python-style ``#`` comments are
    ignored.

    Note that this function calls
    ``ParserElement.setDefaultWhitespaceChars`` twice (first ``' \\t'``, then
    ``' \\t\\r\\n'``), which changes a pyparsing-wide default.

    :param text: the configuration text to parse.
    :returns: the first token of the parse result (the ``GlobDict`` built by
        the top-level parse action).
    """
    # Token converters used as parse actions.
    cvtTuple = lambda toks: tuple(toks.asList())
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))
    #cvtDict = lambda toks: dict(toks.asList())
    cvtGlobDict = lambda toks: GlobDict(toks.asList())
    cvtDict = cvtGlobDict
    # Recover the exact source text of a match made via originalTextFor().
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])

    def pythonize(toks):
        """Convert 'true'/'false'/'none'/numeric strings to Python values."""
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            # wrapped in a list so that None is kept as a token
            return [None]
        elif s.isdigit():
            return int(s)
        # Raw string: the pattern contains regex escapes (\d, \.) that are
        # invalid escape sequences in an ordinary string literal.
        elif re.match(r'(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        """Substitute RawEOL when an optional element matched nothing."""
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    # Quoted strings: quotes removed (quotedStr) or kept (quotedIdentifier).
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
                QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
                       QuotedString('\'', escChar='\\', unquoteResults=False)
    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    #anyIdentifier = identifier | quotedIdentifier
    # An identifier immediately followed by a quoted part.
    oddIdentifier = identifier + quotedIdentifier
    dictKey = dictStr | quotedStr | \
              Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
                Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        # Keys may consist of several space-separated words; a word that is
        # the last thing on the line is left for the value.
        dictKey |= Combine(identifier + ZeroOrMore(White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= delimitedList(identifier | quotedIdentifier, delim=White(' '), combine=True).setParseAction(pythonize)

    # Dictionary entries are line-oriented: newlines must not be skipped as
    # whitespace by the elements created below.
    ParserElement.setDefaultWhitespaceChars(' \t')
    #dictEntry = Group(Combine(OneOrMore(identifier | quotedIdentifier)).setParseAction(cvtRaw) +
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    #dictEntry = Group(SkipTo(dictKey + LineEnd() + dictKey))
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)
    # Back to newline-insensitive whitespace for the remaining elements.
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')

    setEntry = identifier.setParseAction(pythonize) | quotedString.setParseAction(removeQuotes)
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # TODO: take other literals as arguments
    # Rule definitions: the braces content is not parsed, only captured.
    blobObj = Group(((Literal('ltm') + Literal('rule') + identifier) | \
                     (Literal('rule') + identifier)).setParseAction(cvtRaw) +
                    originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))

    objEntry = Group(OneOrMore(identifier | quotedIdentifier).setParseAction(cvtRaw) +
                     Optional(dictStr).setParseAction(noneDefault))
    objStr << (Optional(delimitedList(blobObj | objEntry, delim=LineEnd())))
    objStr.setParseAction(cvtGlobDict)
    #objStr.setParseAction(cvtTuple)
    objStr.ignore(pythonStyleComment)

    return objStr.parseString(text)[0]
nums, Group, Optional, ZeroOrMore, alphas, alphas8bit, delimitedList) # Change these if you need more flexibility: entry_type = kwd("article") | kwd("unpublished") cite_key = Word(alphanums + ":/._") LCURLY = Suppress('{') RCURLY = Suppress('}') COMMA = Suppress(',') AT = Suppress('@') EQUALS = Suppress('=') field_val = Word(nums) | QuotedString( '{', endQuoteChar='}', multiline=True, convertWhitespaceEscapes=False) title_field = Group(kwd('title') + EQUALS + field_val) journal_field = Group(kwd('journal') + EQUALS + field_val) year_field = Group(kwd('year') + EQUALS + field_val) volume_field = Group(kwd('volume') + EQUALS + field_val) pages_field = Group(kwd('pages') + EQUALS + field_val) abstract_field = Group(kwd('abstract') + EQUALS + field_val) doi_field = Group(kwd('doi') + EQUALS + field_val) other_field = Group(Word(alphanums) + EQUALS + field_val) author = OneOrMore(~kwd('and') + Word(alphas + alphas8bit + '.,-')) author.setParseAction(lambda xx: ' '.join(str(x) for x in xx)) author_list = LCURLY + delimitedList(author, 'and') + RCURLY author_field = Group(kwd('author') + EQUALS + Group(author_list)) entry_item = (title_field | author_field | journal_field | year_field | volume_field | pages_field | abstract_field | doi_field
class NameError(Exception): pass class ParseError(Exception): pass sq_string = QuotedString( quoteChar="'" ) dq_string = QuotedString( quoteChar='"' ) STRING = sq_string ^ dq_string IDENTIFIER = Word( alphas + "_", alphanums + "_" ) FUNCTION_CALL = Group( IDENTIFIER + ZeroOrMore( STRING ) ) PROGRAM = OneOrMore( FUNCTION_CALL ) def tokenize(program): # String literals are defined as being UTF-8; # skip any characters that don't decode. def decode(s): if six.PY2: return s.decode('utf-8', errors='ignore') else: return s try: return [{'function': el[0], 'arguments': map(decode, el[1:])} for el in PROGRAM.parseString(program)]
VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) VERSION_CMP = (L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<")) MARKER_OP = VERSION_CMP | L("not in") | L("in") MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) MARKER_VALUE = QuotedString("'") | QuotedString('"') MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) BOOLOP = L("and") | L("or") MARKER_VAR = VARIABLE | MARKER_VALUE MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) LPAREN = L("(").suppress() RPAREN = L(")").suppress() MARKER_EXPR = Forward() MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) MARKER = stringStart + MARKER_EXPR + stringEnd def _coerce_parse_result(results): if isinstance(results, ParseResults): return [_coerce_parse_result(i) for i in results]
# Move decorations: check / double check, mate, "!"/"?" annotations (at most
# two characters) and numeric annotation glyphs written as " $<digits>".
check = oneOf("+ ++")
mate = Literal("#")
annotation = Word("!?", max=2)
nag = " $" + Word(nums)
decoration = check | mate | annotation | nag

variant = Forward()
# One ply: a move (m1..m8, comment, tag and move_number are defined outside
# this fragment) with its optional decoration combined into a single token,
# optionally followed by a comment and a parenthesised variant.
half_move = (
    Combine((m3 | m1 | m2 | m4 | m5 | m6 | m7 | m8) + Optional(decoration)) +
    Optional(comment) + Optional(variant))

# NOTE(review): these ungrouped definitions of `move` and `variant` are
# immediately superseded by the grouped ones below (the name is rebound and
# `variant <<` is applied again) - presumably kept for reference only.
move = Suppress(move_number) + half_move + Optional(half_move)
variant << "(" + OneOrMore(move) + ")"
# grouping the plies (half-moves) for each move: useful to group annotations, variants...
# suggested by Paul McGuire :)
move = Group(Suppress(move_number) + half_move + Optional(half_move))
variant << Group("(" + OneOrMore(move) + ")")
game_terminator = oneOf("1-0 0-1 1/2-1/2 *")

# A game: tags (dropped), the moves, and an optional result (dropped).
pgnGrammar = (Suppress(ZeroOrMore(tag)) + ZeroOrMore(move) +
              Optional(Suppress(game_terminator)))


def parsePGN(pgn, bnf=pgnGrammar, fn=None):
    """Parse the PGN text *pgn* with the grammar *bnf*.

    Returns the parse results on success.  On a ``ParseException`` the
    offending line, a caret under the error column and the exception are
    printed, and the function falls through (returning ``None``).
    *fn* is not used by the code visible here.
    """
    try:
        return bnf.parseString(pgn)
    except ParseException as err:
        print(err.line)
        # position the caret under the (1-based) error column
        print(" " * (err.column - 1) + "^")
        print(err)
def formula_grammar(table):
    """
    Construct a pyparsing grammar for chemical formulas and mixtures.

    :Parameters:
        *table* : periodic table object
            Element symbols matched by the grammar are looked up with
            ``table.symbol(name)``. The signature gives *table* no default,
            so it must always be supplied.

    :Returns:
        *grammar* : pyparsing parser element named 'Chemical Formula'
            Matches an optional formula followed by end of string. The
            attached parse actions build ``Formula`` objects (or mixtures
            produced by ``_mix_by_weight_pairs`` / ``_mix_by_volume_pairs``);
            when no formula is present the ``Formula()`` default is used.

    Parse actions may raise ``ValueError`` for mixtures whose percentages
    exceed 100%, for a missing base component, or for a volume quantity of a
    material whose ``density`` is None.

    ``Formula``, ``_immutable``, ``_mix_by_weight_pairs``,
    ``_mix_by_volume_pairs``, ``LENGTH_RE``, ``LENGTH_UNITS``,
    ``MASS_VOLUME_RE``, ``MASS_UNITS`` and ``VOLUME_UNITS`` are defined
    outside this function.
    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope: "[n]" directly after the symbol (no whitespace
    # allowed before it); defaults to '0', which is converted to the int 0.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion: "{n+}", "{n-}", "{+}" or "{-}". The trailing sign is moved
    # to the front before int(): '2+' -> int('+2'), '-' -> int('-1'), and the
    # default '0+' -> int('+0') == 0.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts: `fract` requires a '.', `whole` is a positive integer
    # without leading zeros. A count must follow without whitespace and
    # defaults to 1 when absent.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol, isotope, ion, count to (count, isotope)
    element = symbol + isotope + ion + count

    def convert_element(string, location, tokens):
        """interpret string as element

        Returns ``(count, symbol)`` where symbol has been indexed by isotope
        and/or ``.ion[charge]`` when those are non-zero.
        """
        symbol, isotope, ion, count = tokens[0:4]
        if isotope != 0:
            symbol = symbol[isotope]
        if ion != 0:
            symbol = symbol.ion[ion]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)

    def convert_implicit(string, location, tokens):
        """convert count followed by fragment

        A leading count of 1 returns the fragment itself; any other count
        returns ``(count, fragment)``.
        """
        count = tokens[0]
        fragment = tokens[1:]
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count

    def convert_explicit(string, location, tokens):
        """convert (fragment)count

        Same convention as convert_implicit, but the count is the last token.
        """
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups, separated by '+' or by optional
    # whitespace
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # "@ value" with an optional trailing 'n' or 'i' flag (default 'i')
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i')
    compound = composite + Optional(density, default=None)

    def convert_compound(string, location, tokens):
        """convert material @ density

        The last token is None when no density was given; otherwise it is the
        flag ('n' -> natural_density, anything else -> density) and the token
        before it is the density value.
        """
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]), natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]), density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # Mixture by weight: "n %wt part // n % part // ... // base". The first
    # part must carry the weight marker; later parts may use a bare '%'. The
    # final part has no percentage.
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture +
                 ZeroOrMore(partsep + count + (weight_percent | percent) + mixture) +
                 partsep + mixture)

    def convert_by_weight(string, location, tokens):
        """convert mixture by %wt or %mass

        Tokens alternate percentage, part, ..., followed by the base part; the
        base part is assigned 100 minus the sum of the listed percentages.
        """
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # Mixture by volume: same layout as by_weight with a volume marker.
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture +
                 ZeroOrMore(partsep + count + (volume_percent | percent) + mixture) +
                 partsep + mixture)

    def convert_by_volume(string, location, tokens):
        """convert mixture by %vol

        Same token layout and remainder rule as convert_by_weight. Note that
        the "missing base component" message here also appends the input
        string, unlike the by-weight variant.
        """
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # Mixture by layer thickness: each part is "<value><length unit> material"
    # or a parenthesised layer group followed by a repeat count.
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex(LENGTH_RE) + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)

    def convert_by_layer(string, location, tokens):
        """convert layer thickness '# nm material'

        Tokens are consumed in pairs. A pair starting with a Formula is a
        nested group followed by its repeat count; otherwise the pair is a
        (value, unit) group followed by the material. Thicknesses are turned
        into volume percentages and the total is stored on the result.
        """
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                # NOTE(review): nested groups are read through `absthick`, but
                # the total below is stored as `thickness`. The by-mass
                # converter reads and writes the same attribute
                # (`total_mass`); confirm which attribute name is intended.
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * LENGTH_UNITS[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [(v / total) * 100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.thickness = total
        return result
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    # Mixture by absolute mass (or volume): each part is
    # "<value><mass or volume unit> material" or a parenthesised group
    # followed by a repeat count.
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex(MASS_VOLUME_RE) + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)

    def convert_by_absmass(string, location, tokens):
        """convert mass '# mg material'

        Tokens are consumed in pairs as in convert_by_layer. Quantities are
        turned into mass percentages and the total is stored on the result as
        ``total_mass``. Raises ValueError when a volume quantity is given for
        a material whose ``density`` is None.
        """
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                p = p1
                f = p1.total_mass * float(p2)
            else:
                p = p2
                value = float(p1[0])
                if p1[1] in VOLUME_UNITS:
                    # convert to volume in liters to mass in grams before mixing
                    if p.density is None:
                        raise ValueError("Need the mass density of " + str(p))
                    f = value * VOLUME_UNITS[p1[1]] * 1000. * p.density
                else:
                    f = value * MASS_UNITS[p1[1]]
            piece.append(p)
            fract.append(f)

        total = sum(fract)
        mfract = [(m / total) * 100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece, mfract))
        result.total_mass = total
        return result
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    ungrouped_mixture = (mixture_by_weight | mixture_by_volume |
                         mixture_by_layer | mixture_by_absmass)
    grouped_mixture = opengrp + ungrouped_mixture + closegrp + Optional(
        density, default=None)

    def convert_mixture(string, location, tokens):
        """convert (mixture) @ density

        Sets natural_density ('n') or density ('i') on the mixture in place
        when a density was given, and returns the mixture.
        """
        formula = tokens[0]
        if tokens[-1] == 'n':
            formula.natural_density = tokens[-2]
        elif tokens[-1] == 'i':
            formula.density = tokens[-2]
        # elif tokens[-1] is None: no density given, nothing to set
        return formula
    grouped_mixture = grouped_mixture.setParseAction(convert_mixture)

    # A part inside a mixture is a plain compound or a parenthesised mixture;
    # the top-level formula additionally allows an unparenthesised mixture.
    mixture << (compound | grouped_mixture)
    formula = (compound | ungrouped_mixture | grouped_mixture)
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
# Define one Keyword object per reserved word, bound to the upper-cased word
# plus a trailing underscore (MESSAGE_, REQUIRED_, ..., SYNTAX_).
# The exec input is built only from the fixed word list below, not from
# external data.
kwds = """message required optional repeated enum extensions extends extend to package service rpc returns true false option import syntax"""
for kw in kwds.split():
    exec("%s_ = Keyword('%s')" % (kw.upper(), kw))

# messageBody is declared as a Forward here; no binding for it is visible in
# this excerpt.
messageBody = Forward()

# `ident`, `integer`, `hex_integer` and the punctuation tokens (LBRACE, RBRACE,
# LBRACK, RBRACK, EQ, SEMI) are defined outside this excerpt.
# messageDefn ::= 'message' ident '{' messageBody '}'
# The '-' operator (instead of '+') after the leading keyword is pyparsing's
# "error stop": once the keyword has matched, a failure in the rest of the
# expression is reported immediately instead of backtracking.
messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody(
    "body") + RBRACE

# A field type is one of the listed scalar type names or any identifier.
typespec = (oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident)
rvalue = integer | TRUE_ | FALSE_ | ident
# fieldDirective ::= '[' ident '=' rvalue ']'
fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK
# fieldDefn ::= qualifier typespec ident '=' integer { fieldDirective }* ';' { ';' }*
# (extra trailing semicolons are accepted)
fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") -
             typespec("typespec") + ident("ident") + EQ + integer("fieldint") +
             ZeroOrMore(fieldDirective) + SEMI + ZeroOrMore(SEMI))

# enumDefn ::= 'enum' ident '{' { ident '=' (hex_integer | integer) { fieldDirective }* ';' }* '}'
# The entries are wrapped in Dict and exposed under the results name "values".
enumDefn = (ENUM_("typespec") - ident("name") + LBRACE + Dict(
    ZeroOrMore(
        Group(ident + EQ + (hex_integer | integer) +
              ZeroOrMore(fieldDirective) + SEMI)))("values") + RBRACE)

# extensionsDefn ::= 'extensions' integer 'to' integer ';'
extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI

# messageExtension ::= 'extend' ident '{' messageBody '}'
messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE
sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string) identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote) mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString | mysqlidentString, delim=".", combine=True))).setName("identifier") # EXPRESSIONS expr = Forward() # CASE case = (CASE + Group( ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE + expr("else")) + END).addParseAction(to_case_call) selectStmt = Forward() compound = ( (Keyword("not", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | (Keyword("distinct", caseless=True)("op").setDebugActions(*debug) + expr("params")).addParseAction(to_json_call) | Keyword("null", caseless=True).setName("null").setDebugActions(*debug) | case | (Literal("(").setDebugActions(*debug).suppress() + selectStmt + Literal(")").suppress()) | (Literal("(").setDebugActions(*debug).suppress() + Group(delimitedList(expr)) + Literal(")").suppress()) | realNum.setName("float").setDebugActions(*debug)
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Builds one sub-grammar per supported TDB command and returns them combined
    with ``|`` (alternatives tried in the order listed at the end of the
    function).

    ``float_number``, ``chemical_formula``, ``TCCommand``, ``TDB_PARAM_TYPES``
    and ``_make_piecewise_ast`` are defined outside this function.
    """
    # Parse action returns the matched digits converted to int
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    # symbol name, e.g., phase name, function name
    # NOTE: this first binding is immediately replaced by the wider character
    # set on the next line, so only that second definition is ever used.
    symbol_name = Word(alphanums+'_:', min=1)
    ref_phase_name = symbol_name = Word(alphanums+'_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    # (a trailing '%' is accepted and dropped)
    species_name = Word(alphanums+'+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst([TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    # Layout: a leading number (or, when it is absent, zero or more commas that
    # are replaced by the value 0.01), then one or more segments of
    # "<text up to ';'> ; [,...] [number] <Y/N flag or whitespace>".
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + ref_phase_name + \
        float_number + float_number + float_number + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + chemical_formula + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    # setParseAction modifies func_expr in place; the same func_expr object is
    # reused (and given the same action again) by PARAMETER below.
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # The commands below keep everything up to the end of the line as a single
    # unparsed token.
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    # name, a code token (no spaces or '!'), a suppressed integer, then a group
    # of floats; the remainder of the line is discarded.
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + \
        Suppress(SkipTo(LineEnd()))
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    # type ( name [&token] , constituents [;int] ) function-expression
    # The optional '&' token defaults to None and the optional ';int' to 0.
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
                    cmd_species | \
                    cmd_typedef | \
                    cmd_function | \
                    cmd_ass_sys | \
                    cmd_defsysdef | \
                    cmd_defcmd | \
                    cmd_database_info | \
                    cmd_version_date | \
                    cmd_reference_file | \
                    cmd_add_ref | \
                    cmd_lor | \
                    cmd_templim | \
                    cmd_phase | \
                    cmd_constituent | \
                    cmd_parameter
    return all_commands
return Group( Suppress(backslash) + Literal(key) + Suppress(White()) + Word(nums + '-()') + Suppress(White())) # Define grammar # NOTE: We separate fields like \mt and \mt1, \s and \s1 # so that we could conceivably rewrite the file without changing the convention used # even though it does increase the complexity a little. # phrase = Word(alphas + "-.,!? —–‘“”’;:()'\"[]/&%=*…{}" + nums) phrase = CharsNotIn('\n\\') backslash = Literal('\\') plus = Literal('+') textBlock = Group(Optional(NoMatch(), "text") + phrase) unknown = Group( Optional(NoMatch(), "unknown") + Suppress(backslash) + CharsNotIn(' \n\t\\')) escape = usfmTokenValue('\\', phrase) id = usfmTokenValue('id', phrase) ide = usfmTokenValue('ide', phrase) usfmV = usfmTokenValue('usfm', phrase) # USFM version marker (new with USFM 3.0) h = usfmTokenValue('h', phrase) mt = usfmTokenValue('mt', phrase) mt1 = usfmTokenValue('mt1', phrase) mt2 = usfmTokenValue('mt2', phrase) mt3 = usfmTokenValue('mt3', phrase)
def __init__(self):
    """Build the arithmetic expression grammar and the evaluation tables.

    Sets three attributes:

    * ``self.bnf`` - the pyparsing grammar for an expression.
    * ``self.opn`` - maps each binary operator symbol to its function.
    * ``self.fn``  - maps each supported function name to its callable.

    The grammar's parse actions are ``self.push_first`` and
    ``self.push_uminus``, which are defined elsewhere on the class.
    """
    point = Literal(".")
    # "E" is both the exponent marker inside a number and a standalone atom.
    e = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    # An atom is an optionally signed constant, number or function call, or an
    # optionally signed parenthesised expression.
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber |
              ident + lpar + expr + rpar).setParseAction(self.push_first)) |
            Optional(oneOf("- +")) +
            Group(lpar + expr + rpar)).setParseAction(self.push_uminus)

    # By defining exp as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...",
    # the exponents are parsed right-to-left exponents, instead of left-to-right
    # that is, 2^3^2 = 2^(3^2), instead of (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.push_first))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.push_first))

    # An expression is either a term followed by any number of "+/- term"
    # parts, or one or more "+/- term" parts with no leading term.
    # `expr` is bound exactly once: an earlier, narrower `expr << ...` used to
    # precede this binding and was overwritten by it, so it has been removed.
    addop_term = (addop + term).setParseAction(self.push_first)
    general_term = term + ZeroOrMore(addop_term) | OneOrMore(addop_term)
    expr << general_term
    self.bnf = expr

    # Operator symbol -> implementation. Multiplication is written "*" and
    # exponentiation "^".
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "*": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }

    # Function name -> implementation.
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": int,
        "round": round,
        # Sign of a: -1 or 1, or 0 when |a| is not greater than epsilon.
        "sgn": lambda a: (a > 0) - (a < 0) if abs(a) > epsilon else 0
    }
def usfmBackslashToken(key):
    """Return a grouped parser element that matches the literal text *key*."""
    marker = Literal(key)
    return Group(marker)
class Parser(object):
    """Grammars and helpers for parsing layer directives, shape markup and
    comma-separated identifier lists.

    The grammars are class attributes; each helper is a static method that
    parses with one of them.
    """

    def _call(name, argument):
        # Build ``name(argument)`` as a grouped expression with the
        # parentheses suppressed. Used only while the class body executes and
        # deleted below.
        return Group(Keyword(name) + Suppress('(') + argument + Suppress(')'))

    FREE_TEXT = Word(printables + ' ', excludeChars='()')
    INTEGER = Word(nums)
    ID_TEXT = Word(alphanums, alphanums + ':/_-.')

    ONTOLOGY_SUFFIX = (
        Keyword('ABI')
        | Keyword('FM')
        | Keyword('FMA')
        | Keyword('ILX')
        | Keyword('MA')
        | Keyword('NCBITaxon')
        | Keyword('UBERON')
    )
    ONTOLOGY_ID = Combine(ONTOLOGY_SUFFIX + ':' + ID_TEXT)

    IDENTIFIER = _call('id', ID_TEXT)
    MODELS = _call('models', ONTOLOGY_ID)

    # ---- layer directives ---------------------------------------------------

    BACKGROUND = _call('background-for', IDENTIFIER)
    DESCRIPTION = _call('description', FREE_TEXT)
    SELECTION_FLAGS = Group(
        Keyword('not-selectable') | Keyword('selected') | Keyword('queryable'))
    ZOOM = _call(
        'zoom',
        Group(INTEGER + Suppress(',') + INTEGER + Suppress(',') + INTEGER))

    LAYER_DIRECTIVES = BACKGROUND | DESCRIPTION | IDENTIFIER | MODELS | SELECTION_FLAGS | ZOOM
    LAYER_DIRECTIVE = '.' + ZeroOrMore(LAYER_DIRECTIVES)

    @staticmethod
    def layer_directive(s):
        """Parse the layer directive string *s* into a dict.

        On success the dict has 'selectable' (True unless 'not-selectable' is
        present), True for every other selection flag present, 'zoom' as a
        list of ints, and the argument of each remaining directive under the
        directive's name. On a syntax error the dict instead has an 'error'
        message.
        """
        settings = {}
        try:
            tokens = Parser.LAYER_DIRECTIVE.parseString(s, parseAll=True)
            settings['selectable'] = True
            # tokens[0] is the leading '.'
            for entry in tokens[1:]:
                key = entry[0]
                if key == 'not-selectable':
                    settings['selectable'] = False
                    continue
                if Parser.SELECTION_FLAGS.matches(key):
                    settings[key] = True
                    continue
                if key == 'zoom':
                    settings['zoom'] = [int(level) for level in entry[1]]
                    continue
                settings[key] = entry[1]
        except ParseException:
            settings['error'] = 'Syntax error in layer directive'
        return settings

    # ---- shape markup -------------------------------------------------------

    # LABEL = Group(Keyword('label') + Suppress('(') + FREE_TEXT + Suppress(')'))
    # LAYER = Group(Keyword('layer') + Suppress('(') + ONTOLOGY_ID + Suppress(')'))
    ## WIP: DETAILS = Group(Keyword('details') + Suppress('(') + Suppress(')'))  ## Zoom start, slide/layer ID
    ## Details are positioned within polygon's boundary on a layer "above" the polygon's
    ## fill layer. Say positioned on an invisible place holder that is grouped with the polygon??

    CLASS = _call('class', ID_TEXT)
    PATH = _call('path', ID_TEXT)
    STYLE = _call('style', INTEGER)

    del _call

    FEATURE_PROPERTIES = CLASS | IDENTIFIER | STYLE

    SHAPE_FLAGS = Group(
        Keyword('boundary')
        | Keyword('children')
        | Keyword('closed')
        | Keyword('interior'))
    DEPRECATED_FLAGS = Group(Keyword('siblings') | Keyword('marker'))
    FEATURE_FLAGS = Group(
        Keyword('group')
        | Keyword('invisible')
        | Keyword('divider')
        | Keyword('region'))

    SHAPE_MARKUP = '.' + ZeroOrMore(DEPRECATED_FLAGS | FEATURE_FLAGS | FEATURE_PROPERTIES | PATH | SHAPE_FLAGS)

    @staticmethod
    def shape_properties(name_text):
        """Parse the shape markup *name_text* into a dict of properties.

        Feature and shape flags map to True, a deprecated flag records a
        'warning' message, 'id' is stored under 'external-id', and any other
        property stores its argument under its own name. On a syntax error the
        dict instead has an 'error' message.
        """
        found = {}
        try:
            tokens = Parser.SHAPE_MARKUP.parseString(name_text, parseAll=True)
            # tokens[0] is the leading '.'
            for entry in tokens[1:]:
                key = entry[0]
                if (Parser.FEATURE_FLAGS.matches(key)
                        or Parser.SHAPE_FLAGS.matches(key)):
                    found[key] = True
                    continue
                if Parser.DEPRECATED_FLAGS.matches(key):
                    found['warning'] = "'{}' property is deprecated".format(key)
                    continue
                if key == 'id':
                    # Keep separate from feature's unique id
                    found['external-id'] = entry[1]
                    continue
                found[key] = entry[1]
        except ParseException:
            found['error'] = 'Syntax error in shape markup'
        return found

    @staticmethod
    def ignore_property(name):
        """Return True when *name* is a deprecated flag or a shape flag."""
        ignorable = (Parser.DEPRECATED_FLAGS, Parser.SHAPE_FLAGS)
        return any(flags.matches(name) for flags in ignorable)

    # ---- identifier lists ---------------------------------------------------

    NERVES = delimitedList(ID_TEXT)

    PATH_LINES_GROUP = ID_TEXT | Group(
        Suppress('(') + delimitedList(ID_TEXT) + Suppress(')'))
    PATH_LINES = delimitedList(PATH_LINES_GROUP)

    ROUTE_NODE_GROUP = ID_TEXT | Group(
        Suppress('(') + delimitedList(ID_TEXT) + Suppress(')'))
    ROUTE_NODES = delimitedList(ROUTE_NODE_GROUP)

    @staticmethod
    def path_lines(line_ids):
        """Parse a list of path line ids; parenthesised sub-lists are grouped.

        Raises ValueError when *line_ids* does not match the grammar.
        """
        try:
            return Parser.PATH_LINES.parseString(line_ids, parseAll=True)
        except ParseException:
            raise ValueError(
                'Syntax error in path lines list: {}'.format(line_ids))

    @staticmethod
    def route_nodes(node_ids):
        """Parse a list of route node ids; parenthesised sub-lists are grouped.

        Raises ValueError when *node_ids* does not match the grammar.
        """
        try:
            return Parser.ROUTE_NODES.parseString(node_ids, parseAll=True)
        except ParseException:
            raise ValueError(
                'Syntax error in route node list: {}'.format(node_ids))

    @staticmethod
    def nerves(node_ids):
        """Parse a flat list of nerve ids.

        Raises ValueError when *node_ids* does not match the grammar.
        """
        try:
            return Parser.NERVES.parseString(node_ids, parseAll=True)
        except ParseException:
            raise ValueError('Syntax error in nerve list: {}'.format(node_ids))