def json_text():
    """Return the parser for JSON-formatted data.

    Adapted from funcparserlib's JSON example; accepts trailing commas
    in objects and arrays, and both 'null'/'Null' spellings.
    """
    unwrap = lambda x: x.value
    # Bug fix: the original piped const(None) into unwrap, which would
    # call None.value and raise AttributeError whenever a null literal
    # was actually parsed.  const(None) alone is the correct reduction.
    null = (n('null') | n('Null')) >> const(None)
    # JSON values nest (objects/arrays contain values), so forward-declare.
    value = forward_decl()
    member = (string >> unwrap) + op_(u':') + value >> tuple
    # Renamed from `object`/`array` to avoid shadowing builtins.
    json_object = (
        op_(u'{') +
        maybe(member + many(op_(u',') + member) + maybe(op_(','))) +
        op_(u'}')
        >> make_object)
    json_array = (
        op_(u'[') +
        maybe(value + many(op_(u',') + value) + maybe(op_(','))) +
        op_(u']')
        >> make_array)
    value.define(
        null
        | (true >> unwrap)
        | (false >> unwrap)
        | json_object
        | json_array
        | (number >> unwrap)
        | (string >> unwrap))
    # Top level is an object or an array, per the JSON grammar.
    return json_object | json_array
def parse(seq):
    """Parse a Sequence(Token) into a Chart object (Google-chart DSL).

    A chart is a chart-type keyword, an optional id, then a `{ ... }`
    block of chart attributes and node statements.
    """
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    # Flatten [head, [tail...]] into a single flat value list.
    value_flatten = lambda l: sum([[l[0]]] + list(l[1:]), [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t: t.type in ['Name', 'Number', 'Color', 'String']).named('id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])
    node_id = id  # + maybe(port)
    # Parenthesised pair, e.g. "(a, b)".
    pair = (
        op_('(') + id + skip(maybe(op(','))) + id + op_(')')
        >> tuple)
    value = (id | pair)
    value_list = (
        value +
        many(op_(',') + value)
        >> value_flatten)
    a_list = (
        id +
        maybe(op_('=') + id) +
        skip(maybe(op(',')))
        >> unarg(Attr))
    attr_list = (
        many(op_('[') + many(a_list) + op_(']'))
        >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value_list >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    stmt = (
        chart_attr
        | node_stmt
    )
    stmt_list = many(stmt + skip(maybe(op(';'))))
    # Accepted chart-type keywords.
    # NOTE(review): 'holizontal' looks like a typo for 'horizontal', but it
    # is a runtime grammar keyword, so it is deliberately left untouched.
    chart_type = (
        n('p') | n('pie') | n('piechart')
        | n('p3') | n('pie3d') | n('piechart_3d')
        | n('lc') | n('line') | n('linechart')
        | n('lxy') | n('linechartxy')
        | n('bhs') | n('holizontal_barchart')
        | n('bvs') | n('vertical_barchart')
        | n('bhg') | n('holizontal_bargraph')
        | n('bvg') | n('vertical_bargraph')
        | n('v') | n('venn') | n('venndiagram')
        | n('s') | n('plot') | n('plotchart')
    )
    chart = (
        chart_type +
        maybe(id) +
        op_('{') +
        stmt_list +
        op_('}')
        >> unarg(Chart))
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)
def test_non_halting_left_recursive():
    """non_halting() must flag grammars whose recursion can never consume
    input, and accept those guarded by a mandatory token."""
    # h*: halting -- recursion is preceded by at least one mandatory token.
    h1 = fwd()
    h1.define(x + h1)
    ok_(not non_halting(h1))
    h2 = fwd()
    h2.define(x + (h2 | x))
    ok_(not non_halting(h2))
    # nh*: non-halting -- the parser can recurse without consuming anything.
    nh1 = fwd()
    nh1.define(nh1 + x)
    ok_(non_halting(nh1))
    nh2 = fwd()
    nh2.define(x | nh2)
    ok_(non_halting(nh2))
    nh3_fwd = fwd()
    nh3_fwd.define(nh3_fwd)
    nh3 = x + nh3_fwd + x
    ok_(non_halting(nh3))
    # maybe()/many() can match empty input, so they do not guard recursion.
    nh4 = fwd()
    nh4.define(maybe(x) + nh4 + x)
    ok_(non_halting(nh4))
    nh5 = fwd()
    nh5.define(many(x) + maybe(x) + nh5 + x)
    ok_(non_halting(nh5))
    # A mandatory x before the recursion makes it halting again.
    h3 = fwd()
    h3.define(maybe(x) + many(x) + x + h3)
    ok_(not non_halting(h3))
def parse(seq):
    """Parse a Sequence(Token) into a Chart object (diagram DSL with dates).

    A chart is an optional 'diagram' keyword, an optional id, then a
    `{ ... }` block of chart attributes and node statements.
    """
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    # Bug fix: the predicate used to be `lambda s: a(Token('Date', s))`,
    # which returns a parser object (always truthy), so `date` matched
    # ANY token.  Match on the token type instead, exactly as the sibling
    # date-chart parser elsewhere in this file does.
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    id = some(lambda t: t.type in ['Name', 'Number', 'String']).named(
        'id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])
    node_id = id  # + maybe(port)
    # A date range such as "2014-01-01 - 2014-02-01".
    term = date + op_('-') + date
    value = (id | term | date)
    a_list = (id + maybe(op_('=') + id) + skip(maybe(op(','))) >> unarg(Attr))
    attr_list = (many(op_('[') + many(a_list) + op_(']')) >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    stmt = (chart_attr | node_stmt)
    stmt_list = many(stmt + skip(maybe(op(';'))))
    chart = (maybe(n('diagram')) + maybe(id) + op_('{') + stmt_list + op_('}')
             >> unarg(Chart))
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)
def test_non_halting_left_recursive():
    """non_halting() must flag grammars whose recursion can never consume
    input, and accept those guarded by a mandatory token."""
    # h*: halting -- recursion is preceded by at least one mandatory token.
    h1 = fwd()
    h1.define(x + h1)
    ok_(not non_halting(h1))
    h2 = fwd()
    h2.define(x + (h2 | x))
    ok_(not non_halting(h2))
    # nh*: non-halting -- the parser can recurse without consuming anything.
    nh1 = fwd()
    nh1.define(nh1 + x)
    ok_(non_halting(nh1))
    nh2 = fwd()
    nh2.define(x | nh2)
    ok_(non_halting(nh2))
    nh3_fwd = fwd()
    nh3_fwd.define(nh3_fwd)
    nh3 = x + nh3_fwd + x
    ok_(non_halting(nh3))
    # maybe()/many() can match empty input, so they do not guard recursion.
    nh4 = fwd()
    nh4.define(maybe(x) + nh4 + x)
    ok_(non_halting(nh4))
    nh5 = fwd()
    nh5.define(many(x) + maybe(x) + nh5 + x)
    ok_(non_halting(nh5))
    # A mandatory x before the recursion makes it halting again.
    h3 = fwd()
    h3.define(maybe(x) + many(x) + x + h3)
    ok_(not non_halting(h3))
def when_equation(tokens, state):
    """Parse a Modelica 'when' equation:

    when <expr> then {<equation>;} {elsewhen <expr> then {<equation>;}}
    end when
    """
    kw = keyword
    parser = (kw("when") + expression + kw("then") +
              maybe(many(equation + op(";"))) +
              maybe(
                  many(
                      kw("elsewhen") + expression + kw("then") +
                      maybe(many(equation + op(";"))))) +
              kw("end") + kw("when")) >> WhenEquation
    return parser.run(tokens, state)
def when_statement(tokens, state):
    """Parse a Modelica 'when' statement:

    when <expr> then {<statement>;} {elsewhen <expr> then {<statement>;}}
    end when
    """
    kw = keyword
    parser = (kw("when") + expression + kw("then") +
              maybe(many(statement + op(";"))) +
              maybe(
                  many(
                      kw("elsewhen") + expression + kw("then") +
                      maybe(many(statement + op(";"))))) +
              kw("end") + kw("when")) >> WhenStatement
    return parser.run(tokens, state)
def if_equation(tokens, state):
    """Parse a Modelica 'if' equation:

    if <expr> then {<equation>;} {elseif <expr> then {<equation>;}}
    [else {<equation>;}] end if
    """
    kw = keyword
    parser = (kw("if") + expression + kw("then") +
              maybe(many(equation + op(";"))) +
              maybe(
                  many(
                      kw("elseif") + expression + kw("then") +
                      maybe(many(equation + op(";"))))) +
              maybe(kw("else") + maybe(many(equation + op(";")))) +
              kw("end") + kw("if")) >> IfEquation
    return parser.run(tokens, state)
def if_statement(tokens, state):
    """Parse a Modelica 'if' statement:

    if <expr> then {<statement>;} {elseif <expr> then {<statement>;}}
    [else {<statement>;}] end if
    """
    kw = keyword
    parser = (kw("if") + expression + kw("then") +
              maybe(many(statement + op(";"))) +
              maybe(
                  many(
                      kw("elseif") + expression + kw("then") +
                      maybe(many(statement + op(";"))))) +
              maybe(kw("else") + maybe(many(statement + op(";")))) +
              kw("end") + kw("if")) >> IfStatement
    return parser.run(tokens, state)
def short_class_definition(tokens, state):
    """Parse a Modelica short class definition, e.g.
    'type T = Real[3]' or 'type E = enumeration(a, b)'.
    """
    # circular import!
    from modparc.syntax.class_definition import (class_prefixes, enum_list,
                                                 base_prefix)
    # Either a derived-class form or an enumeration form after the '='.
    parser = (class_prefixes + token_type("ident") + op("=") +
              (base_prefix + name + maybe(array_subscript) +
               maybe(class_modification) + comment
               | keyword('enumeration') + op('(') +
               (maybe(enum_list) | op(":")) + op(')') + comment))
    return (parser >> ShortClassDefinition).run(tokens, state)
def field_query_parser():
    """Return a parser for numeric queries.

    Example queries: '1900-1995' or '>= 1998'
    """
    number = token_type('number')
    field_name = token_type('name')
    lt = token_type('lt')
    le = token_type('le')
    gt = token_type('gt')
    ge = token_type('ge')
    eq = token_type('equals')
    approx = token_type('approx')
    # Simple comparisons
    # NOTE: We put le before lt to parse both
    comparison = parser.maybe(token_type('not'))\
        + field_name\
        + (le | lt | ge | gt)\
        + number
    # Values can be given as intervals ('1990-2000')
    interval = parser.maybe(token_type('not'))\
        + field_name\
        + skip('equals')\
        + number\
        + skip('dash')\
        + number
    # Values can be given as ranges ('1990<=year<=2000')
    # NOTE: We put le before lt to parse both
    range_ = parser.maybe(token_type('not'))\
        + number\
        + (le | lt)\
        + field_name\
        + (le | lt)\
        + number
    # Field value queries ('year=2000' or 'author~Augustus')
    field_value = parser.maybe(token_type('not'))\
        + field_name\
        + (eq | approx)\
        + (token_type('name') | token_type('number') | token_type('any'))
    # Field occurrence ('publisher' or '^publisher')
    field_occurrence = parser.maybe(token_type('not')) + field_name
    # Most specific alternatives first; field_occurrence is a prefix of
    # the others, so it must come last.
    return (interval >> make_query_result('interval')
            | comparison >> make_query_result('comparison')
            | range_ >> make_query_result('range')
            | field_value >> make_query_result('value')
            | field_occurrence >> make_query_result('occurrence'))\
        + parser.skip(parser.finished)
def date_parser():
    """Return a parser for biblatex dates.

    Grammar: YYYY[-MM[-DD]], optionally followed by '/' (open-ended
    range) or '/<date>' (closed range).
    """
    dash = skip('dash')
    forward_slash = token_type('slash')
    # Years are 4-digit numbers; months and days are 2-digit (zero-padded).
    year = if_token_type('number', lambda v: len(v) == 4) >> token_value
    month = if_token_type('number', lambda v: len(v) == 2) >> token_value
    day = month
    date = year + parser.maybe(dash + month) + parser.maybe(dash + day)
    return date\
        + parser.maybe((forward_slash + date) | forward_slash)\
        + parser.skip(parser.finished)\
        >> make_date
def test_first_maybe():
    """FIRST sets of maybe(): epsilon is kept only where the whole
    alternative can match empty input."""
    eq_(first(maybe(a('x'))), ['x', _EPSYLON])
    # A mandatory trailing parser removes epsilon from the sequence.
    eq_(first(maybe(a('x')) + a('y')), ['x', 'y'])
    eq_(first(maybe(a('x')) + maybe(a('y'))), ['x', 'y', _EPSYLON])
    # Alternation keeps epsilon per optional branch.
    eq_(first(maybe(a('x')) | a('y')), ['x', _EPSYLON, 'y'])
    eq_(first(a('x') | maybe(a('y'))), ['x', 'y', _EPSYLON])
    eq_(first(maybe(a('x')) | maybe(a('y'))), ['x', _EPSYLON, 'y', _EPSYLON])
def test_first_maybe():
    """FIRST sets of maybe(): epsilon is kept only where the whole
    alternative can match empty input."""
    eq_(first(maybe(a('x'))), ['x', _EPSYLON])
    # A mandatory trailing parser removes epsilon from the sequence.
    eq_(first(maybe(a('x')) + a('y')), ['x', 'y'])
    eq_(first(maybe(a('x')) + maybe(a('y'))), ['x', 'y', _EPSYLON])
    # Alternation keeps epsilon per optional branch.
    eq_(first(maybe(a('x')) | a('y')), ['x', _EPSYLON, 'y'])
    eq_(first(a('x') | maybe(a('y'))), ['x', 'y', _EPSYLON])
    eq_(first(maybe(a('x')) | maybe(a('y'))), ['x', _EPSYLON, 'y', _EPSYLON])
def parse(seq):
    """Parse a Sequence(Token) into a Chart of date-annotated nodes.

    Each statement is 'name: date [- date]', optionally ';'-terminated.
    """
    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t: t.type in ['Name', 'Number', 'Color', 'String']).named(
        'id') >> tokval
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    make_node = lambda args: Node(*args)
    # 'name: start [- end]' -- the end date is optional.
    node_stmt = id + op_(':') + date + maybe(op_('-') + date) >> make_node
    chart = (many(node_stmt + skip(maybe(op(';')))) >> Chart)
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)
def expression(tokens, state):
    """Parse a Modelica expression: either a simple expression or an
    if/elseif/else conditional expression.
    """
    kw = keyword
    parser = (simple_expression
              | kw("if") + expression + kw("then") + expression +
              maybe(
                  many(kw("elseif") + expression + kw("then") + expression)) +
              kw("else") + expression) >> Expression
    return parser.run(tokens, state)
def parse(tokens):
    '''Parses an SQL date range.

    Parses a list of Token objects to see if it's a valid SQL clause
    meeting the following conditions: An optional sequence of ANDed
    simple conditions ANDed with an optional sequence of ORed complex
    conditions. Where a simple condition is a date unit, a sign, and a
    date value. And a complex condition is any legal SQL combination of
    simple conditions ANDed or ORed together.

    Date unit: YYYY, MM, DD, HH, MIN
    Sign: <, <=, =, >=, >
    Date value: any integer value, with an optional leading zero

    Returns:
        True if the tokens represent a valid SQL date range, False
        otherwise.
    '''
    try:
        left_paren = some(lambda t: t.value in '(')
        right_paren = some(lambda t: t.value in ')')
        oper = some(lambda t: t.value in SIGNS)
        unit = some(lambda t: t.value in UNITS)
        # A leading-zero value tokenizes as two consecutive NUMBER tokens.
        padded_num = some(lambda t: t.code == 2) + some(
            lambda t: t.code == 2)  # hmmm, better way???
        raw_num = some(lambda t: t.code == 2)
        num = padded_num | raw_num
        # Simple condition: unit, sign, value.
        cond = unit + oper + num
        endmark = a(Token(token.ENDMARKER, ''))
        end = skip(endmark + finished)
        ands = maybe(cond + maybe(many(a(Token(token.NAME, 'AND')) + cond)))
        or_ands = left_paren + ands + right_paren
        ors_without_ands = or_ands + maybe(
            many(a(Token(token.NAME, 'OR')) + or_ands))
        ors_with_ands = (a(Token(token.NAME, 'AND')) + left_paren + or_ands +
                         maybe(many(a(Token(token.NAME, 'OR')) + or_ands)) +
                         right_paren)
        ors = maybe(ors_without_ands | ors_with_ands)
        full = left_paren + ands + ors + right_paren + end
        # Only validity matters; the parse result is discarded.
        full.parse(tokens)
    except NoParseError:
        return False
    except TokenError:
        return False
    return True
def element(tokens, state):
    """Parse a Modelica element: an import/extends clause, or a class
    definition / component clause with optional redeclare/final/inner/
    outer/replaceable modifiers.
    """
    kw = keyword
    parser = (import_clause | extends_clause
              | km('redeclare') + km('final') + km('inner') + km('outer') +
              ((class_definition | component_clause)
               | kw('replaceable') + (class_definition | component_clause) +
               maybe(constraining_clause + comment)))
    return (parser >> Element).run(tokens, state)
def class_specifier(tokens, state):
    """Parse a Modelica class specifier (the part after the class
    prefixes) in one of its five syntactic forms.
    """
    # Long form: "ident ... end ident".
    normal = (token_type('ident') + string_comment + composition +
              keyword('end') + token_type('ident'))
    # Derived: "ident = base_prefix name [subscripts] [modification] comment".
    derived = (token_type('ident') + op("=") + base_prefix + name +
               maybe(array_subscript) + maybe(class_modification) + comment)
    # Enumeration: "ident = enumeration( ... )".
    enum_def = (token_type('ident') + op("=") + keyword('enumeration') +
                op("(") + (maybe(enum_list) | op(":")) + op(")") + comment)
    # Derivative: "ident = der(name, ident {, ident})".
    derivative = (token_type('ident') + op("=") + keyword('der') + op("(") +
                  name + op(",") + token_type('ident') +
                  maybe(many(op(",") + token_type('ident'))) + op(")") +
                  comment)
    # Extended: "extends ident [modification] ... end ident".
    extended = (keyword('extends') + token_type('ident') +
                maybe(class_modification) + string_comment + composition +
                keyword("end") + token_type('ident'))
    parser = (normal | derived | enum_def | derivative | extended)
    return (parser >> ClassSpecifier).run(tokens, state)
def parser(last_error=None):
    """Build the top-level document parser.

    `last_error` accumulates the most relevant parse failure for error
    reporting; a fresh LastError is created when none is supplied.
    """
    last_error = LastError() if last_error is None else last_error

    def apl(f):
        # Unpack a parse-result tuple into f's positional arguments.
        return lambda x: f(*x)

    def delim(t):
        # Match token type t and drop it from the result.
        return skip(_tok(t))

    symbol = _tok(Token.SYMBOL) >> _gen(Symbol)
    string = _tok(Token.STRING) >> _gen(String)
    placeholder = _tok(Token.PLACEHOLDER) >> _gen(Placeholder)
    keyword = _tok(Token.KEYWORD) >> _gen(Keyword)
    # Note: tokenizer guarantee that value consists of dots and digits
    # TODO: convert exceptions
    number = _tok(Token.NUMBER) >> _gen(Number, literal_eval)
    # Recursive constructs are forward-declared and defined below.
    expr = forward_decl()
    implicit_tuple = forward_decl()
    list_ = ((_tok(Token.OPEN_BRACKET) + many(expr | keyword) +
              _tok(Token.CLOSE_BRACKET)) >> apl(_list))
    dict_ = (error_ctx(
        _tok(Token.OPEN_BRACE) + many(keyword + expr) +
        _tok(Token.CLOSE_BRACE),
        last_error, DICT_ERROR) >> apl(_dict))
    inline_args = many(expr | keyword)
    explicit_tuple = (error_ctx(
        _tok(Token.OPEN_PAREN) + symbol + inline_args +
        _tok(Token.CLOSE_PAREN),
        last_error, EXPLICIT_TUPLE_ERROR) >> apl(_tuple))
    # Arguments supplied on following, indented lines.
    indented_arg = (
        oneplus(implicit_tuple | expr + delim(Token.NEWLINE)) >> _maybe_join)
    indented_kwarg = (((keyword + expr + delim(Token.NEWLINE))
                       | (keyword + delim(Token.NEWLINE) +
                          delim(Token.INDENT) + indented_arg +
                          delim(Token.DEDENT))))
    indented_args_kwargs = (
        (many(indented_kwarg) + many(indented_arg)) >>
        apl(lambda pairs, args: list(chain(*(pairs + [args])))))
    implicit_tuple.define(
        error_ctx(
            symbol + inline_args + delim(Token.NEWLINE) + maybe(
                delim(Token.INDENT) + indented_args_kwargs +
                delim(Token.DEDENT)),
            last_error, IMPLICIT_TUPLE_ERROR) >> apl(
            _implicit_tuple))
    expr.define(symbol | string | number | explicit_tuple | list_ | dict_
                | placeholder)
    # A document is a sequence of implicit tuples followed by EOF.
    body = ((many(implicit_tuple) + _tok(Token.EOF)) >> apl(_module))
    return body
def get_marginal_parser():
    """Return parser for tokens describing marginals.

    Accepts a 'MAR' solution-type header, then numbers optionally
    separated by '-BEGIN-' markers, up to the end marker.
    """
    solution_type = parser.skip(parser.a(Token(token.NAME, 'MAR')))
    minus = parser.a(Token(token.OP, '-'))
    # '-BEGIN-' separator, skipped entirely when present.
    begin = parser.skip(
        parser.maybe(minus + parser.a(Token(token.NAME, 'BEGIN')) + minus))
    marginal_parser = (solution_type + parser.many(number_parser + begin) +
                       end_parser)
    return marginal_parser
def between(parser, left=op_('('), right=op_(')'), empty=False):
    """Return a parser that parses an occurrence of parser between left
    and right. Can omit parser if empty is True."""
    # When empty content is allowed, wrap the inner parser in maybe().
    inner = maybe(parser) if empty else parser
    return left + inner + right
def between(parser, left=op_('('), right=op_(')'), empty=False):
    """Return a parser that parses an occurrence of parser between left
    and right. Can omit parser if empty is True."""
    # When empty content is allowed, wrap the inner parser in maybe().
    inner = maybe(parser) if empty else parser
    return left + inner + right
def parse(tokens):
    """Parse a token stream into a P-system Program AST."""
    ## building blocks
    kw_priority = some(toktype("kw_priority"))
    kw_probability = some(toktype("kw_probability"))
    kw_reaction = some(toktype("kw_reaction"))
    kw_exists = some(toktype("kw_exists"))
    kw_as = some(toktype("kw_as"))
    op_tilde = some(toktype("op_tilde"))
    op_priority_maximal = some(toktype("op_priority_maximal"))
    op_production = some(toktype("op_production"))
    atom = some(toktype("name"))
    number = some(toktype("number"))
    dissolve = some(toktype("op_dissolve"))
    osmose = some(toktype("op_osmose"))
    osmose_location = some(toktype("op_osmose_location"))
    env_open = some(toktype("env_open"))
    env_close = some(toktype("env_close"))
    membrane_open = some(toktype("membrane_open"))
    membrane_close = some(toktype("membrane_close"))
    ## grammar from the bottom up
    name = atom | number
    symbol = atom | (dissolve + maybe(name)) | (osmose + name + maybe(osmose_location + name))
    priority = kw_priority + op_tilde + name + op_priority_maximal + name
    reaction = (kw_reaction + maybe(kw_as + name) + op_tilde + oneplus(name) +
                op_production + many(symbol))
    exists = kw_exists + op_tilde + oneplus(name)
    expr = (exists | reaction | priority)
    # with_forward_decls breaks the membrane <-> statement cycle.
    statement = with_forward_decls(lambda: membrane | expr) >> Statement
    body = maybe(name) + many(statement)
    membrane = (skip(membrane_open) + body + skip(membrane_close)) >> Membrane
    env = (skip(env_open) + body + skip(env_close)) >> Environment
    program = many(env) + skip(finished) >> Program
    return program.parse(tokens)
def function_arguments(tokens, state): from modparc.syntax.equations import for_indices # circular dependency # Since funcparserlib doesn't have full backtracking # the `named_arguments` parser is matched first to avoid problems parser = ( named_arguments | function_argument + maybe(op(",") + function_arguments | keyword('for') + for_indices)) return (parser >> FunctionArguments).run(tokens, state)
def parse( tokenSequence ):
    """Sequence(Token) -> object"""
    # Top-level Parser: any recognised expression form, repeated zero or
    # more times, until the token stream is exhausted.
    expression = (scanCode_expression
                  | usbCode_expression
                  | variable_expression
                  | capability_expression
                  | define_expression)
    kll_file = maybe(many(expression)) + skip(finished)
    return kll_file.parse(tokenSequence)
def parse(tokenSequence):
    """Sequence(Token) -> object"""
    # Top-level parser: any recognised expression form, repeated zero or
    # more times, until the token stream is exhausted.
    expression = (ignore_expression
                  | scanCode_expression
                  | usbCode_expression
                  | variable_expression
                  | capability_expression
                  | define_expression)
    kll_file = maybe(many(expression)) + skip(finished)
    return kll_file.parse(tokenSequence)
def parse(tokens):
    """Parse a token stream into a P-system Program AST."""
    ## building blocks
    kw_priority = some(toktype("kw_priority"))
    kw_probability = some(toktype("kw_probability"))
    kw_reaction = some(toktype("kw_reaction"))
    kw_exists = some(toktype("kw_exists"))
    kw_as = some(toktype("kw_as"))
    op_tilde = some(toktype("op_tilde"))
    op_priority_maximal = some(toktype("op_priority_maximal"))
    op_production = some(toktype("op_production"))
    atom = some(toktype("name"))
    number = some(toktype("number"))
    dissolve = some(toktype("op_dissolve"))
    osmose = some(toktype("op_osmose"))
    osmose_location = some(toktype("op_osmose_location"))
    env_open = some(toktype("env_open"))
    env_close = some(toktype("env_close"))
    membrane_open = some(toktype("membrane_open"))
    membrane_close = some(toktype("membrane_close"))
    ## grammar from the bottom up
    name = atom | number
    symbol = atom | (dissolve + maybe(name)) | (osmose + name + maybe(osmose_location + name))
    priority = kw_priority + op_tilde + name + op_priority_maximal + name
    reaction = kw_reaction + maybe(kw_as + name) + op_tilde + oneplus(name) + op_production + many(symbol)
    exists = kw_exists + op_tilde + oneplus(name)
    expr = exists | reaction | priority
    # with_forward_decls breaks the membrane <-> statement cycle.
    statement = with_forward_decls(lambda: membrane | expr) >> Statement
    body = maybe(name) + many(statement)
    membrane = (skip(membrane_open) + body + skip(membrane_close)) >> Membrane
    env = (skip(env_open) + body + skip(env_close)) >> Environment
    program = many(env) + skip(finished) >> Program
    return program.parse(tokens)
def parse(input):
    """Parse Chef-style recipe source text into a list of recipe ASTs."""
    period = sometok("period")
    # Adjacent string tokens are joined into a single space-separated string.
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    number = sometok("number")
    title = string + p.skip(period) >> RecipeTitle
    ingredients_start = sometok("ingredients_start") + p.skip(period) >> IngredientStart
    dry_measure = p.maybe(sometok("measure_type")) + sometok("dry_measure")
    liquid_measure = sometok("liquid_measure")
    mix_measure = sometok("mix_measure")
    # is this valid ? 'g of butter', unit w/o initial_value
    ingredient = p.maybe(number) + p.maybe(dry_measure | liquid_measure | mix_measure) + string >> unarg(Ingredient)
    ingredients = p.many(ingredient)
    cooking_time = p.skip(sometok("cooking_time")) + (number >> unarg(CookingTime)) + p.skip(sometok("period"))
    oven_temp = p.skip(sometok("oven")) + p.many(number) + p.skip(sometok("oven_temp")) >> unarg(Oven)
    method_start = sometok("method_start") + p.skip(period)
    comment = p.skip(p.many(string | period))
    header = title + p.maybe(comment)
    instruction = (string + p.skip(period)) >> parse_instruction
    instructions = p.many(instruction)
    program = (method_start + instructions) >> unarg(MethodStart)
    serves = (sometok("serve") + number >> (lambda x: Serve("serve", x[1]))) + p.skip(period)
    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)
    # Every section after the header is optional.
    recipe = (
        header
        + p.maybe(ingredients_section)
        + p.maybe(cooking_time)
        + p.maybe(oven_temp)
        + p.maybe(program)
        + p.maybe(serves)
    ) >> RecipeNode
    main_parser = p.oneplus(recipe)
    return main_parser.parse(tokenize(input))
def _next(y, prefix=_prefix_nl):
    """Wrap the next grammar element; optional blocks become maybe()-parsers,
    optionally guarded by a negative lookahead on their distractor."""
    if not isinstance(y, _OptionalBlock):
        return prefix(y)
    candidate = prefix(y.p)
    if y.avoid:
        # stop parsing if we see the distractor
        candidate = _not_followed_by(prefix(y.avoid)) + candidate
    return fp.maybe(candidate)
def class_prefixes(tokens, state):
    """Parse Modelica class prefixes: optional 'partial' plus one of the
    class kinds (class/model/record/block/connector/type/package/
    function/operator) with their own optional modifiers.
    """
    kw = keyword
    # [pure|impure] [operator] function
    function_prefix = (maybe(kw("pure") | kw("impure")) + km("operator") +
                       kw("function"))
    parser = (km("partial") +
              ((kw("class")
                | kw("model")
                | km("operator") + kw("record")
                | kw("block")
                | (km("expandable") + kw("connector"))
                | kw("type")
                | kw("package")
                | function_prefix
                | kw("operator"))))
    return (parser >> ClassPrefixes).run(tokens, state)
def _parse_rule(seq):
    """Parse a tokenized alert rule into its AST.

    Shape: '<level>: <exp> {<logical-op> <exp>}', where an exp is a
    comparator, a value (number with optional unit, or a historical
    reference), and an optional arithmetic adjustment.
    """
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    sep = lambda s: a(Token(u'Sep', s)) >> tokval
    s_sep = lambda s: skip(sep(s))
    level = toktype(u'Level')
    comparator = toktype(u'Comparator') >> COMPARATORS.get
    number = toktype(u'Number') >> float
    historical = toktype(u'Historical')
    unit = toktype(u'Unit')
    operator = toktype(u'Operator')
    logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get
    exp = comparator + (
        (number + maybe(unit)) | historical) + maybe(operator + number)
    rule = (level + s_sep(':') + exp + many(logical_operator + exp))
    overall = rule + skip(finished)
    return overall.parse(seq)
def _next(y, prefix=_prefix_nl):
    """Wrap the next grammar element; optional blocks become maybe()-parsers,
    optionally guarded by a negative lookahead on their distractor."""
    if not isinstance(y, _OptionalBlock):
        return prefix(y)
    candidate = prefix(y.p)
    if y.avoid:
        # stop parsing if we see the distractor
        candidate = _not_followed_by(prefix(y.avoid)) + candidate
    return fp.maybe(candidate)
def _create_type_rules():
    """Build and return the four top-level type-expression parsers:
    (explicit_type, type_definition, structural_type_definition, generic).
    """
    comma = _token_type("comma")
    colon = _token_type("colon")
    question_mark = _token_type("question-mark")
    bar = _token_type("bar")
    equals = _token_type("equals")
    attr_name = type_name = arg_name = _token_type("name") >> _make_name
    # primary_type is recursive (via sub_signature), so forward-declare.
    primary_type = forward_decl()
    union_type = _one_or_more_with_separator(primary_type, bar) >> _make_union_type
    type_ = union_type
    type_ref = type_name >> _make_type_ref
    # Generic application, e.g. "list[int]".
    applied_type = (
        type_ref +
        skip(_token_type("open")) +
        _one_or_more_with_separator(type_, comma) +
        skip(_token_type("close"))
    ) >> _make_apply
    # Argument: optional '?' (optionality), optional 'name:', then the type.
    arg = (maybe(question_mark) + maybe(arg_name + skip(colon)) + type_) >> _make_arg
    generic_params = maybe(type_name + _token_type("fat-arrow")) >> _make_params
    args = _zero_or_more_with_separator(arg, comma)
    signature = (generic_params + args + _token_type("arrow") + type_) >> _make_signature
    # Parenthesised signature; keep only the signature node itself.
    sub_signature = (_token_type("paren-open") + signature + _token_type("paren-close")) >> (lambda result: result[1])
    primary_type.define(sub_signature | applied_type | type_ref)
    explicit_type = signature | type_
    type_definition = (type_name + skip(equals) + type_ + skip(finished)) >> _make_type_definition
    structural_type_attr = (attr_name + skip(colon) + explicit_type) >> tuple
    structural_type_attrs = many(structural_type_attr)
    structural_type_definition = (type_name + skip(colon) + structural_type_attrs + skip(finished)) >> _make_structural_type_definition
    generic = (_one_or_more_with_separator(type_name, comma) + skip(finished)) >> _make_generic
    return explicit_type + skip(finished), type_definition, structural_type_definition, generic
def create_grammar():
    """Build a funcparserlib grammar for JSON text (object or array)."""
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval
    null = n('null')
    true = n('true')
    false = n('false')
    number = toktype('Number')
    string = toktype('String')
    # value is recursive (objects/arrays contain values), so forward-declare.
    value = forward_decl()
    member = string + op_(':') + value
    object_ = (op_('{') + maybe(member + many(op_(',') + member)) + op_('}'))
    array = (op_('[') + maybe(value + many(op_(',') + value)) + op_(']'))
    value.define(null | true | false | object_ | array | number | string)
    # Top level is an object or an array, followed by end of input.
    json_text = object_ | array
    json_file = json_text + skip(finished)
    return json_file
def _parse_rule(seq):
    """Parse a tokenized alert rule into its AST.

    Shape: '<level>: <exp> {<logical-op> <exp>}', where an exp is a
    comparator, a value (number with optional unit, or a historical
    reference), and an optional arithmetic adjustment.
    """
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    sep = lambda s: a(Token(u'Sep', s)) >> tokval
    s_sep = lambda s: skip(sep(s))
    level = toktype(u'Level')
    comparator = toktype(u'Comparator') >> COMPARATORS.get
    number = toktype(u'Number') >> float
    historical = toktype(u'Historical')
    unit = toktype(u'Unit')
    operator = toktype(u'Operator')
    logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get
    exp = comparator + ((number + maybe(unit)) | historical) + maybe(operator + number)
    rule = (
        level + s_sep(':') + exp + many(logical_operator + exp)
    )
    overall = rule + skip(finished)
    return overall.parse(seq)
def _parse(seq):
    """Parse a tokenized filter expression into a boolean-combinator AST.

    Precedence (tightest first): parentheses, not, and, or.  An empty
    input yields Any().
    """
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    def make_string(args):
        context, value = args
        # No prefix means: match in any context.
        if not context:
            context = 'any:'
        # [1:-1] strips the surrounding quotes; [:-1] drops the ':' suffix.
        return String(unescape_str(value[1:-1]), context[:-1])

    def make_regex(args):
        context, value = args
        # Split trailing modifiers off the /pattern/mods literal.
        value, modifiers = value.rsplit('/', 1)
        value = value[1:]
        if not context:
            context = 'any:'
        return Regex(unescape_regex(value), modifiers, context[:-1])

    def make_or(args):
        return Or(*args)

    def make_and(args):
        return And(*args)

    def make_not(x):
        return Not(x)

    context = maybe(toktype(u'Prefix'))
    string = (context + toktype(u'String')) >> make_string
    regex = (context + toktype(u'Regex')) >> make_regex
    # Mutually recursive levels of the precedence ladder.
    par_term = forward_decl()
    simple_term = forward_decl()
    term = forward_decl()
    not_term = forward_decl()
    and_term = forward_decl()
    or_term = forward_decl()
    par_term.define(op_(u'(') + term + op_(u')'))
    simple_term.define(par_term | string | regex)
    not_term.define(op_('not') + not_term >> make_not | simple_term)
    and_term.define(not_term + op_('and') + and_term >> make_and | not_term)
    or_term.define(and_term + op_('or') + or_term >> make_or | and_term)
    term.define(or_term)
    eof = skip(toktype(u'EOF'))
    filter_expr = (term + eof) | (eof >> const(Any()))
    return filter_expr.parse(seq)
def primary(tokens, state):
    """Parse a Modelica primary expression: a literal, function call,
    component reference, parenthesised output list, matrix/array
    constructor, or 'end'.
    """
    kw = keyword
    parser = (token_type("number")
              | token_type("string")
              | kw("false")
              | kw("true")
              | (name | kw("der") | kw("initial")) + function_call_args
              | component_reference
              | op("(") + output_expression_list + op(")")
              | (op("[") + expression_list +
                 maybe(many(op(";") + expression_list)) + op("]"))
              | op("{") + function_arguments + op("}")
              | kw("end"))
    return (parser >> Primary).run(tokens, state)
def parse(seq):
    """Parse the list of tokens and generate an AST.

    Returns a Prog node wrapping the top-level expression.
    """
    def eval_expr(z, pairs):
        # Fold [(op_class, operand), ...] into a left-associative tree.
        # Bug fix: the original used `lambda s, (f, x): f(s, x)` inside
        # reduce(), which is Python-2-only tuple-parameter syntax and a
        # SyntaxError on Python 3.  This loop is the equivalent fold.
        for f, x in pairs:
            z = f(z, x)
        return z
    unarg = lambda f: lambda x: f(*x)
    tokval = lambda x: x.value  # returns the value of a token
    toktype = lambda t: some(lambda x: x.type == t) >> tokval  # checks type of token
    const = lambda x: lambda _: x  # like ^^^ in Scala
    op = lambda s: a(Token('Op', s)) >> tokval  # return the value if token is Op
    op_ = lambda s: skip(op(s))  # checks if token is Op and ignores it
    lst = lambda x: [x[0], ] + x[1]
    tup = lambda x: (x[0], x[1])
    # Map an operator token to its AST constructor.
    makeop = lambda s, f: op(s) >> const(f)
    add = makeop('+', Add)
    sub = makeop('-', Sub)
    mul = makeop('*', Mul)
    div = makeop('/', Div)
    lt = makeop('<', Lt)
    gt = makeop('>', Gt)
    eq = makeop('=', Eq)
    operation = add | sub | mul | div | lt | gt | eq
    # with_forward_decls defers references to exp/fun defined below.
    decl = with_forward_decls(lambda: toktype('Var') + op_('=') + (exp | fun) >> tup)
    decls = decl + many(skip(toktype('Semicolon')) + decl) >> lst
    variable = toktype('Var') >> Variable
    variables = variable + many(skip(toktype('Comma')) + variable) >> lst
    fun = with_forward_decls(lambda: skip(toktype('Fun')) + variables +
                             skip(toktype('Arrow')) + exp + skip(toktype('End'))) >> unarg(Fun)
    parameters = with_forward_decls(lambda: exp + many(skip(toktype('Comma')) + exp) >> lst)
    call = skip(toktype('Call')) + (fun | variable) + skip(toktype('Lp')) + \
        parameters + skip(toktype('Rp')) >> unarg(Call)
    ex = with_forward_decls(
        lambda: variable
        | toktype('Number') >> (lambda x: Const(int(x)))
        | toktype('True') >> (lambda x: Const(True))
        | toktype('False') >> (lambda x: Const(False))
        | skip(toktype('Let')) + decls + skip(toktype('In')) + exp +
        skip(toktype('End')) >> unarg(Let)
        | skip(toktype('If')) + exp + skip(toktype('Then')) + exp +
        maybe(skip(toktype('Else')) + exp) + skip(toktype('Fi')) >> unarg(If)
        | fun | call)
    # An expression is a term followed by (operator, term) pairs.
    exp = ex + many(operation + ex) >> unarg(eval_expr)
    prog = skip(toktype('Prog')) + exp >> Prog
    return prog.parse(seq)
def parse(source):
    """Parse a tokenized task description and return a populated Task.

    Grammar: [With <mask>] In <root> {Set <attr> Equals <value>}.
    The reductions mutate `task` as a side effect of parsing.
    """
    task = Task()
    get_value = lambda x: x.value
    value_of = lambda t: some(lambda x: x.type == t) >> get_value
    keyword = lambda s: skip(value_of(s))
    # Values are quoted; [1:-1] strips the surrounding quotes.
    make_rule = lambda x: task.add_rule(Rule(**{x[0]: x[1][1:-1]}))
    set_root = lambda value: task.set_root_dir(value[1:-1])
    set_mask = lambda value: task.set_mask(value[1:-1])
    root = keyword('In') + value_of('Value') >> set_root
    mask = keyword('With') + value_of('Value') >> set_mask
    rule = keyword('Set') + \
        value_of('Attribute') + \
        keyword('Equals') + \
        value_of('Value') \
        >> make_rule
    parser = maybe(mask) + root + many(rule)
    parser.parse(source)
    return task
def parse(seq):
    """Sequence(Token) -> Graph object.

    Parser for the Graphviz DOT language: graphs, subgraphs, nodes,
    edges, and attribute statements.
    """
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token(u'Name', s)) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id_types = [u'Name', u'Number', u'String']
    id = some(lambda t: t.type in id_types).named(u'id') >> tokval
    make_graph_attr = lambda args: DefAttrs(u'graph', [Attr(*args)])
    make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs)
    node_id = id  # + maybe(port)
    a_list = (
        id +
        maybe(op_(u'=') + id) +
        skip(maybe(op(u',')))
        >> unarg(Attr))
    attr_list = (many(op_(u'[') + many(a_list) + op_(u']')) >> flatten)
    attr_stmt = (
        (n(u'graph') | n(u'node') | n(u'edge')) +
        attr_list
        >> unarg(DefAttrs))
    graph_attr = id + op_(u'=') + id >> make_graph_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    subgraph = forward_decl()
    # Directed and undirected edge operators are both accepted.
    edge_rhs = skip(op(u'->') | op(u'--')) + (subgraph | node_id)
    edge_stmt = ((subgraph | node_id) +
                 oneplus(edge_rhs) +
                 attr_list
                 >> unarg(make_edge))
    stmt = (attr_stmt
            | edge_stmt
            | subgraph
            | graph_attr
            | node_stmt)
    stmt_list = many(stmt + skip(maybe(op(u';'))))
    subgraph.define(
        skip(n(u'subgraph')) +
        maybe(id) +
        op_(u'{') +
        stmt_list +
        op_(u'}')
        >> unarg(SubGraph))
    graph = (maybe(n(u'strict')) +
             maybe(n(u'graph') | n(u'digraph')) +
             maybe(id) +
             op_(u'{') +
             stmt_list +
             op_(u'}')
             >> unarg(Graph))
    dotfile = graph + skip(finished)
    return dotfile.parse(seq)
lambda: lparen + expr + rparen ) # *Mark here are not really required, but if you are going to do # anything complex that requires that you discern between different # parsing paths, marks are often give you least hassle. expr = with_forward_decls( lambda: (number + pure(NumberMark) + expr_rest | paren_expr + pure(ParenMark) + expr_rest) >> make_expr) # This one allows us to add more complex expressions like function # application and ternary operators to the above definition with ease. # Otherwise terms such as `apply = expr lparen many(expr) rpanen` # would be impossible to add, always leading to infinite left recursion. expr_rest = maybe(op + expr) toplev = expr + skip(eof) @py.test.mark.parametrize("given, parser, expected", [ ("1", number, Number("1")), ("+", op, "+"), ("-", op, "-"), ("*", op, "*"), ("^", op, "^"), ]) def test_parse_primitives(given, parser, expected): data = parser.parse(list(tokenize(given))[:-1]) assert data == expected
return (n[0], n[1]) def make_object(n): return PLObject(n) def make_name(n): assert len(n) == 2 return PLName(n[0], n[1]) def make_array(n): return PLArray(n) number = sometok('number') >> make_number string = sometok('string') >> make_string comment = sometok('comment') >> make_comment name = (sometok('name') | sometok('string')) + maybe(comment) >> make_name value = fwd() member = name + op_('=') + value >> make_member section = maybe(comment) + oneplus(member + op_(';')) + maybe(comment) >> make_section object = ( op_('{') + many(section) + op_('}')) >> make_object array = ( op_('(') + many(value + op_(',')) + op_(')')) >> make_array value.define( object | number
def parse(seq):
    """Parse a sequence of JSON Tokens into the corresponding Python object.

    Implements the JSON grammar with funcparserlib combinators: the
    ``null``/``true``/``false`` literals, numbers, strings (escape
    sequences expanded via the module-level ``re_esc`` pattern), arrays
    and objects.  The toplevel value must be an object or an array and
    the whole token stream must be consumed; otherwise funcparserlib's
    ``NoParseError`` is raised.
    """
    token_value = lambda tok: tok.value
    always = lambda result: lambda _: result

    def typed(token_type):
        # Match any token of the given type and keep its text.
        return some(lambda tok: tok.type == token_type) >> token_value

    def punct(symbol):
        # Match a punctuation token and drop it from the result.
        return skip(a(Token('Op', symbol)) >> token_value)

    def keyword(word):
        return a(Token('Name', word)) >> token_value

    def to_list(parsed):
        # maybe(first + many(rest)) yields None or (first, [rest, ...]).
        return [] if parsed is None else [parsed[0]] + parsed[1]

    def to_dict(parsed):
        return dict(to_list(parsed))

    def to_number(text):
        # Integers stay ints; anything else falls back to float.
        try:
            return int(text)
        except ValueError:
            return float(text)

    simple_escapes = {
        '"': '"', '\\': '\\', '/': '/', 'b': '\b',
        'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
    }

    def unescape(text):
        # Expand \X and \uXXXX escapes found by the re_esc pattern.
        def replace(match):
            literal = match.group('standard')
            if literal is not None:
                return simple_escapes[literal]
            return chr(int(match.group('unicode'), 16))
        return re_esc.sub(replace, text)

    def to_string(text):
        # Strip the surrounding quotes, then expand escapes.
        return unescape(text[1:-1])

    json_null = keyword('null') >> always(None)
    json_true = keyword('true') >> always(True)
    json_false = keyword('false') >> always(False)
    json_number = typed('Number') >> to_number
    json_string = typed('String') >> to_string

    # value is forward-declared: objects/arrays contain values recursively.
    json_value = forward_decl()
    pair = json_string + punct(':') + json_value >> tuple
    json_object = (
        punct('{') +
        maybe(pair + many(punct(',') + pair)) +
        punct('}')
        >> to_dict)
    json_array = (
        punct('[') +
        maybe(json_value + many(punct(',') + json_value)) +
        punct(']')
        >> to_list)
    json_value.define(
        json_null
        | json_true
        | json_false
        | json_object
        | json_array
        | json_number
        | json_string)

    document = (json_object | json_array) + skip(finished)
    return document.parse(seq)
raise p.NoParseError("parsing failed", s) _not_parser.name = "!{}".format(parser.name) return _not_parser a = lambda value: p.some(lambda tok: tok.value == value) string = lambda s: p.some(lambda tok: tok.value.lower() == s.lower()).named(s) some_type = lambda t: p.some(lambda tok: tok.type == t).named(t) not_type = lambda t: p.some(lambda tok: tok.type != t).named("!{}".format(t)) any_type = p.some(lambda _: True).named("Any") eof = p.finished.named("") some = p.some maybe = lambda parser: p.maybe(parser).named("[{}]".format(parser.name)) many = lambda parser: p.many(parser).named("[{}]...".format(parser.name)) skip = lambda parser: p.skip(parser).named("") oneplus = lambda parser: p.oneplus(parser).named("{},[{}]...".format(parser.name, parser.name)) sparsed = lambda parser: (skip(many(not_parser(parser))) + parser).named("_{}".format(parser.name)) integer = (some_type(TokenType.Integer) >> to_i).named("I") number = ((some_type(TokenType.Integer) | some_type(TokenType.Float)) >> to_f).named("N") word = (some_type(TokenType.Word) >> to_s).named("W") mention = (some_type(TokenType.Mention) >> extract_mention_id).named("M") channel = (some_type(TokenType.Channel) >> extract_channel_id).named("C") emoji = (some_type(TokenType.Emoji) >> extract_emoji_id).named("E") snippet = (some_type(TokenType.Snippet) >> extract_snippet).named("S") # High level helpers on_off_switch = (string("on") >> const(True)) | (string("off") >> const(False))
def evaluate(expression, environment): """Evaluate an expression in the specified variable environment.""" # Well known functions const = lambda x: lambda _: x unarg = lambda f: lambda args: f(*args) # Semantic actions and auxiliary functions tokval = lambda tok: tok.value makeop = lambda s, f: op(s) >> const(f) sometok = lambda type: some(lambda tok: tok.type == type) def eval_name(s): try: return environment[s] # Case-sensitive except KeyError: raise ValueError('unbound variable: %s' % s) def make_number(s): try: return int(s) except ValueError: return float(s) def eval_expr(expr, op_expr_pairs): result = expr for op, expr in op_expr_pairs: result = op(result, expr) return result def eval_call(func_name, maybe_expr_and_exprs): if maybe_expr_and_exprs: expr, exprs = maybe_expr_and_exprs args = [expr] + exprs else: args = [] f = eval_name(func_name) if not callable(f): raise TypeError('variable is not callable: %s' % func_name) argcount = len(args) f_argcount = f.func_code.co_argcount if f_argcount != argcount: raise TypeError('%s takes %d arguments (%d given)' % (func_name, f_argcount, argcount)) return f(*args) # Primitives number = ( sometok('number') >> tokval >> make_number) raw_name = sometok('name') >> tokval name = raw_name >> eval_name op = lambda s: a(Token('op', s)) >> tokval op_ = lambda s: skip(op(s)) add = makeop('+', operator.add) sub = makeop('-', operator.sub) mul = makeop('*', operator.mul) div = makeop('/', operator.div) mul_op = mul | div add_op = add | sub # Means of composition expr = forward_decl() call = ( raw_name + op_('(') + maybe(expr + many(op_(',') + expr)) + op_(')') >> unarg(eval_call)) primary = ( number | call | name | op_('(') + expr + op_(')')) term = ( primary + many(mul_op + primary) >> unarg(eval_expr)) expr.define( term + many(add_op + term) >> unarg(eval_expr)) # Toplevel parsers toplevel = maybe(expr) + skip(finished) return toplevel.parse(tokenize(expression))
def parse(seq):
    """Parse a sequence of lexer Tokens into a Graphviz DOT graph AST.

    Runs the DOT grammar over ``seq`` and returns a ``Graph`` built from
    the module-level AST constructors (``Graph``, ``SubGraph``, ``Node``,
    ``Edge``, ``Attr``, ``DefAttrs``).  Raises funcparserlib's
    ``NoParseError`` on malformed input.
    """
    token_value = lambda tok: tok.value
    # Adapt an n-tuple parse result to an n-argument constructor.
    spread = lambda f: lambda args: f(*args)
    # Collapse a list of lists into one flat list.
    concat = lambda lists: sum(lists, [])

    def name(text):
        # Match a specific keyword token and keep its text.
        return a(Token('Name', text)) >> token_value

    def literal(text):
        # Match a specific operator token and keep its text.
        return a(Token('Op', text)) >> token_value

    def literal_(text):
        # Same as literal(), but the matched text is dropped.
        return skip(literal(text))

    identifier = (
        some(lambda tok: tok.type in ['Name', 'Number', 'String'])
        .named('id') >> token_value)

    # A bare `key = value` at graph scope is a graph-level attribute.
    graph_attr_action = lambda args: DefAttrs('graph', [Attr(*args)])
    # An edge chain `a -> b -> c` becomes one Edge over all its nodes.
    edge_action = lambda head, tail, attrs: Edge([head] + tail, attrs)

    node_id = identifier  # + maybe(port)
    # One `key[=value]` entry inside [...]; trailing comma tolerated.
    single_attr = (
        identifier + maybe(literal_('=') + identifier) +
        skip(maybe(literal(','))) >> spread(Attr))
    attr_list = (
        many(literal_('[') + many(single_attr) + literal_(']')) >> concat)
    attr_stmt = (
        (name('graph') | name('node') | name('edge')) + attr_list
        >> spread(DefAttrs))
    graph_attr = identifier + literal_('=') + identifier >> graph_attr_action
    node_stmt = node_id + attr_list >> spread(Node)

    # subgraph must be forward-declared because of the cycle
    # stmt_list -> stmt -> subgraph -> stmt_list.
    subgraph = forward_decl()
    edge_rhs = skip(literal('->') | literal('--')) + (subgraph | node_id)
    edge_stmt = (
        (subgraph | node_id) + oneplus(edge_rhs) + attr_list
        >> spread(edge_action))
    stmt = attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt
    # Statements may optionally be separated by semicolons.
    stmt_list = many(stmt + skip(maybe(literal(';'))))
    subgraph.define(
        skip(name('subgraph')) + maybe(identifier) +
        literal_('{') + stmt_list + literal_('}')
        >> spread(SubGraph))
    graph = (
        maybe(name('strict')) + maybe(name('graph') | name('digraph')) +
        maybe(identifier) + literal_('{') + stmt_list + literal_('}')
        >> spread(Graph))

    # The full token stream must be consumed.
    return (graph + skip(finished)).parse(seq)
Return a parser that tries to parse p, and raises a CustomParseError when it fails. """ @Parser def _try_p(tokens, s): try: return p.run(tokens, s) except NoParseError as err: raise CustomParseError(msg, err) return _try_p # Grammar starts here test = with_forward_decls(lambda: choice([ or_test + maybe(IF + or_test + ELSE + test), lambdef])) not_test = with_forward_decls(lambda: (NOT + not_test) | comparison) and_test = sep_by(not_test, AND) or_test = sep_by(and_test, OR) test_nocond = with_forward_decls(lambda: or_test | lambdef_nocond) testlist = with_forward_decls(lambda: sep_by(test)) atom = with_forward_decls(lambda: choice([ between(yield_expr | testlist_comp, empty=True), between(testlist_comp, op_('['), op_(']'), empty=True), between(dictorsetmaker, op_('{'), op_('}'), empty=True), IDENTIFIER, NUMBER, STRING + many(STRING),
def sep_by(parser, sep=COMMA, trailing=True):
    """Parse one or more ``parser`` instances separated by ``sep``.

    When ``trailing`` is true, a dangling separator after the final
    element is accepted as well.
    """
    combined = parser + many(sep + parser)
    return combined + maybe(sep) if trailing else combined
unseqString = tokenType('SequenceString') >> Make.unseqString # For use with variables colRowOperator = lambda s: a( Token( 'ColRowOperator', s ) ) relCROperator = lambda s: a( Token( 'RelCROperator', s ) ) pixelOperator = tokenType('PixelOperator') # Code variants code_begin = tokenType('CodeBegin') code_end = tokenType('CodeEnd') # Specifier specifier_basic = ( timing >> Make.specifierTiming ) | ( name >> Make.specifierState ) specifier_complex = ( name + skip( operator(':') ) + timing ) >> unarg( Make.specifierState ) specifier_state = specifier_complex | specifier_basic specifier_analog = number >> Make.specifierAnalog specifier_list = skip( parenthesis('(') ) + many( ( specifier_state | specifier_analog ) + skip( maybe( comma ) ) ) + skip( parenthesis(')') ) # Scan Codes scanCode_start = tokenType('ScanCodeStart') scanCode_range = number + skip( dash ) + number >> Make.scanCode_range scanCode_listElem = number >> Make.scanCode scanCode_specifier = ( scanCode_range | scanCode_listElem ) + maybe( specifier_list ) >> unarg( Make.specifierUnroll ) scanCode_innerList = many( scanCode_specifier + skip( maybe( comma ) ) ) >> flatten scanCode_expanded = skip( scanCode_start ) + scanCode_innerList + skip( code_end ) + maybe( specifier_list ) >> unarg( Make.specifierUnroll ) scanCode_elem = scanCode + maybe( specifier_list ) >> unarg( Make.specifierUnroll ) scanCode_combo = oneplus( ( scanCode_expanded | scanCode_elem ) + skip( maybe( plus ) ) ) scanCode_sequence = oneplus( scanCode_combo + skip( maybe( comma ) ) ) scanCode_single = ( skip( scanCode_start ) + scanCode_listElem + skip( code_end ) ) | scanCode scanCode_il_nospec = oneplus( ( scanCode_range | scanCode_listElem ) + skip( maybe( comma ) ) ) scanCode_nospecifier = skip( scanCode_start ) + scanCode_il_nospec + skip( code_end )
def parser(last_error=None):
    """Build the grammar and return the toplevel (module body) parser.

    @param last_error: optional LastError accumulator shared with
        error_ctx() so the deepest failure position can be reported;
        a fresh one is created when omitted.
    @return: a parser producing the module AST (via _module) from a
        token stream ending in Token.EOF.
    """
    last_error = LastError() if last_error is None else last_error

    def apl(f):
        # Spread a parsed tuple into f's positional arguments.
        return lambda x: f(*x)

    def delim(t):
        # Match token type t and drop it from the parse result.
        return skip(_tok(t))

    symbol = _tok(Token.SYMBOL) >> _gen(Symbol)
    string = _tok(Token.STRING) >> _gen(String)
    placeholder = _tok(Token.PLACEHOLDER) >> _gen(Placeholder)
    keyword = _tok(Token.KEYWORD) >> _gen(Keyword)
    # Note: tokenizer guarantees that the value consists of dots and digits
    # TODO: convert exceptions
    number = _tok(Token.NUMBER) >> _gen(Number, literal_eval)

    # Forward-declared: expressions and implicit tuples nest recursively.
    expr = forward_decl()
    implicit_tuple = forward_decl()

    # [expr-or-keyword ...]
    list_ = (
        (_tok(Token.OPEN_BRACKET) + many(expr | keyword) +
         _tok(Token.CLOSE_BRACKET))
        >> apl(_list)
    )
    # {:keyword expr ...} -- failures reported through last_error.
    dict_ = (
        error_ctx(_tok(Token.OPEN_BRACE) + many(keyword + expr) +
                  _tok(Token.CLOSE_BRACE),
                  last_error, DICT_ERROR)
        >> apl(_dict)
    )

    inline_args = many(expr | keyword)
    # (symbol args...) -- a parenthesised call form.
    explicit_tuple = (
        error_ctx(_tok(Token.OPEN_PAREN) + symbol + inline_args +
                  _tok(Token.CLOSE_PAREN),
                  last_error, EXPLICIT_TUPLE_ERROR)
        >> apl(_tuple)
    )

    # One indented argument: either a nested implicit tuple or a plain
    # expression terminated by a newline.
    indented_arg = (
        oneplus(implicit_tuple | expr + delim(Token.NEWLINE)) >> _maybe_join
    )
    # ':kw expr NEWLINE' or ':kw NEWLINE INDENT arg DEDENT'.
    indented_kwarg = (
        ((keyword + expr + delim(Token.NEWLINE)) |
         (keyword + delim(Token.NEWLINE) + delim(Token.INDENT) +
          indented_arg + delim(Token.DEDENT)))
    )
    # Keyword args first, then positional args; flattened into one list.
    indented_args_kwargs = (
        (many(indented_kwarg) + many(indented_arg))
        >> apl(lambda pairs, args: list(chain(*(pairs + [args]))))
    )

    # 'symbol args NEWLINE [INDENT more-args DEDENT]' -- the call form
    # written without parentheses.
    implicit_tuple.define(
        error_ctx(symbol + inline_args + delim(Token.NEWLINE) +
                  maybe(delim(Token.INDENT) + indented_args_kwargs +
                        delim(Token.DEDENT)),
                  last_error, IMPLICIT_TUPLE_ERROR)
        >> apl(_implicit_tuple)
    )

    expr.define(symbol | string | number | explicit_tuple | list_ | dict_ |
                placeholder)

    # A module is a sequence of implicit tuples up to EOF.
    body = (
        (many(implicit_tuple) + _tok(Token.EOF)) >> apl(_module)
    )
    return body
# Forward declarations for constructs that appear inside their own
# definitions (function application, renaming, set comprehensions).
application = forward_decl()
renaming = forward_decl()
set_context = forward_decl()

# string expression: concatenation via the additive operator.
strexpr = string + many(add_op + string) >> u(concatenate)

# numerical expression, with standard precedence:
# power binds tighter than '*'/'/' which bind tighter than '+'/'-'.
numeric = application | lambda_ | number | name | (op_('(') + nexpr + op_(')'))
factor = numeric + many(power + numeric) >> u(make_expression)
term = factor + many(mul_op + factor) >> u(make_expression)
nexpr.define(
    term + many(add_op + term) >> u(make_expression)
)

# boolean expression: comparisons over set/string/numeric operands,
# combined with not/and/or (in that precedence order).
proposition = (sexpr | strexpr | nexpr)
pred = proposition + maybe(cmp_op + proposition) >> u(make_predicate)
formula = true | false | pred | (op_('(') + bexpr + op_(')'))
conjunction = formula + many(and_ + formula) >> u(make_expression)
disjunction = conjunction + many(or_ + conjunction) >> u(make_expression)
bexpr.define(
    maybe(not_) + disjunction >> make_boolean
)

# set expression: enumeration {a, b}, range {lo : hi}, or a
# comprehension-like form {expr for context}.
enumeration = op_('{') + maybe(expr + many(op_(',') + expr)) + op_('}') >> make_enum
range_ = op_('{') + nexpr + op_(':') + nexpr + op_('}') >> u(make_range)
set_ = op_('{') + expr + maybe(kw_('for') + set_context) + op_('}') >> u(make_set)
sexpr.define(
    enumeration | range_ | set_
)

# anonymous function: [params... : body]
lambda_.define(
    op_('[') + maybe(name + many(op_(',') + name)) + op_(':') + expr + op_(']') >> make_lambda
)
# =========== Expressions parser # FIXME: it should be rewritten using full Lua 5.2 grammar. BINARY_OPS = set("+-*/^%><") | {"..", "==", "~=", ">=", "<=", "and", "or"} UNARY_OPS = {"not", "-", "#"} binary_op = p.some(lambda t: t.value in BINARY_OPS) >> token_value unary_op = p.some(lambda t: t.value in UNARY_OPS) >> token_value # expressions with binary and unary ops + parenthesis @p.with_forward_decls def value(): single_value = table | tok_number | tok_string | tok_constant | iden return single_value | (close_rnd_brace + expr + open_rnd_brace) _term = value + p.skip(p.maybe(unary_op)) expr = _term + p.many(binary_op + _term) >> flat # [expression] _index_lookup = p.skip(close_sq_brace) + expr + p.skip(open_sq_brace) # foo=expr # [foo]=expr _key = iden | _index_lookup _keyvalue = expr + token("=") + _key # foo=expr, ["bar"]=42, _table_sep = token(",") | token(";") table_parameters = ( p.maybe(_table_sep) + # allow trailing comma/semicolon (_keyvalue | expr) +
# Single-token terminals.
dash = tokenType("Dash")
plus = tokenType("Plus")
content = tokenType("VariableContents")
# Strings come in a processed form and a raw (still-quoted) form.
string = tokenType("String") >> make_string
unString = tokenType("String")  # When the double quotes are still needed for internal processing
seqString = tokenType("SequenceString") >> make_seqString
unseqString = tokenType("SequenceString") >> make_unseqString  # For use with variables

# Code variants
code_end = tokenType("CodeEnd")

# Scan Codes -- bracketed lists/ranges, '+' combos, ',' sequences.
scanCode_start = tokenType("ScanCodeStart")
# A range 'a - b' of scan codes.
scanCode_range = number + skip(dash) + number >> make_scanCode_range
scanCode_listElem = number >> listElem
# Comma-separated elements inside the brackets, flattened into one list.
scanCode_innerList = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma))) >> flatten
scanCode_expanded = skip(scanCode_start) + scanCode_innerList + skip(code_end)
scanCode_elem = scanCode >> listElem
# 'A + B' combos (simultaneous) and 'A, B' sequences (ordered).
scanCode_combo = oneplus((scanCode_expanded | scanCode_elem) + skip(maybe(plus)))
scanCode_sequence = oneplus(scanCode_combo + skip(maybe(comma)))

# USB Codes -- same shape as scan codes, but elements may also be
# named keys given as raw (still-quoted) strings.
usbCode_start = tokenType("USBCodeStart")
usbCode_number = number >> make_usbCode_number
usbCode_range = (usbCode_number | unString) + skip(dash) + (number | unString) >> make_usbCode_range
usbCode_listElemTag = unString >> make_usbCode
usbCode_listElem = (usbCode_number | usbCode_listElemTag) >> listElem
usbCode_innerList = oneplus((usbCode_range | usbCode_listElem) + skip(maybe(comma))) >> flatten
usbCode_expanded = skip(usbCode_start) + usbCode_innerList + skip(code_end)
usbCode_elem = usbCode >> listElem
usbCode_combo = oneplus((usbCode_expanded | usbCode_elem) + skip(maybe(plus))) >> listElem
from __future__ import absolute_import

import funcparserlib.parser as p

# A failed maybe() yields None; map it to the empty string.
string = lambda x: x or ''
cat = ''.join

# Grammar: an optionally negative decimal literal, optionally followed
# by '+' and a second literal.
negative = p.maybe(p.a('-')) >> string
digits = p.oneplus(p.some(lambda char: char.isdigit())) >> cat
decimal_part = (p.maybe(p.a('.') + digits)) >> string >> cat
number = (negative + digits + decimal_part) >> cat >> float
addition = number + p.skip(p.a('+')) + number >> sum
# The whole input must be consumed.
expression = (addition | number) + p.finished


def calculate(text):
    """Evaluate "a" or "a+b", where a and b are decimal literals.

    Returns the result as a float; raises funcparserlib's NoParseError
    on any other input.
    """
    return expression.parse(text)[0]
rest = parsed[1] rest.insert(0, first) return rest return ( parser + many(by + parser) ) >> append """ E.g. "An individual carrying two gain-of-function alleles or one functional allele and one gain-of-function allele" => [('gain-of-function', 'gain-of-function'), ('functional', 'gain-of-function')] """ _phenotype_genotype = ( skip( _ipattern(r'an', r'individual', r'carrying') ) + _separated( ( _two_alleles >> (lambda allele: ( allele, allele )) ) | ( _one_allele + skip(_ipattern(r'and')) + _one_allele ), skip(maybe(_ipattern(r'or'))) ) ) _parsers = { 'phenotype_genotype': _phenotype_genotype, } class ParserError(Exception): pass def parse(parser_name, string): """ Invoke the parser identified by ``parser_name`` on the provided string. Parsers: * **phenotype_genotype:**
return s element = forward_decl() elements = forward_decl() skip_of = skip_str('of') any_class = some(lambda x: re.match('[a-zA-Z0-9_]+', x.string)) >> to_simple_type set_parser = (a_str('set') + skip_of + element) >> compile_set_type list_parser = (a_str('list') + skip_of + element) >> compile_list_type dict_parser = (a_str('dict') + skip_of + skip_str('{') + element + skip_str(',') + element + skip_str('}')) >> \ compile_dict_type tuple_parser = (a_str('tuple') + skip_of + skip_str('(') + elements + skip_str(')')) >> compile_tuple_type element.define(set_parser | list_parser | dict_parser | tuple_parser | any_class) elements.define((many(element + skip_str(',')) + element) >> (lambda x: x[0] + [x[1]])) type_contract_parser = skip_str('(') + maybe(elements) + skip_str(')') + skip_str('->') + element docstring_description = many(some(lambda token: '>>>' not in token.line)) >> (lambda tokens: ' '.join(token.string for token in tokens)) docstring_doctest = many(some(lambda token: True)) >> (lambda tokens: ' '.join(token.string for token in tokens)) entire_docstring = maybe(type_contract_parser) + docstring_description +\ docstring_doctest + finished def parse_csc108_docstring(docstring): """Reads a docstring in the CSC108 format and extracts the argument types. @param str docstring: The docstring to read. @return: A parsed output of the docstring. """ output = list(generate_tokens(StringIO(docstring.strip()).readline))
def parse_instruction(spec):
    """Parse one Chef-language instruction line into an Instruction.

    Tokenizes ``spec`` and matches it against one alternative per Chef
    verb (take/put/liquefy/...).  The Instruction name is the first
    matched token, lowercased with spaces replaced by underscores; the
    remaining tokens become its arguments.
    """
    # One or more free-text tokens joined with spaces.
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    ordinal = sometok("ordinal")
    bowl = sometok("bowl")
    the = sometok("the")
    dish = sometok("dish")
    to = sometok("to")
    into = sometok("into")

    concat = lambda list: " ".join(list)

    # "Take <ingredient> from refrigerator"
    take_i = sometok("take") + (p.oneplus(string) >> concat) + sometok("from") + sometok("refrigerator")

    # "Put [the] <ingredient> into [nth|the] bowl"
    put_i = (
        sometok("put")
        + p.skip(p.maybe(the))
        + (p.oneplus(string) >> concat)
        + p.skip(into)
        + p.maybe(ordinal | the)
        + bowl
    )

    # "Liquefy contents of [nth] bowl" or "Liquefy <ingredient>"
    liquefy_1 = sometok("liquefy") + sometok("contents") + p.maybe(ordinal) + bowl
    liquefy_2 = sometok("liquefy") + (p.oneplus(string) >> concat)
    liquefy_i = liquefy_1 | liquefy_2

    # "Pour contents of [nth] bowl into the [nth] dish"
    pour_i = (
        sometok("pour")
        + sometok("contents")
        + p.maybe(ordinal)
        + bowl
        + sometok("into")
        + the
        + p.maybe(ordinal)
        + dish
    )

    # "Fold [the] <ingredient> into [nth|the] bowl"
    fold_i = (
        sometok("fold")
        + p.skip(p.maybe(the))
        + (p.oneplus(string) >> concat)
        + into
        + p.maybe(ordinal | the)
        + bowl
    )

    # cleanup repetition
    add_i = sometok("add") + (p.oneplus(string) >> concat) + p.maybe(to + p.maybe(ordinal | the) + bowl)

    remove_i = (
        sometok("remove")
        + (p.oneplus(string) >> concat)
        + p.maybe(sometok("from") + p.maybe(ordinal | the) + bowl)
    )

    combine_i = sometok("combine") + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal | the) + bowl)

    divide_i = sometok("divide") + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal | the) + bowl)

    add_dry_i = sometok("add_dry") + p.maybe(to + p.maybe(ordinal | the) + bowl)

    # "Stir [the [nth|the] bowl] for <n> minute(s)" or
    # "Stir <ingredient> into the [nth] bowl"
    stir_1 = (
        sometok("stir")
        + p.maybe(the + p.maybe(ordinal | the) + bowl)
        + sometok("for")
        + sometok("number")
        + (sometok("minute") | sometok("minutes"))
    )
    stir_2 = sometok("stir") + (p.oneplus(string) >> concat) + into + the + p.maybe(ordinal) + bowl
    stir_i = stir_1 | stir_2

    mix_i = sometok("mix") + p.maybe(the + p.maybe(ordinal) + bowl) + sometok("well")

    clean_i = sometok("clean") + p.maybe(ordinal | the) + bowl

    # Loop markers: a verb token starting a loop, and the matching
    # "... until <verbed>" line ending it.  Tagged so the caller can
    # tell them apart from ordinary instructions.
    loop_start_i = (sometok("string") + p.maybe(the) + (p.oneplus(string) >> concat)) >> (lambda x: ("loop_start", x))
    loop_end_i = (
        sometok("string") + p.maybe(p.maybe(the) + (p.oneplus(string) >> concat)) + sometok("until") + string
    ) >> (lambda x: ("loop_end", x))

    set_aside_i = sometok("set") >> (lambda x: (x, None))

    serve_with_i = sometok("serve_with") + (p.oneplus(string) >> concat)

    refrigerate_i = sometok("refrigerate") + p.maybe(
        sometok("for") + sometok("number") + (sometok("hour") | sometok("hours"))
    )

    instruction = (
        take_i
        | put_i
        | liquefy_i
        | pour_i
        | add_i
        | fold_i
        | remove_i
        | combine_i
        | divide_i
        | add_dry_i
        | stir_i
        | mix_i
        | clean_i
        | loop_end_i  # -| ORDER matters
        | loop_start_i  # -|
        | set_aside_i
        | serve_with_i
        | refrigerate_i
    ) >> (lambda x: Instruction(x[0].lower().replace(" ", "_"), x[1:]))

    return instruction.parse(tokenize_instruction(spec))
def parse(seq):
    """Parse a sequence of nwdiag Tokens into a Diagram AST.

    Recognizes the nwdiag grammar (network, group, route, class/plugin
    extensions, peer edges and plain nodes) and returns a Diagram built
    via create_mapper() over the module-level AST node classes.
    """
    id_tokens = ['Name', 'IPAddr', 'Number', 'String']

    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    _id = some(lambda t: t.type in id_tokens) >> tokval
    keyword = lambda s: a(Token('Name', s)) >> tokval

    def make_peer(first, edge_type, second, followers, attrs):
        # Expand a chain 'A -- B -- C' into consecutive Edge pairs,
        # all sharing the same attribute list.
        edges = [Edge(first, edge_type, second, attrs)]
        from_node = second
        for edge_type, to_node in followers:
            edges.append(Edge(from_node, edge_type, to_node, attrs))
            from_node = to_node
        return Peer(edges)

    def make_route(first, edge_type, second, followers, attrs):
        # Same chain expansion as make_peer, wrapped in a Route.
        edges = [Edge(first, edge_type, second, attrs)]
        from_node = second
        for edge_type, to_node in followers:
            edges.append(Edge(from_node, edge_type, to_node, attrs))
            from_node = to_node
        return Route(edges)

    #
    # parts of syntax
    #
    node_list = (
        _id + many(op_(',') + _id) >> create_mapper(oneplus_to_list)
    )
    option_stmt = (
        _id + maybe(op_('=') + _id) >> create_mapper(Attr)
    )
    option_list = (
        maybe(op_('[') + option_stmt + many(op_(',') + option_stmt) + op_(']')) >> create_mapper(oneplus_to_list, default_value=[])
    )

    # node statement::
    #   A;
    #   B [attr = value, attr = value];
    #
    node_stmt = (
        _id + option_list >> create_mapper(Node)
    )

    # peer network statement::
    #   A -- B;
    #
    edge_stmt = (
        _id + op('--') + _id + many(op('--') + _id) + option_list >> create_mapper(make_peer)
    )

    # attributes statement::
    #   default_shape = box;
    #   default_fontsize = 16;
    #
    attribute_stmt = (
        _id + op_('=') + _id >> create_mapper(Attr)
    )

    # extension statement (class, plugin)::
    #   class red [color = red];
    #   plugin attributes [name = Name];
    #
    extension_stmt = (
        (keyword('class') | keyword('plugin')) + _id + option_list >> create_mapper(Extension)
    )

    # group statement::
    #   group {
    #      A;
    #   }
    #
    group_inline_stmt = (
        attribute_stmt | node_stmt
    )
    group_inline_stmt_list = (
        many(group_inline_stmt + skip(maybe(op(';'))))
    )
    group_stmt = (
        skip(keyword('group')) + maybe(_id) + op_('{') + group_inline_stmt_list + op_('}') >> create_mapper(Group)
    )

    # network statement::
    #   network {
    #      A;
    #   }
    #
    network_inline_stmt = (
        attribute_stmt | group_stmt | node_stmt
    )
    network_inline_stmt_list = (
        many(network_inline_stmt + skip(maybe(op(';'))))
    )
    network_stmt = (
        skip(keyword('network')) + maybe(_id) + op_('{') + network_inline_stmt_list + op_('}') >> create_mapper(Network)
    )

    # route statement::
    #   route {
    #      A -> B -> C;
    #   }
    #
    route_inline_stmt = (
        _id + op_('->') + _id + many(op_('->') + _id) + option_list >> create_mapper(make_route)
    )
    # NOTE(review): route_stmt reuses network_inline_stmt_list and maps to
    # Network, so 'A -> B' chains inside a route block never parse and
    # route_inline_stmt/make_route above are dead code.  This looks like a
    # copy-paste from network_stmt -- confirm intent and Route's expected
    # constructor before changing it.
    route_stmt = (
        skip(keyword('route')) + maybe(_id) + op_('{') + network_inline_stmt_list + op_('}') >> create_mapper(Network)
    )

    #
    # diagram statement::
    #   nwdiag {
    #      A;
    #   }
    #
    diagram_id = (
        (keyword('diagram') | keyword('nwdiag')) + maybe(_id) >> list
    )
    diagram_inline_stmt = (
        extension_stmt | network_stmt | group_stmt | attribute_stmt | route_stmt | edge_stmt | node_stmt
    )
    diagram_inline_stmt_list = (
        many(diagram_inline_stmt + skip(maybe(op(';'))))
    )
    diagram = (
        maybe(diagram_id) + op_('{') + diagram_inline_stmt_list + op_('}') >> create_mapper(Diagram)
    )

    # The whole token stream must be consumed.
    dotfile = diagram + skip(finished)

    return dotfile.parse(seq)