def parse(seq):
    'Sequence(Token) -> object'
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    id = some(lambda t:
              t.type in ['Name', 'Number', 'String']).named('id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])

    node_id = id  # + maybe(port)
    term = date + op_('-') + date
    value = (id | term | date)
    a_list = (
        id +
        maybe(op_('=') + id) +
        skip(maybe(op(',')))
        >> unarg(Attr))
    attr_list = (
        many(op_('[') + many(a_list) + op_(']'))
        >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    stmt = (
        chart_attr
        | node_stmt
    )
    stmt_list = many(stmt + skip(maybe(op(';'))))
    chart = (
        maybe(n('diagram')) +
        maybe(id) +
        op_('{') +
        stmt_list +
        op_('}')
        >> unarg(Chart))
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)

def get_number_parser():
    """Return parser that reads (float and int) numbers with whitespace."""
    number = (parser.some(lambda tok: tok.type == 'NUMBER')
              >> token_value
              >> string_to_number)
    indent = parser.some(lambda t: t.code == token.INDENT)
    dedent = parser.a(Token(token.DEDENT, ''))
    newline = parser.a(Token(54, '\n'))
    ignored_whitespace = parser.skip(indent | dedent | newline)
    return parser.oneplus(number | ignored_whitespace)

def parse(seq):
    'Sequence(Token) -> object'
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    # css_text =
    # css = skip(finished)
    number = some(lambda tok: tok.type == 'NUMBER') >> tokval >> int
    return type(number.parse(seq))

def parse_expression(expression):
    """Parse an expression that appears in an execution node, i.e. a block
    delimited by ``{% %}``. This can be a compound expression like a ``for``
    statement with several sub-expressions, or it can just be a single
    statement such as ``endif``.

    :param list expression: Tokenised expression.
    """
    from funcparserlib.parser import a, skip, some

    # For if expressions, we rely on the Python parser to process the
    # expression rather than using our own parser.
    if expression[0] == 'if':
        return IfNode(ast.parse(' '.join(expression[1:]), mode="eval"))

    variable_name = some(lambda x: re.match(r'[a-zA-Z_]+', x))

    # TODO We use the same function twice, first to match the token
    # and then to extract the value we care about from the token
    # (namely the contents of the quoted string). This smells wrong.
    def extract_quoted_string(x):
        result = re.match(r'\"([^\"]*)\"', x)
        if result:
            return result.groups(1)

    quoted_string = some(extract_quoted_string)

    for_expression = (skip(a('for')) + (variable_name >> str) +
                      skip(a('in')) + (variable_name >> str))
    extends_expression = (skip(a('extends')) +
                          (quoted_string >> extract_quoted_string))
    block_expression = (skip(a('block')) + (variable_name >> str))

    def make_for_node(x):
        return ForNode(*x)

    def make_extends_node(x):
        return ExtendsNode(*x)

    parser = ((for_expression >> make_for_node) |
              (extends_expression >> make_extends_node) |
              (block_expression >> BlockNode))

    try:
        return parser.parse(expression)
    except funcparserlib.parser.NoParseError as e:
        raise Exception("Invalid expression '%s'" % expression)

def parse(seq):
    'Sequence(Token) -> object'
    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t:
              t.type in ['Name', 'Number', 'Color', 'String']).named('id') >> tokval
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    make_node = lambda args: Node(*args)

    node_stmt = id + op_(':') + date + maybe(op_('-') + date) >> make_node
    chart = (
        many(node_stmt + skip(maybe(op(';'))))
        >> Chart)
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)

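# --- Usage sketch (not part of the original): a minimal, self-contained
# demonstration of the token-list pattern above, using funcparserlib's
# classic API. `Node` is a stand-in namedtuple here, and the token stream
# is built by hand instead of coming from the project's lexer.
from collections import namedtuple

from funcparserlib.lexer import Token
from funcparserlib.parser import a, finished, maybe, skip, some

Node = namedtuple('Node', 'name start end')

tokval = lambda x: x.value
op_ = lambda s: skip(a(Token('Op', s)) >> tokval)
name = some(lambda t: t.type == 'Name') >> tokval
date = some(lambda t: t.type == 'Date') >> tokval

node_stmt = (name + op_(':') + date + maybe(op_('-') + date)
             >> (lambda args: Node(*args)))

toks = [Token('Name', 'design'), Token('Op', ':'),
        Token('Date', '2020-01-01'), Token('Op', '-'),
        Token('Date', '2020-02-01')]
print((node_stmt + skip(finished)).parse(toks))
# -> Node(name='design', start='2020-01-01', end='2020-02-01')
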
def parse_identifier(line: str):
    """Parses just the identifier (first element) of the write"""
    tokval = lambda t: t.value
    joinval = "".join
    someToken = lambda type: some(lambda t: t.type == type)

    char = someToken('Char') >> tokval
    space = someToken('Space') >> tokval
    comma = someToken('Comma') >> tokval
    quote = someToken('Quote') >> tokval
    escape = someToken('Escape') >> tokval
    equal = someToken('Equal') >> tokval

    escape_space = skip(escape) + space >> joinval
    escape_comma = skip(escape) + comma >> joinval
    escape_equal = skip(escape) + equal >> joinval
    escape_escape = skip(escape) + escape >> joinval

    plain_int_text = someToken('Int') >> tokval
    plain_float_text = someToken('Float') >> tokval

    identifier = many(char | plain_float_text | plain_int_text |
                      escape_space | escape_comma | escape_equal |
                      escape_escape | quote) >> joinval

    toplevel = identifier >> (lambda x: x)

    parsed = toplevel.parse(LineTokenizer.tokenize(line))
    if len(parsed) == 0:
        raise NoParseError('parsed nothing')
    else:
        return parsed

def parse(seq): """Returns the AST of the given token sequence.""" global depth unarg = lambda f: lambda x: f(*x) tokval = lambda x: x.value # returns the value of a token toktype = lambda t: some(lambda x: x.type == t) >> tokval # checks type of token paren = lambda s: a(Token('Parentheses', s)) >> tokval # return the value if token is Op paren_ = lambda s: skip(paren(s)) # checks if token is Op and ignores it def application(z, list): return reduce(lambda s, x: Application(s, x), list, z) depth = 0 variable = lambda x: Variable(str(x)+":"+str(depth)) def abstraction(x): global depth abst = Abstraction(str(x[0])+":"+str(depth), x[1]) depth += 1 return abst variable = toktype('Name') >> variable term = variable | with_forward_decls(lambda: paren_('(') + exp + paren_(')')) | \ with_forward_decls(lambda: skip(toktype('Lambda')) + toktype('Name') + \ skip(toktype('Dot')) + exp >> abstraction) exp = term + many(term) >> unarg(application) return exp.parse(seq)
def parse(sequence, query):
    tokval = lambda x: x.value
    toktype = lambda t: (some(lambda x: x.type == t).named('(type %s)' % t)
                         >> tokval)
    operation = lambda s: a(Token('Op', s)) >> tokval
    operation_ = lambda s: skip(operation(s))
    create_param = lambda param_name: query.get_aliased_param(param_name)
    make_and = lambda params: And(params[0], params[1])
    make_or = lambda params: Or(params[0], params[1])
    make_not = lambda inner: Not(inner)

    word = toktype('Word')
    inner_bracket = forward_decl()
    left_of_and = forward_decl()
    right_of_and = forward_decl()
    left_of_or = forward_decl()
    not_ = forward_decl()
    bracket = operation_('(') + inner_bracket + operation_(')')
    and_ = left_of_and + operation_('&') + right_of_and >> make_and
    or_ = left_of_or + operation_('|') + inner_bracket >> make_or
    param = word >> create_param

    not_.define(operation_('!') + (bracket | param))
    not_ = not_ >> make_not

    left_of_or.define(and_ | bracket | not_ | param)
    left_of_and.define(bracket | not_ | param)
    right_of_and.define(left_of_and)
    inner_bracket.define(or_ | and_ | bracket | not_ | param)

    definition = (bracket | inner_bracket) + finished
    return definition.parse(sequence)

def unpack(kind):
    "Parse an unpacking form, returning it unchanged."
    return some(lambda x: isinstance(x, HyExpression)
                and len(x) > 0
                and isinstance(x[0], HySymbol)
                and x[0] == "unpack-" + kind)

def parse(seq):
    'Sequence(Token) -> object'
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    value_flatten = lambda l: sum([[l[0]]] + list(l[1:]), [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t:
              t.type in ['Name', 'Number', 'Color', 'String']).named('id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])

    node_id = id  # + maybe(port)
    pair = (
        op_('(') + id + skip(maybe(op(','))) + id + op_(')')
        >> tuple)
    value = (id | pair)
    value_list = (
        value +
        many(op_(',') + value)
        >> value_flatten)
    a_list = (
        id +
        maybe(op_('=') + id) +
        skip(maybe(op(',')))
        >> unarg(Attr))
    attr_list = (
        many(op_('[') + many(a_list) + op_(']'))
        >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value_list >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    stmt = (
        chart_attr
        | node_stmt
    )
    stmt_list = many(stmt + skip(maybe(op(';'))))
    chart_type = (
          n('p')   | n('pie')   | n('piechart')
        | n('p3')  | n('pie3d') | n('piechart_3d')
        | n('lc')  | n('line')  | n('linechart')
        | n('lxy') | n('linechartxy')
        | n('bhs') | n('holizontal_barchart')
        | n('bvs') | n('vertical_barchart')
        | n('bhg') | n('holizontal_bargraph')
        | n('bvg') | n('vertical_bargraph')
        | n('v')   | n('venn')  | n('venndiagram')
        | n('s')   | n('plot')  | n('plotchart')
    )
    chart = (
        chart_type +
        maybe(id) +
        op_('{') +
        stmt_list +
        op_('}')
        >> unarg(Chart))
    dotfile = chart + skip(finished)
    return dotfile.parse(seq)

def int_range(low, high):
    def predicate(token):
        if token.type != TokenType.Integer:
            return False
        return to_i(token) in range(low, high + 1)

    return p.some(predicate) >> to_i

def parse(seq): """Returns the AST of the given token sequence.""" def eval_expr(z, list): return reduce(lambda s, (f, x): f(s, x), list, z) unarg = lambda f: lambda x: f(*x) const = lambda x: lambda _: x # like ^^^ in Scala tokval = lambda x: x.value # returns the value of a token op = lambda s: a(Token('Op', s)) >> tokval # return the value if token is Op op_ = lambda s: skip(op(s)) # checks if token is Op and ignores it toktype = lambda t: some(lambda x: x.type == t) >> tokval # checks type of token def lst(h,t): return [h,] + t makeop = lambda s, f: op(s) >> const(f) or_op = makeop('|', Or) char = with_forward_decls(lambda: toktype('Char') >> Char | op_('(') + exp + op_(')')) star = char + op_('*') >> Star | char lst2_exp = star + many(star) >> unarg(lst) lst_exp = lst2_exp >> Lst exp = lst_exp + many(or_op + lst_exp) >> unarg(eval_expr) return exp.parse(seq)
def tok(type, name=None):
    """Parse a single token.

    :type type: unicode
    :type name: unicode or None
    """
    return some(lambda t: t.type == type and (name is None or t.name == name))

def token_type(tok_type):
    """
    Get a parser matching a certain type of tokens

    :param tok_type: predefined token type to be matched
    :return: a parser that matches tokens of type `tok_type`
    """
    return some(lambda tok: tok.type == tok_type)

def notpexpr(*disallowed_heads):
    """Parse any object other than a HyExpression beginning with a
    HySymbol equal to one of the disallowed_heads."""
    return some(lambda x: not (
        isinstance(x, HyExpression) and
        x and
        isinstance(x[0], HySymbol) and
        x[0] in disallowed_heads))

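# --- Illustration (not from the original module): funcparserlib parsers
# like the ones above run over any sequence of Python objects, not just
# lexer tokens, which is why `some` can pattern-match model trees
# directly. A tiny self-contained example over plain ints and strings:
from funcparserlib.parser import finished, many, skip, some

word = some(lambda x: isinstance(x, str))
number = some(lambda x: isinstance(x, int))

pairs = many(word + number) + skip(finished)
print(pairs.parse(['a', 1, 'b', 2]))  # -> [('a', 1), ('b', 2)]
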
def parse(tokens):
    t = lambda s: some(lambda tok: tok.type == s)
    inttype = t('Int')
    chartype = t('Char')
    unsignedtype = t('Unsigned')
    name = t('Name')
    star = t('Star')
    void = t('Void')
    lpar = skip(t('LPar'))
    rpar = skip(t('RPar'))
    comma = skip(t('Comma'))
    semicolon = skip(t('SemiColon'))

    def collapse(x):
        if len(x[1]) > 0:
            # TODO: handle multiple stars
            return Token("UserTypePointer", x[0].value + " " + x[1][0].value)
        else:
            return Token("UserType", x[0].value)

    def make_func(x):
        return Token('Function', x.value)

    def make_type(x):
        if len(x) == 3:
            return Token("UnsignedTypePointer", x)
        elif len(x) == 2:
            if x[0].type == "Unsigned":
                return Token("UnsignedType", x)
            else:
                return Token("TypePointer", x)
        else:
            return Token("Type", x)

    udt = name + many(star) >> collapse
    prim = (inttype |
            chartype |
            unsignedtype + inttype |
            unsignedtype + chartype) + many(star) >> make_type
    voidptr = void + star + many(star)
    func = name >> make_func

    accepted_types = voidptr | prim | udt

    # Return Type
    rettype = void | accepted_types

    # Argument List
    decl = accepted_types + name
    decl_list = decl + many(comma + decl)
    arg_list = void | decl_list

    func_decl = rettype + func + lpar + arg_list + rpar + semicolon
    return func_decl.parse(tokens)

def a(value: str) -> "Parser[Token, Token]": """Eq(a) -> Parser(a, a) Returns a parser that parses a token that is equal to the value value. """ def _is_value_eq(token: Token) -> bool: return token.value == value return some(_is_value_eq).named(f'(a "{value}")')
def read_model(filename):
    # Slurp file
    with open(filename) as file:
        character_lines = file.read().splitlines()

    # Tokenize
    token_lines = []
    for line in character_lines:
        tokens_line = token_phase(line)
        if tokens_line:
            # Keep only non-blank lines
            token_lines.append(tokens_line)

    class Section(aenum.AutoNumberEnum):
        options = ()
        components = ()

    active_section = Section.components
    components = []

    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    header = op_("%") + toktype("name")

    for line in token_lines:
        maybe_section_name = test_all(header, line)
        if isinstance(maybe_section_name, str):
            # Header line encountered
            if maybe_section_name == "options":
                active_section = Section.options
            elif maybe_section_name == "components":
                active_section = Section.components
            else:
                raise ValueError()  # TODO (drhagen): better error
            continue

        if active_section == Section.options:
            pass
        elif active_section == Section.components:
            component_i = test_all(component, line)
            if isinstance(component_i, NoParseError):
                # TODO (drhagen): collect all errors and report them at once
                raise component_i
            components.append(component_i)
        else:
            raise ValueError()  # Unreachable

    parts, events = collapse_components(components)

    new_model = Model(parts, events)
    return new_model

def create_parser():
    # operator: '~=' | '>=' | '<=' | '<' | '>' | '='
    operator = some(lambda tok: tok.type == 'CMP') >> choose_class

    # value: STRING | WORD
    word = some(lambda tok: tok.type == 'WORD') >> Text
    string = some(lambda tok: tok.type == 'STRING') >> QuotedText
    value = string | word

    # function: WORD '(' ')'
    open_brace = skip(a(Token('BR', '(')))
    close_brace = skip(a(Token('BR', ')')))
    function = word + open_brace + close_brace >> Function

    # field_expr: WORD operator value
    fieldexpr = (word + operator + (function | value)) >> (
        lambda x: x[1]([x[0], x[2]]))

    OR = a(Token('OP', 'OR')) >> choose_class
    AND = a(Token('OP', 'AND')) >> choose_class

    def eval(data):
        arg1, lst = data
        for f, arg2 in lst:
            arg1 = f([arg1, arg2])
        return arg1

    expr = forward_decl()
    basexpr = open_brace + expr + close_brace | fieldexpr
    andexpr = (basexpr + many(AND + basexpr)) >> eval
    orexpr = (andexpr + many(OR + andexpr)) >> eval
    expr.define(orexpr)
    return expr

def _parse(seq):
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    def make_string(args):
        context, value = args
        if not context:
            context = 'any:'
        return String(unescape_str(value[1:-1]), context[:-1])

    def make_regex(args):
        context, value = args
        value, modifiers = value.rsplit('/', 1)
        value = value[1:]
        if not context:
            context = 'any:'
        return Regex(unescape_regex(value), modifiers, context[:-1])

    def make_or(args):
        return Or(*args)

    def make_and(args):
        return And(*args)

    def make_not(x):
        return Not(x)

    context = maybe(toktype(u'Prefix'))
    string = (context + toktype(u'String')) >> make_string
    regex = (context + toktype(u'Regex')) >> make_regex

    par_term = forward_decl()
    simple_term = forward_decl()
    term = forward_decl()
    not_term = forward_decl()
    and_term = forward_decl()
    or_term = forward_decl()

    par_term.define(op_(u'(') + term + op_(u')'))
    simple_term.define(par_term | string | regex)
    not_term.define(op_('not') + not_term >> make_not | simple_term)
    and_term.define(not_term + op_('and') + and_term >> make_and | not_term)
    or_term.define(and_term + op_('or') + or_term >> make_or | and_term)
    term.define(or_term)

    eof = skip(toktype(u'EOF'))
    filter_expr = (term + eof) | (eof >> const(Any()))

    return filter_expr.parse(seq)

def parse(constraints):
    """Using funcparserlib turn constraints into a mongo query

    NOTE: this uses functors, see:
    http://spb-archlinux.ru/2009/funcparserlib/Tutorial
    """
    tokval = lambda tok: tok.value
    char = lambda tok: tok.code == 'CHAR'
    chars = some(char) >> tokval
    operator = lambda s: a(Token('OP', s)) >> tokval
    const = lambda x: lambda _: x
    makeop = lambda s: operator(s) >> const(s)

    item = many(chars) >> (lambda x: ''.join(x))
    test1 = item.parse(tokenize('hello123'))
    assert test1 == 'hello123'
    test1b = item.parse(tokenize('42a'))
    assert test1b == '42a'
    test1c = item.parse(tokenize('cam-oeprod-123299-master'))
    assert test1c == 'cam-oeprod-123299-master'
    test1d = item.parse(tokenize('Hello world'))
    assert test1d == 'Hello world'

    equals = makeop('=')
    assert equals.parse(tokenize('=')) == '='
    slash = makeop('/')

    value = item >> possible_int
    term = (item + equals + value) >> (lambda x: (x[0], x[2]))
    test2 = term.parse(tokenize('dut=catgut'))
    assert test2 == ('dut', 'catgut')

    endmark = a(Token('END', ''))
    seq = (many(((slash + term) >> (lambda x: x[1]))) >> dict)
    top = (seq + endmark) >> (lambda x: x[0])
    test3 = seq.parse(
        tokenize('/dut=catgut/foo=bar/n=30/bet=a42a/message=Hello World'))
    assert test3 == {
        'dut': 'catgut',
        'foo': 'bar',
        'n': 30,
        'message': 'Hello World',
        'bet': 'a42a',
    }
    test4 = seq.parse(tokenize('/suppress=,bar'))
    assert test4 == {'suppress': ',bar'}

    lexemes = tokenize(constraints)
    return top.parse(lexemes)

def parse(seq): """ Parses the list of tokens and generates an AST. """ def eval_expr(z, list): return reduce(lambda s, (f, x): f(s, x), list, z) unarg = lambda f: lambda x: f(*x) tokval = lambda x: x.value # returns the value of a token toktype = lambda t: some(lambda x: x.type == t) >> tokval # checks type of token const = lambda x: lambda _: x # like ^^^ in Scala op = lambda s: a(Token('Op', s)) >> tokval # return the value if token is Op op_ = lambda s: skip(op(s)) # checks if token is Op and ignores it lst = lambda x: [x[0],] + x[1] tup = lambda x: (x[0], x[1]) makeop = lambda s, f: op(s) >> const(f) add = makeop('+', Add) sub = makeop('-', Sub) mul = makeop('*', Mul) div = makeop('/', Div) lt = makeop('<', Lt) gt = makeop('>', Gt) eq = makeop('=', Eq) operation = add | sub | mul | div | lt | gt | eq decl = with_forward_decls(lambda:toktype('Var') + op_('=') + (exp | fun) >> tup) decls = decl + many(skip(toktype('Semicolon')) + decl) >> lst variable = toktype('Var') >> Variable variables = variable + many(skip(toktype('Comma')) + variable) >> lst fun = with_forward_decls(lambda: skip(toktype('Fun')) + variables + skip(toktype('Arrow')) + exp + skip(toktype('End'))) >> unarg(Fun) parameters = with_forward_decls(lambda: exp + many(skip(toktype('Comma')) + exp) >> lst) call = skip(toktype('Call')) + (fun | variable) + skip(toktype('Lp')) + parameters + skip(toktype('Rp')) >> unarg(Call) ex = with_forward_decls(lambda:variable | toktype('Number') >> (lambda x: Const(int(x))) |\ toktype('True') >> (lambda x: Const(True)) | toktype('False') >> (lambda x: Const(False)) |\ skip(toktype('Let')) + decls + skip(toktype('In')) + exp + skip(toktype('End')) >> unarg(Let) |\ skip(toktype('If')) + exp + skip(toktype('Then')) + exp + maybe(skip(toktype('Else')) + exp) + skip(toktype('Fi')) >> unarg(If) |\ fun | call) exp = ex + many(operation + ex) >> unarg(eval_expr) prog = skip(toktype('Prog')) + exp >> Prog return prog.parse(seq)
def test_error_info(self):
    tokenize = make_tokenizer([
        ('keyword', (r'(is|end)',)),
        ('id', (r'[a-z]+',)),
        ('space', (r'[ \t]+',)),
        ('nl', (r'[\n\r]+',)),
    ])
    try:
        list(tokenize('f is ф'))
    except LexerError as e:
        self.assertEqual(str(e),
                         'cannot tokenize data: 1,6: "f is \u0444"')
    else:
        self.fail('must raise LexerError')

    sometok = lambda type: some(lambda t: t.type == type)
    keyword = lambda s: a(Token('keyword', s))

    id = sometok('id')
    is_ = keyword('is')
    end = keyword('end')
    nl = sometok('nl')

    equality = id + skip(is_) + id >> tuple
    expr = equality + skip(nl)
    file = many(expr) + end

    msg = """\
spam is eggs
eggs isnt spam
end"""
    toks = [x for x in tokenize(msg) if x.type != 'space']
    try:
        file.parse(toks)
    except NoParseError as e:
        self.assertEqual(e.msg,
                         "got unexpected token: 2,11-2,14: id 'spam'")
        self.assertEqual(e.state.pos, 4)
        self.assertEqual(e.state.max, 7)
        # May raise KeyError
        t = toks[e.state.max]
        self.assertEqual(t, Token('id', 'spam'))
        self.assertEqual((t.start, t.end), ((2, 11), (2, 14)))
    else:
        self.fail('must raise NoParseError')

def parse(tokens):
    '''Parses an SQL date range.

    Parses a list of Token objects to see if it's a valid SQL clause
    meeting the following conditions: an optional sequence of ANDed
    simple conditions, ANDed with an optional sequence of ORed complex
    conditions. A simple condition is a date unit, a sign, and a date
    value; a complex condition is any legal SQL combination of simple
    conditions ANDed or ORed together.

    Date unit: YYYY, MM, DD, HH, MIN
    Sign: <, <=, =, >=, >
    Date value: any integer value, with an optional leading zero

    Returns:
        True if the tokens represent a valid SQL date range,
        False otherwise.
    '''
    try:
        left_paren = some(lambda t: t.value in '(')
        right_paren = some(lambda t: t.value in ')')
        oper = some(lambda t: t.value in SIGNS)
        unit = some(lambda t: t.value in UNITS)
        padded_num = some(lambda t: t.code == 2) + some(
            lambda t: t.code == 2)  # hmmm, better way???
        raw_num = some(lambda t: t.code == 2)
        num = padded_num | raw_num
        cond = unit + oper + num
        endmark = a(Token(token.ENDMARKER, ''))
        end = skip(endmark + finished)

        ands = maybe(cond + maybe(many(a(Token(token.NAME, 'AND')) + cond)))
        or_ands = left_paren + ands + right_paren
        ors_without_ands = or_ands + maybe(
            many(a(Token(token.NAME, 'OR')) + or_ands))
        ors_with_ands = (a(Token(token.NAME, 'AND')) + left_paren +
                         or_ands +
                         maybe(many(a(Token(token.NAME, 'OR')) + or_ands)) +
                         right_paren)
        ors = maybe(ors_without_ands | ors_with_ands)

        full = left_paren + ands + ors + right_paren + end
        full.parse(tokens)
    except NoParseError:
        return False
    except TokenError:
        return False
    return True

def parse(tokens):
    var = some(toktype("name")) | some(toktype("number"))
    open_form = some(toktype("form_open"))
    close_form = some(toktype("form_close"))
    op_lambda = some(toktype("op_lambda"))
    op_map = some(toktype("op_map"))
    prim_bind = some(toktype("kw_bind"))
    prim_halt = some(toktype("kw_halt"))

    exp = with_forward_decls(lambda: lam | var | prim_exp | exprn) >> Expression
    lam = (open_form + op_lambda + many(var) + op_map + oneplus(exp) +
           close_form >> Lambda)
    bind_exp = open_form + prim_bind + var + lam + close_form
    halt_exp = open_form + prim_halt + exp + close_form
    prim_exp = bind_exp | halt_exp
    exprn = open_form + oneplus(exp) + close_form >> Form

    prog = many(exp) + skip(finished) >> Program
    return prog.parse(tokens)

def _parse_rule(seq):
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    sep = lambda s: a(Token(u'Sep', s)) >> tokval
    s_sep = lambda s: skip(sep(s))

    level = toktype(u'Level')
    comparator = toktype(u'Comparator') >> COMPARATORS.get
    number = toktype(u'Number') >> float
    historical = toktype(u'Historical')
    unit = toktype(u'Unit')
    operator = toktype(u'Operator')
    logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get

    exp = comparator + ((number + maybe(unit)) | historical) + maybe(operator + number)
    rule = (level + s_sep(':') + exp + many(logical_operator + exp))

    overall = rule + skip(finished)
    return overall.parse(seq)

def parse1(tokens): """ Experimenting with collapsing part of the parse tree to make it easier to work with. """ t = lambda s: some(lambda tok: tok.type == s) name = t('Name') star = t('Star') def collapse(x): if len(x[1]) > 0: # TODO: handle multiple stars return Token("UserTypePointer", x[0].value + " " + x[1].value) else: return Token("UserType", x[0].value) udt = name + many(star) >> collapse return udt.parse(tokens)
def parse(source):
    task = Task()

    get_value = lambda x: x.value
    value_of = lambda t: some(lambda x: x.type == t) >> get_value
    keyword = lambda s: skip(value_of(s))

    make_rule = lambda x: task.add_rule(Rule(**{x[0]: x[1][1:-1]}))
    set_root = lambda value: task.set_root_dir(value[1:-1])
    set_mask = lambda value: task.set_mask(value[1:-1])

    root = keyword('In') + value_of('Value') >> set_root
    mask = keyword('With') + value_of('Value') >> set_mask
    rule = (keyword('Set') +
            value_of('Attribute') +
            keyword('Equals') +
            value_of('Value')
            >> make_rule)

    parser = maybe(mask) + root + many(rule)
    parser.parse(source)

    return task

def create_grammar():
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    null = n('null')
    true = n('true')
    false = n('false')
    number = toktype('Number')
    string = toktype('String')
    value = forward_decl()
    member = string + op_(':') + value
    object_ = (
        op_('{') +
        maybe(member + many(op_(',') + member)) +
        op_('}'))
    array = (
        op_('[') +
        maybe(value + many(op_(',') + value)) +
        op_(']'))
    value.define(
        null
        | true
        | false
        | object_
        | array
        | number
        | string)
    json_text = object_ | array
    json_file = json_text + skip(finished)

    return json_file

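# --- Usage sketch (not part of the original): exercising the grammar
# above on a hand-built token list for {"x": 1}; a real tokenizer is
# assumed to live elsewhere in the module. With no semantic actions
# attached, a successful parse yields raw token values.
from funcparserlib.lexer import Token

toks = [
    Token('Op', '{'),
    Token('String', '"x"'), Token('Op', ':'), Token('Number', '1'),
    Token('Op', '}'),
]
json_file = create_grammar()
json_file.parse(toks)  # succeeds; malformed input raises NoParseError
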
def parse(seq): """Returns the AST of the given token sequence.""" def eval_expr(z, list): return reduce(lambda s, (f, x): f(s, x), list, z) unarg = lambda f: lambda x: f(*x) const = lambda x: lambda _: x # like ^^^ in Scala tokval = lambda x: x.value # returns the value of a token op = lambda s: a(Token('Op', s)) >> tokval # return the value if token is Op op_ = lambda s: skip(op(s)) # checks if token is Op and ignores it toktype = lambda t: some(lambda x: x.type == t) >> tokval # checks type of token def lst(h,t): return [h,] + t call = lambda x: Call(x[0], x[1]) makeop = lambda s, f: op(s) >> const(f) add = makeop('+', Plus) sub = makeop('-', Minus) mul = makeop('*', Times) div = makeop('/', Div) def make_const(i): return const(int(i)) number = toktype('Number') >> Const mul_op = mul | div add_op = add | sub factor = with_forward_decls(lambda: number | op_('(') + exp + op_(')') | call) term = factor + many(mul_op + factor) >> unarg(eval_expr) exp = term + many(add_op + term) >> unarg(eval_expr) exp_lst = with_forward_decls(lambda: exp + many(op_(',') + exp) >> unarg(lst)) call = toktype('Name') + op_('(') + exp_lst + op_(')') >> call return exp.parse(seq)
def parse(seq): """Sequence(Token) -> object""" unarg = lambda f: lambda args: f(*args) tokval = lambda x: x.value flatten = lambda list: sum(list, []) n = lambda s: a(Token(u'Name', s)) >> tokval op = lambda s: a(Token(u'Op', s)) >> tokval op_ = lambda s: skip(op(s)) id_types = [u'Name', u'Number', u'String'] id = some(lambda t: t.type in id_types).named(u'id') >> tokval make_graph_attr = lambda args: DefAttrs(u'graph', [Attr(*args)]) make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs) node_id = id # + maybe(port) a_list = ( id + maybe(op_(u'=') + id) + skip(maybe(op(u','))) >> unarg(Attr)) attr_list = (many(op_(u'[') + many(a_list) + op_(u']')) >> flatten) attr_stmt = ( (n(u'graph') | n(u'node') | n(u'edge')) + attr_list >> unarg(DefAttrs)) graph_attr = id + op_(u'=') + id >> make_graph_attr node_stmt = node_id + attr_list >> unarg(Node) # We use a forward_decl becaue of circular definitions like (stmt_list -> # stmt -> subgraph -> stmt_list) subgraph = forward_decl() edge_rhs = skip(op(u'->') | op(u'--')) + (subgraph | node_id) edge_stmt = ((subgraph | node_id) + oneplus(edge_rhs) + attr_list >> unarg(make_edge)) stmt = (attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt) stmt_list = many(stmt + skip(maybe(op(u';')))) subgraph.define( skip(n(u'subgraph')) + maybe(id) + op_(u'{') + stmt_list + op_(u'}') >> unarg(SubGraph)) graph = (maybe(n(u'strict')) + maybe(n(u'graph') | n(u'digraph')) + maybe(id) + op_(u'{') + stmt_list + op_(u'}') >> unarg(Graph)) dotfile = graph + skip(finished) return dotfile.parse(seq)
    '''
    print( tokens )
    return tokens

### Rules ###

## Base Rules
const = lambda x: lambda _: x
unarg = lambda f: lambda x: f(*x)
flatten = lambda list: sum( list, [] )

tokenValue = lambda x: x.value
tokenType = lambda t: some( lambda x: x.type == t ) >> tokenValue
operator = lambda s: a( Token( 'Operator', s ) ) >> tokenValue
parenthesis = lambda s: a( Token( 'Parenthesis', s ) ) >> tokenValue
bracket = lambda s: a( Token( 'Bracket', s ) ) >> tokenValue
eol = a( Token( 'EndOfLine', ';' ) )

def maybeFlatten( items ):
    '''
    Iterate through top-level lists
    Flatten, only if the element is also a list
    [[1,2],3,[[4,5]]] -> [1,2,3,[4,5]]
    '''
    new_list = []
    for elem in items:
        # Flatten only if a list

def t(code):
    return some(lambda x: x.type == code)

def _satisfies_production(fn):
    return fp.some(fn)

def _satisfies_debug(fn):
    return fp.some(lambda t: fn(t.value))

def _cons(pair):
    head, tail = pair
    return [head] + tail


def _mkstr_debug(x):
    return "".join(c.value for c in x)


def _mkstr_production(x):
    return "".join(x)


_any = fp.some(_const(True))


def _intersperse(d, xs):
    """ a -> [a] -> [a] """
    xs2 = []
    if xs:
        xs2.append(xs[0])
    for x in xs[1:]:
        xs2.append(d)
        xs2.append(x)
    return xs2

    left, mark, rest = args[:-2], args[-2], args[-1]
    if mark == NumberMark:
        item = left[0]
    elif mark == ParenMark:
        item = left[1]
    if rest is None:
        return item
    larg, fun, rarg = item, rest[0], rest[1]
    return Function(fun, [larg, rarg])


#
# Parser.
#

lparen = some(lambda tok: tok == "(")
rparen = some(lambda tok: tok == ")")
op = some(lambda tok: tok in "+-*^&|")
eof = some(lambda tok: tok == EOF)
number = some(lambda tok: tok.isdigit()) >> make_number

paren_expr = with_forward_decls(
    lambda: lparen + expr + rparen
)

# *Mark here are not really required, but if you are going to do
# anything complex that requires that you discern between different
# parsing paths, marks will often give you the least hassle.
expr = with_forward_decls(
    lambda: (number + pure(NumberMark) + expr_rest
             | paren_expr + pure(ParenMark) + expr_rest) >> make_expr)

#@+node:peckj.20140124085532.4010: *4* makeop
op = lambda s: a(Token(token.OP, s)) >> tokval
op_ = lambda s: skip(op(s))
const = lambda x: lambda _: x
makeop = lambda s, f: op(s) >> const(f)
#@+node:peckj.20140124085532.4008: *4* eval_expr
def eval_expr(z, list):
    return reduce(lambda s, (f, x): f(s, x), list, z)
f = unarg(eval_expr)
#@+node:peckj.20140124085532.4003: *3* grammar
posnumber = (some(lambda tok: tok.type == 'NUMBER')
             >> tokval
             >> make_number)
add = makeop('+', operator.add)
sub = makeop('-', operator.sub)
mul = makeop('*', operator.mul)
div = makeop('/', operator.div)
pow = makeop('**', operator.pow)
negnumber = (sub + posnumber) >> negate
number = posnumber | negnumber

mul_op = mul | div
add_op = add | sub

primary = with_forward_decls(
    lambda: number | (op_('(') + expr + op_(')')))
factor = primary + many(pow + primary) >> f

def parse(seq): """Sequence(Token) -> object""" const = lambda x: lambda _: x tokval = lambda x: x.value toktype = lambda t: some(lambda x: x.type == t) >> tokval op = lambda s: a(Token('Op', s)) >> tokval op_ = lambda s: skip(op(s)) n = lambda s: a(Token('Name', s)) >> tokval def make_array(n): if n is None: return [] else: return [n[0]] + n[1] def make_object(n): return dict(make_array(n)) def make_number(n): try: return int(n) except ValueError: return float(n) def unescape(s): std = { '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', } def sub(m): if m.group('standard') is not None: return std[m.group('standard')] else: return chr(int(m.group('unicode'), 16)) return re_esc.sub(sub, s) def make_string(n): return unescape(n[1:-1]) null = n('null') >> const(None) true = n('true') >> const(True) false = n('false') >> const(False) number = toktype('Number') >> make_number string = toktype('String') >> make_string value = forward_decl() member = string + op_(':') + value >> tuple object = ( op_('{') + maybe(member + many(op_(',') + member)) + op_('}') >> make_object) array = ( op_('[') + maybe(value + many(op_(',') + value)) + op_(']') >> make_array) value.define( null | true | false | object | array | number | string) json_text = object | array json_file = json_text + skip(finished) return json_file.parse(seq)
def evaluate(expression, environment):
    """Evaluate an expression in the specified variable environment."""
    # Well known functions
    const = lambda x: lambda _: x
    unarg = lambda f: lambda args: f(*args)

    # Semantic actions and auxiliary functions
    tokval = lambda tok: tok.value
    makeop = lambda s, f: op(s) >> const(f)
    sometok = lambda type: some(lambda tok: tok.type == type)

    def eval_name(s):
        try:
            return environment[s]  # Case-sensitive
        except KeyError:
            raise ValueError('unbound variable: %s' % s)

    def make_number(s):
        try:
            return int(s)
        except ValueError:
            return float(s)

    def eval_expr(expr, op_expr_pairs):
        result = expr
        for op, expr in op_expr_pairs:
            result = op(result, expr)
        return result

    def eval_call(func_name, maybe_expr_and_exprs):
        if maybe_expr_and_exprs:
            expr, exprs = maybe_expr_and_exprs
            args = [expr] + exprs
        else:
            args = []

        f = eval_name(func_name)
        if not callable(f):
            raise TypeError('variable is not callable: %s' % func_name)

        argcount = len(args)
        f_argcount = f.func_code.co_argcount
        if f_argcount != argcount:
            raise TypeError('%s takes %d arguments (%d given)' %
                            (func_name, f_argcount, argcount))
        return f(*args)

    # Primitives
    number = (
        sometok('number')
        >> tokval
        >> make_number)
    raw_name = sometok('name') >> tokval
    name = raw_name >> eval_name
    op = lambda s: a(Token('op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    add = makeop('+', operator.add)
    sub = makeop('-', operator.sub)
    mul = makeop('*', operator.mul)
    div = makeop('/', operator.div)

    mul_op = mul | div
    add_op = add | sub

    # Means of composition
    expr = forward_decl()
    call = (
        raw_name + op_('(') + maybe(expr + many(op_(',') + expr)) + op_(')')
        >> unarg(eval_call))
    primary = (
        number
        | call
        | name
        | op_('(') + expr + op_(')'))
    term = (
        primary + many(mul_op + primary)
        >> unarg(eval_expr))
    expr.define(
        term + many(add_op + term)
        >> unarg(eval_expr))

    # Toplevel parsers
    toplevel = maybe(expr) + skip(finished)

    return toplevel.parse(tokenize(expression))

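# --- Usage sketch (hypothetical): `tokenize` here is the module's own
# lexer (not shown), and this snippet is Python 2 code (operator.div,
# f.func_code), so calls would look like:
#
#   evaluate('2 * (3 + 4)', {})                             # -> 14
#   evaluate('double(x)', {'x': 5, 'double': lambda n: n * 2})  # -> 10
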
def _grouped(group_type, parsers):
    return (
        some(lambda x: isinstance(x, group_type)) >>
        (lambda x: group_type(whole(parsers).parse(x)).replace(x, recursive=False)))


def brackets(*parsers):

# Copyright 2018 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

"Parser combinators for pattern-matching Hy model trees."

from hy.models import HyExpression, HySymbol, HyKeyword, HyString, HyList
from funcparserlib.parser import (
    some, skip, many, finished, a, Parser, NoParseError, State)
from functools import reduce
from itertools import repeat
from operator import add
from math import isinf

FORM = some(lambda _: True)
SYM = some(lambda x: isinstance(x, HySymbol))
STR = some(lambda x: isinstance(x, HyString))


def sym(wanted):
    "Parse and skip the given symbol or keyword."
    if wanted.startswith(":"):
        return skip(a(HyKeyword(wanted[1:])))
    return skip(some(lambda x: isinstance(x, HySymbol) and x == wanted))


def whole(parsers):
    """Parse the parsers in the given list one after another, then expect
    the end of the input."""
    if len(parsers) == 0:
        return finished >> (lambda x: [])
    if len(parsers) == 1:
        return parsers[0] + finished >> (lambda x: x[:-1])

def token(tp):
    return p.some(lambda t: t.type == tp) >> token_value

# It works on a *reversed* sequence of tokens
# (right to left), starting from a token at cursor.
tok_number = token("number")
tok_string = token("string")
dot = token(".")
colon = token(":")
single_quote = token("'")
double_quote = token('"')
quote = (single_quote | double_quote)
open_sq_brace = token("[")
close_sq_brace = token("]")
open_rnd_brace = token("(")
close_rnd_brace = token(")")
tok_constant = p.some(lambda t: t.value in {'nil', 'true', 'false'})
iden_start = p.skip(p.some(lambda t: t.type not in ".:"))
tok_splash = (p.a(Token("iden", "splash")) + iden_start) >> token_value
iden = token("iden")
opt_iden = iden | p.pure("")

# =========== Expressions parser

# FIXME: it should be rewritten using full Lua 5.2 grammar.

BINARY_OPS = set("+-*/^%><") | {"..", "==", "~=", ">=", "<=", "and", "or"}
UNARY_OPS = {"not", "-", "#"}

binary_op = p.some(lambda t: t.value in BINARY_OPS) >> token_value
unary_op = p.some(lambda t: t.value in UNARY_OPS) >> token_value

# expressions with binary and unary ops + parenthesis

def base_parser(validate_field, validate_entry):
    """Return the base parser which all other parsers are based on.

    The valid_fields and valid_entries arguments denote the allowable
    names for a grammar.
    """
    # Simple expressions
    integer = token_type('number')
    name = token_type('name')
    variable = name

    # Braced expressions e.g. '{braced}'
    non_braced = if_token_type('content', always_true)
    braced_expr = parser.forward_decl()
    braced_expr.define(
        (if_token_type('lbrace', lambda v: True) +
         parser.many(braced_expr | non_braced) +
         if_token_type('rbrace', lambda v: True))
        >> make_braced_expr)
    braced_expr = braced_expr >> remove_outer_braces

    # String expressions, e.g. '"This " # var # " that"'
    string_expr = \
        full_delimited_list(
            parser.some(lambda x: x.type == 'string') >> make_string |
            parser.some(lambda x: x.type == 'name') >> token_value,
            'concat'
        ) >> join_string_expr('')

    # The value of a field
    value = braced_expr | integer | string_expr | variable

    # Make sure we only parsed valid fields
    valid_field = if_token_type('name', validate_field)
    field = valid_field + skip('equals') + value >> make_field
    assignment = token_type('name') + skip('equals') + value

    # A regular comment: Any text outside of entries
    comment = token_type('comment')

    # @string
    string_entry = simple_entry(assignment, is_string_entry,
                                make_string_entry)
    # @comment
    comment_entry = simple_entry(token_type('content'), is_comment_entry,
                                 make_comment_entry)
    # @preamble
    preamble_entry = simple_entry(token_type('content'), is_preamble_entry,
                                  make_preamble_entry)

    # Make sure we only parsed valid entries
    valid_entry = if_token_type('name', validate_entry) >> token_value

    # @article etc.
    entry = skip('entry') \
        + valid_entry \
        + skip('lbrace') \
        + (token_type('name') | token_type('number')) + skip('comma') \
        + parser.maybe(delimited_list(field, 'comma')) \
        + parser.maybe(skip('comma')) \
        + skip('rbrace') \
        >> make_entry

    return parser.many(string_entry | comment_entry | preamble_entry |
                       entry | comment) + parser.skip(parser.finished)

def if_token_type(token_type, pred):
    """Return a parser that parses a token type for which a predicate holds."""
    return parser.some(lambda t: t.type == token_type and pred(t.value))

def skip(s):
    """Parse and skip a specific token type."""
    return parser.skip(parser.some(lambda x: x.type == s))

def some(tok_type):
    return (parser.some(lambda tok: tok.type == tok_type)
            >> (lambda tok: tok.value)).named(str(tok_type))

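# --- Usage sketch (assumption: funcparserlib's lexer Token): the wrapper
# above both filters by token type and projects out the value, so:
from funcparserlib.lexer import Token

number = some('number')  # the wrapper defined above, not parser.some
print(number.parse([Token('number', '42')]))  # -> '42'
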