def comparison_():
    """Returns the parser for a compound compare statement"""
    ops = op('==') | op('<') | op('>') | op('<=') | op('>=') | op('!=')
    op_vals = (boolean | number | timestamp_or_string)
    comp_op = string + ops + op_vals >> make(ASTCompOp)

    def multi(func):
        """For x + many(x) lists, call func only when there are multiple xs"""
        def multi_(args):
            x, xs = args
            if len(xs) == 0:
                return x
            return func(args)
        return multi_

    comp_stmt = forward_decl()
    comp_base = forward_decl()
    comp_base.define((op_('(') + comp_stmt + op_(')')) |
                     comp_op |
                     ((n('not') + comp_base) >> make(ASTCompNot)))
    comp_and = comp_base + many(n_('and') + comp_base) >> multi(make(ASTCompAnd))
    comp_or = comp_and + many(n_('or') + comp_and) >> multi(make(ASTCompOr))
    comp_stmt.define(comp_or)

    return comp_stmt
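# A note on the multi() helper above, since the same trick recurs in other
# snippets: it leaves a lone operand unwrapped and only applies the AST
# constructor when the many(...) part actually matched something.
# Hypothetical traces for some combining function f:
#   multi(f)((x, []))      -> x               (single operand, passed through)
#   multi(f)((x, [y, z]))  -> f((x, [y, z]))  (a real 'and'/'or' chain)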
def __init__(self, item_store):
    self.item_store = item_store
    self.r = self.item_store.r  # redis client
    self.unique_ids_key = 'openrefine_wikidata:unique_ids'
    self.ttl = 4 * 24 * 60 * 60  # 4 days

    self.parser = forward_decl()
    atomic = forward_decl()
    atomic_subfield = forward_decl()
    concat_path = forward_decl()
    pipe_path = forward_decl()
    atomic.define(
        (t('PID') >> self.make_leaf) |
        (t('TERM') >> self.make_term) |
        (t('DOT') >> self.make_empty) |
        (st('LBRA') + pipe_path + st('RBRA')))
    atomic_subfield.define(
        (atomic + st('AT') + t('SUBFIELD') >> self.make_subfield) |
        atomic)
    concat_path.define(
        ((atomic_subfield + st('SLASH') + concat_path) >> self.make_slash) |
        atomic_subfield)
    pipe_path.define(
        ((concat_path + st('PIPE') + pipe_path) >> self.make_pipe) |
        concat_path)
    self.parser.define(pipe_path + finished >> (lambda x: x[0]))
def __init__(self, item_store):
    self.item_store = item_store
    self.r = self.item_store.r  # redis client
    self.unique_ids_key = redis_key_prefix + 'unique_ids'
    self.ttl = 1 * 24 * 60 * 60  # 1 day

    self.parser = forward_decl()
    atomic = forward_decl()
    atomic_subfield = forward_decl()
    concat_path = forward_decl()
    pipe_path = forward_decl()
    atomic.define(
        (t('PID') + st('UNDER') + t('PID') >> self.make_qualifier) |
        (t('PID') >> self.make_leaf) |
        (t('QID') >> self.make_qid) |
        (t('TERM') >> self.make_term) |
        (t('SITELINK') >> self.make_sitelink) |
        (t('DOT') >> self.make_empty) |
        (st('LBRA') + pipe_path + st('RBRA')))
    atomic_subfield.define(
        (atomic + st('AT') + t('SUBFIELD') >> self.make_subfield) |
        atomic)
    concat_path.define(
        ((atomic_subfield + st('SLASH') + concat_path) >> self.make_slash) |
        atomic_subfield)
    pipe_path.define(
        ((concat_path + st('PIPE') + pipe_path) >> self.make_pipe) |
        concat_path)
    self.parser.define(pipe_path + finished >> (lambda x: x[0]))
def parse(sequence, query):
    tokval = lambda x: x.value
    toktype = lambda t: (some(lambda x: x.type == t).named('(type %s)' % t) >> tokval)
    operation = lambda s: a(Token('Op', s)) >> tokval
    operation_ = lambda s: skip(operation(s))
    create_param = lambda param_name: query.get_aliased_param(param_name)
    make_and = lambda params: And(params[0], params[1])
    make_or = lambda params: Or(params[0], params[1])
    make_not = lambda inner: Not(inner)

    word = toktype('Word')
    inner_bracket = forward_decl()
    left_of_and = forward_decl()
    right_of_and = forward_decl()
    left_of_or = forward_decl()
    not_ = forward_decl()
    bracket = operation_('(') + inner_bracket + operation_(')')
    and_ = left_of_and + operation_('&') + right_of_and >> make_and
    or_ = left_of_or + operation_('|') + inner_bracket >> make_or
    param = word >> create_param
    not_.define(operation_('!') + (bracket | param))
    not_ = not_ >> make_not
    left_of_or.define(and_ | bracket | not_ | param)
    left_of_and.define(bracket | not_ | param)
    right_of_and.define(left_of_and)
    inner_bracket.define(or_ | and_ | bracket | not_ | param)

    definition = (bracket | inner_bracket) + finished
    return definition.parse(sequence)
def __init__(self, item_store):
    self.item_store = item_store
    self.r = self.item_store.r  # redis client
    self.unique_ids_key = 'openrefine_wikidata:unique_ids'
    self.ttl = 4 * 24 * 60 * 60  # 4 days
    self.sparql = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")

    self.parser = forward_decl()
    atomic = forward_decl()
    concat_path = forward_decl()
    pipe_path = forward_decl()
    atomic.define(
        (t('PID') >> self.make_leaf) |
        (t('DOT') >> self.make_empty) |
        (st('LBRA') + pipe_path + st('RBRA')))
    concat_path.define(
        ((atomic + st('SLASH') + concat_path) >> self.make_slash) |
        atomic)
    pipe_path.define(
        ((concat_path + st('PIPE') + pipe_path) >> self.make_pipe) |
        concat_path)
    self.parser.define(pipe_path + finished >> (lambda x: x[0]))
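# The three __init__ variants above all tie the recursive knot the same way:
# declare forward_decl() placeholders first, then define() them in terms of
# each other. A minimal, self-contained sketch of that pattern (the grammar
# below is invented for illustration; funcparserlib parsers accept any
# sequence, so plain characters stand in for tokens):
from funcparserlib.parser import a, finished, forward_decl, skip

nested = forward_decl()                 # declare before use...
item = a('x')
nested.define(item | (skip(a('[')) + nested + skip(a(']'))))   # ...then tie the knot

print((nested + skip(finished)).parse('[[x]]'))  # -> 'x'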
def parser(last_error=None):
    last_error = LastError() if last_error is None else last_error

    def apl(f):
        return lambda x: f(*x)

    def delim(t):
        return skip(_tok(t))

    symbol = _tok(Token.SYMBOL) >> _gen(Symbol)
    string = _tok(Token.STRING) >> _gen(String)
    placeholder = _tok(Token.PLACEHOLDER) >> _gen(Placeholder)
    keyword = _tok(Token.KEYWORD) >> _gen(Keyword)

    # Note: the tokenizer guarantees that the value consists of dots and digits
    # TODO: convert exceptions
    number = _tok(Token.NUMBER) >> _gen(Number, literal_eval)

    expr = forward_decl()
    implicit_tuple = forward_decl()

    list_ = ((_tok(Token.OPEN_BRACKET) + many(expr | keyword) +
              _tok(Token.CLOSE_BRACKET)) >> apl(_list))

    dict_ = (error_ctx(_tok(Token.OPEN_BRACE) + many(keyword + expr) +
                       _tok(Token.CLOSE_BRACE),
                       last_error, DICT_ERROR) >> apl(_dict))

    inline_args = many(expr | keyword)

    explicit_tuple = (error_ctx(_tok(Token.OPEN_PAREN) + symbol + inline_args +
                                _tok(Token.CLOSE_PAREN),
                                last_error, EXPLICIT_TUPLE_ERROR) >> apl(_tuple))

    indented_arg = (oneplus(implicit_tuple | expr + delim(Token.NEWLINE)) >> _maybe_join)

    indented_kwarg = ((keyword + expr + delim(Token.NEWLINE)) |
                      (keyword + delim(Token.NEWLINE) + delim(Token.INDENT) +
                       indented_arg + delim(Token.DEDENT)))

    indented_args_kwargs = (
        (many(indented_kwarg) + many(indented_arg)) >>
        apl(lambda pairs, args: list(chain(*(pairs + [args])))))

    implicit_tuple.define(
        error_ctx(symbol + inline_args + delim(Token.NEWLINE) +
                  maybe(delim(Token.INDENT) + indented_args_kwargs +
                        delim(Token.DEDENT)),
                  last_error, IMPLICIT_TUPLE_ERROR) >> apl(_implicit_tuple))

    expr.define(symbol | string | number | explicit_tuple | list_ | dict_ | placeholder)

    body = ((many(implicit_tuple) + _tok(Token.EOF)) >> apl(_module))
    return body
def __init__(self):
    self.toplevel = None
    makeop = lambda s: op(s) >> const(lambda l, r: BinaryExpression(l, s, r))

    kw_class = kw('class')
    kw_new = kw('new')
    kw_if = kw('if')
    kw_else = kw('else')
    kw_while = kw('while')
    kw_return = kw('return')

    add = makeop('+')
    sub = makeop('-')
    mul = makeop('*')
    div = makeop('/')
    equ = op_('=')

    int_const = number >> IntConst
    variable = rawname >> Variable
    type_ = rawname >> Type

    var_decl = type_ + rawname >> unarg(VarDeclaration)
    var_decls = many(var_decl + semicolon)

    method_call = forward_decl()
    atom = int_const | method_call | variable
    expr1 = atom + many((mul | div) + atom) >> eval_expr
    expr2 = expr1 + many((add | sub) + expr1) >> eval_expr
    method_call.define(rawname + inparens(maybe_empty_listof(expr2)) >> unarg(MethodCall))

    new_obj_expr = kw_new + type_ + openparen + closeparen >> NewObject
    assignment = variable + equ + (new_obj_expr | expr2) >> unarg(Assignment)
    return_stmt = kw_return + expr2 >> ReturnStatement
    simple_stmt = (assignment | method_call | return_stmt) + semicolon

    stmt_block = forward_decl()
    condition = openparen + expr2 + closeparen
    if_stmt = kw_if + condition + stmt_block + kw_else + stmt_block >> unarg(IfStatement)
    while_stmt = kw_while + condition + stmt_block >> unarg(WhileStatement)
    stmt = simple_stmt | if_stmt | while_stmt | return_stmt
    stmt_block.define(opencurlyparen + many(stmt) + closecurlyparen)

    method_decl = (type_ + rawname + inparens(maybe_empty_listof(var_decl)) +
                   opencurlyparen + var_decls + many(stmt) +
                   closecurlyparen >> unarg(MethodDeclaration))
    method_decls = many(method_decl)
    class_decl = (kw_class + rawname + opencurlyparen + var_decls +
                  method_decls + closecurlyparen >> unarg(ClassDeclaration))
    program = many(class_decl) >> Program
    self.toplevel = program + end
def json_text():
    """Returns the parser for Json formatted data"""
    # Taken from https://github.com/vlasovskikh/funcparserlib/blob/master/funcparserlib/tests/json.py
    # and modified slightly
    unwrap = lambda x: x.value
    # const(None) already yields the final value; unwrapping it afterwards
    # would fail with None.value
    null = (n('null') | n('Null')) >> const(None)
    value = forward_decl()
    member = (string >> unwrap) + op_(u':') + value >> tuple
    object = (op_(u'{') +
              maybe(member + many(op_(u',') + member) + maybe(op_(','))) +
              op_(u'}') >> make_object)
    array = (op_(u'[') +
             maybe(value + many(op_(u',') + value) + maybe(op_(','))) +
             op_(u']') >> make_array)
    value.define(
        null |
        (true >> unwrap) |
        (false >> unwrap) |
        object |
        array |
        (number >> unwrap) |
        (string >> unwrap))
    json_text = object | array
    return json_text
def __init__(self):
    # TODO: Make your changes in this section #############################
    value = number | string
    #######################################################################
    attribute = rawname + op_("=") + value + semicolon >> unarg(Attribute)
    attributes = many(attribute)

    # For chicken-and-egg problems, forward_decl will be your friend
    widgets = forward_decl()
    widget = (rawname + opencurlyparen + attributes + widgets +
              closecurlyparen >> unarg(Widget))
    widgets.define(many(widget))

    # For the toplevel, we allow only one widget, not multiple widgets
    self.toplevel = widget + end
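# For orientation, the shape of source this tutorial grammar accepts --
# attribute lines first, then nested widgets (all of the names below are
# invented):
#
#   window {
#       width = 800;
#       title = "demo";
#       button {
#           label = "ok";
#       }
#   }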
def _parse(seq):
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    def make_string(args):
        context, value = args
        if not context:
            context = 'any:'
        return String(unescape_str(value[1:-1]), context[:-1])

    def make_regex(args):
        context, value = args
        value, modifiers = value.rsplit('/', 1)
        value = value[1:]
        if not context:
            context = 'any:'
        return Regex(unescape_regex(value), modifiers, context[:-1])

    def make_or(args):
        return Or(*args)

    def make_and(args):
        return And(*args)

    def make_not(x):
        return Not(x)

    context = maybe(toktype(u'Prefix'))
    string = (context + toktype(u'String')) >> make_string
    regex = (context + toktype(u'Regex')) >> make_regex

    par_term = forward_decl()
    simple_term = forward_decl()
    term = forward_decl()
    not_term = forward_decl()
    and_term = forward_decl()
    or_term = forward_decl()

    par_term.define(op_(u'(') + term + op_(u')'))
    simple_term.define(par_term | string | regex)
    not_term.define(op_('not') + not_term >> make_not | simple_term)
    and_term.define(not_term + op_('and') + and_term >> make_and | not_term)
    or_term.define(and_term + op_('or') + or_term >> make_or | and_term)
    term.define(or_term)

    eof = skip(toktype(u'EOF'))
    filter_expr = (term + eof) | (eof >> const(Any()))
    return filter_expr.parse(seq)
def __init__(self):
    self.toplevel = None
    makeop = lambda s: op(s) >> const(lambda l, r: BinaryExpression(l, s, r))

    kw_while = kw('while')
    kw_if = kw('if')
    kw_else = kw('else')
    kw_put = kw('put')

    add = makeop('+')
    sub = makeop('-')
    mul = makeop('*')
    div = makeop('/')
    comparison_operator = (makeop('<') | makeop('<=') | makeop('>') |
                           makeop('>=') | makeop('==') | makeop('!='))
    equ = op_('=')
    sign = op('+') | op('-')

    int_const = maybe(sign) + number >> unarg(IntConst)
    variable = rawname >> Variable
    atom = int_const | variable
    expr1 = atom + many((mul | div) + atom) >> eval_expr
    expr2 = expr1 + many((add | sub) + expr1) >> eval_expr
    comparison_expr = expr2 + many(comparison_operator + expr2) >> eval_expr

    put_call = kw_put + inparens(comparison_expr) >> PutCall
    assignment = variable + equ + expr2 >> unarg(Assignment)

    condition = openparen + comparison_expr + closeparen
    stmt_block = forward_decl()
    while_stmt = kw_while + condition + stmt_block >> unarg(WhileStatement)
    if_stmt = (kw_if + condition + stmt_block +
               maybe(kw_else + stmt_block) >> unarg(IfStatement))
    stmt = assignment | put_call | while_stmt | if_stmt
    stmt_block.define(opencurlyparen + many(stmt) + closecurlyparen)

    program = many(stmt) >> Program
    self.toplevel = program + end
def parse(tokens):
    makeop = lambda s: op(s) >> const(lambda l, r: BinaryExpression(l, s, r))
    num = number >> Number
    var = rawname >> Variable
    add, sub, mul, div = map(makeop, ['+', '-', '*', '/'])
    lt, lte, gt, gte = map(makeop, ['<', '<=', '>', '>='])

    method_call = forward_decl()
    atom = num | method_call | var
    expr1 = atom + many((mul | div) + atom) >> eval_expr
    expr2 = expr1 + many((add | sub) + expr1) >> eval_expr
    expr3 = expr2 + many((lt | lte | gt | gte) + expr2) >> eval_expr
    method_call.define(rawname + inparens(maybe_empty_listof(expr2)) >> unarg(MethodCall))

    defn = expr3 + end
    return defn.parse(tokens)
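# The atom + many(op + atom) >> eval_expr chain above is the usual
# funcparserlib idiom for left-associative binary operators. A
# self-contained sketch of the same idea; the token spec and eval_expr
# below are stand-ins, not the helpers used by the snippet above:
import operator
from funcparserlib.lexer import Token, make_tokenizer
from funcparserlib.parser import a, finished, many, skip, some

def tokenize(s):
    specs = [('Number', (r'\d+',)), ('Op', (r'[+\-*/]',)), ('Space', (r'\s+',))]
    return [tok for tok in make_tokenizer(specs)(s) if tok.type != 'Space']

tokval = lambda tok: tok.value
number = some(lambda tok: tok.type == 'Number') >> tokval >> int
op = lambda s: a(Token('Op', s)) >> tokval
ops = {'+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv}

def eval_expr(args):
    # fold a (first, [(op, operand), ...]) pair left-to-right
    acc, rest = args
    for o, x in rest:
        acc = ops[o](acc, x)
    return acc

term = number + many((op('*') | op('/')) + number) >> eval_expr
expr = term + many((op('+') | op('-')) + term) >> eval_expr

print((expr + skip(finished)).parse(tokenize('2 + 3 * 4')))  # -> 14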
def _create_type_rules():
    comma = _token_type("comma")
    colon = _token_type("colon")
    question_mark = _token_type("question-mark")
    bar = _token_type("bar")
    equals = _token_type("equals")
    attr_name = type_name = arg_name = _token_type("name") >> _make_name

    primary_type = forward_decl()
    union_type = _one_or_more_with_separator(primary_type, bar) >> _make_union_type
    type_ = union_type
    type_ref = type_name >> _make_type_ref
    applied_type = (
        type_ref +
        skip(_token_type("open")) +
        _one_or_more_with_separator(type_, comma) +
        skip(_token_type("close"))
    ) >> _make_apply

    arg = (maybe(question_mark) + maybe(arg_name + skip(colon)) + type_) >> _make_arg
    generic_params = maybe(type_name + _token_type("fat-arrow")) >> _make_params
    args = _zero_or_more_with_separator(arg, comma)
    signature = (generic_params + args + _token_type("arrow") + type_) >> _make_signature
    sub_signature = (_token_type("paren-open") + signature +
                     _token_type("paren-close")) >> (lambda result: result[1])
    primary_type.define(sub_signature | applied_type | type_ref)
    explicit_type = signature | type_

    type_definition = (type_name + skip(equals) + type_ +
                       skip(finished)) >> _make_type_definition

    structural_type_attr = (attr_name + skip(colon) + explicit_type) >> tuple
    structural_type_attrs = many(structural_type_attr)
    structural_type_definition = (type_name + skip(colon) + structural_type_attrs +
                                  skip(finished)) >> _make_structural_type_definition

    generic = (_one_or_more_with_separator(type_name, comma) +
               skip(finished)) >> _make_generic

    return (explicit_type + skip(finished), type_definition,
            structural_type_definition, generic)
def __init__(self):
    # TODO: Make your changes in this section #############################
    nr = number >> Number
    plus_operator = op('+') >> Operator
    calculation = nr + plus_operator + nr >> unarg(Calculation)
    string_list = inbrackets(listof(string))
    value = calculation | number | string | string_list
    #######################################################################
    attribute = rawname + op_("=") + value + semicolon >> unarg(Attribute)
    attributes = many(attribute)

    widgets = forward_decl()
    widget = (rawname + opencurlyparen + attributes + widgets +
              closecurlyparen >> unarg(Widget))
    widgets.define(many(widget))

    self.toplevel = widget + end
def create_grammar():
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    null = n('null')
    true = n('true')
    false = n('false')
    number = toktype('Number')
    string = toktype('String')

    value = forward_decl()
    member = string + op_(':') + value
    object_ = (op_('{') + maybe(member + many(op_(',') + member)) + op_('}'))
    array = (op_('[') + maybe(value + many(op_(',') + value)) + op_(']'))
    value.define(null | true | false | object_ | array | number | string)
    json_text = object_ | array
    json_file = json_text + skip(finished)

    return json_file
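# A hedged usage sketch for create_grammar(): the lexer below is an
# assumption (the real one is not shown), but it emits the 'Op', 'Name',
# 'Number' and 'String' tokens the grammar matches on:
from funcparserlib.lexer import make_tokenizer

def tokenize(s):
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('String', (r'"[^"]*"',)),          # simplified: no escape handling
        ('Number', (r'-?\d+(\.\d+)?',)),
        ('Op', (r'[{}\[\]:,]',)),
        ('Name', (r'[a-z]+',)),
    ]
    return [tok for tok in make_tokenizer(specs)(s) if tok.type != 'Space']

json_file = create_grammar()
print(json_file.parse(tokenize('{"a": [1, 2, null]}')))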
def create_parser():
    # operator: '~=' | '>=' | '<=' | '<' | '>' | '='
    operator = some(lambda tok: tok.type == 'CMP') >> choose_class

    # value: STRING | WORD
    word = some(lambda tok: tok.type == 'WORD') >> Text
    string = some(lambda tok: tok.type == 'STRING') >> QuotedText
    value = string | word

    # function: WORD '(' ')'
    open_brace = skip(a(Token('BR', '(')))
    close_brace = skip(a(Token('BR', ')')))
    function = word + open_brace + close_brace >> Function

    # field_expr: WORD operator value
    fieldexpr = (word + operator + (function | value)) >> (lambda x: x[1]([x[0], x[2]]))

    OR = a(Token('OP', 'OR')) >> choose_class
    AND = a(Token('OP', 'AND')) >> choose_class

    def eval(data):
        # fold a (first, [(op, operand), ...]) pair left-to-right
        arg1, lst = data
        for f, arg2 in lst:
            arg1 = f([arg1, arg2])
        return arg1

    expr = forward_decl()
    basexpr = open_brace + expr + close_brace | fieldexpr
    andexpr = (basexpr + many(AND + basexpr)) >> eval
    orexpr = (andexpr + many(OR + andexpr)) >> eval
    expr.define(orexpr)
    return expr
def parse(seq):
    """Sequence(Token) -> object"""
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token(u'Name', s)) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id_types = [u'Name', u'Number', u'String']
    id = some(lambda t: t.type in id_types).named(u'id') >> tokval
    make_graph_attr = lambda args: DefAttrs(u'graph', [Attr(*args)])
    make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs)

    node_id = id  # + maybe(port)
    a_list = (id + maybe(op_(u'=') + id) + skip(maybe(op(u','))) >> unarg(Attr))
    attr_list = (many(op_(u'[') + many(a_list) + op_(u']')) >> flatten)
    attr_stmt = ((n(u'graph') | n(u'node') | n(u'edge')) + attr_list >> unarg(DefAttrs))
    graph_attr = id + op_(u'=') + id >> make_graph_attr
    node_stmt = node_id + attr_list >> unarg(Node)

    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    subgraph = forward_decl()
    edge_rhs = skip(op(u'->') | op(u'--')) + (subgraph | node_id)
    edge_stmt = ((subgraph | node_id) + oneplus(edge_rhs) + attr_list >> unarg(make_edge))
    stmt = (attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt)
    stmt_list = many(stmt + skip(maybe(op(u';'))))
    subgraph.define(
        skip(n(u'subgraph')) + maybe(id) + op_(u'{') + stmt_list + op_(u'}')
        >> unarg(SubGraph))
    graph = (maybe(n(u'strict')) + maybe(n(u'graph') | n(u'digraph')) +
             maybe(id) + op_(u'{') + stmt_list + op_(u'}') >> unarg(Graph))
    dotfile = graph + skip(finished)
    return dotfile.parse(seq)
"""Helper function for defining operators""" return a(Token(token.OP, s)) >> _token_value _open_square_bracket = _op('[') _close_square_bracket = _op(']') _open_parenthesis = _op('(') _close_parenthesis = _op(')') _open_curly_bracket = _op('{') _close_curly_bracket = _op('}') _comma = _op(',') _dot = _op('.') _colon = _op(':') # Declaration of recursive types so they can be used now _sequence = forward_decl() _dictionary = forward_decl() _event_body = forward_decl() # Definition of all the the Apama event parts _simple_types = _number | _string | _boolean _comparable_types = _number | _string # _comparable_types are for dictionary keys _abstract_data_types = _sequence | _dictionary _channel = _string + skip(_comma) >> _strip_quotes _package_name = many(_name + _dot) >> _make_package _event_name = _name _event = ((_endmarker + finished >> (lambda x: None)) | maybe(_channel) + maybe(_package_name) + _event_name + _event_body) >> _create_apama_event _types = _simple_types | _abstract_data_types | _event
def evaluate(expression, environment):
    """Evaluate an expression in the specified variable environment."""
    # Well known functions
    const = lambda x: lambda _: x
    unarg = lambda f: lambda args: f(*args)

    # Semantic actions and auxiliary functions
    tokval = lambda tok: tok.value
    makeop = lambda s, f: op(s) >> const(f)
    sometok = lambda type: some(lambda tok: tok.type == type)

    def eval_name(s):
        try:
            return environment[s]  # Case-sensitive
        except KeyError:
            raise ValueError('unbound variable: %s' % s)

    def make_number(s):
        try:
            return int(s)
        except ValueError:
            return float(s)

    def eval_expr(expr, op_expr_pairs):
        result = expr
        for op, expr in op_expr_pairs:
            result = op(result, expr)
        return result

    def eval_call(func_name, maybe_expr_and_exprs):
        if maybe_expr_and_exprs:
            expr, exprs = maybe_expr_and_exprs
            args = [expr] + exprs
        else:
            args = []
        f = eval_name(func_name)
        if not callable(f):
            raise TypeError('variable is not callable: %s' % func_name)
        argcount = len(args)
        f_argcount = f.__code__.co_argcount
        if f_argcount != argcount:
            raise TypeError('%s takes %d arguments (%d given)' %
                            (func_name, f_argcount, argcount))
        return f(*args)

    # Primitives
    number = (sometok('number') >> tokval >> make_number)
    raw_name = sometok('name') >> tokval
    name = raw_name >> eval_name
    op = lambda s: a(Token('op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    add = makeop('+', operator.add)
    sub = makeop('-', operator.sub)
    mul = makeop('*', operator.mul)
    div = makeop('/', operator.truediv)

    mul_op = mul | div
    add_op = add | sub

    # Means of composition
    expr = forward_decl()
    call = (raw_name + op_('(') + maybe(expr + many(op_(',') + expr)) + op_(')')
            >> unarg(eval_call))
    primary = (number | call | name | op_('(') + expr + op_(')'))
    term = (primary + many(mul_op + primary) >> unarg(eval_expr))
    expr.define(term + many(add_op + term) >> unarg(eval_expr))

    # Toplevel parsers
    toplevel = maybe(expr) + skip(finished)

    return toplevel.parse(tokenize(expression))
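# Usage sketch, assuming the tokenize() helper this module relies on emits
# the 'number', 'name' and 'op' tokens referenced above:
print(evaluate('2 * (x + 3)', {'x': 4}))                   # -> 14
print(evaluate('add(1, 2)', {'add': lambda a, b: a + b}))  # -> 3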
def parse(seq):
    """Sequence(Token) -> object"""
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('OP', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('NAME', s)) >> tokval

    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]

    def make_object(n):
        return dict(make_array(n))

    def make_int(n):
        return '%s' % int(n)

    def make_real(n):
        return '%s' % float(n)

    def unescape(s):
        std = {
            '"': '"', '\\': '\\', '/': '/', 'b': '\b',
            'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
        }

        def sub(m):
            if m.group('standard') is not None:
                return std[m.group('standard')]
            else:
                return chr(int(m.group('unicode'), 16))

        return re_esc.sub(sub, s)

    def make_string(n):
        return n
        # return unescape(n[1:-1])

    def make_all_models(models):
        return dict(models)
        # all_attrs = []
        # for i in attrs:
        #     attr = i[0]
        #     if attr not in all_attrs:
        #         all_attrs.append(attr)
        #     else:
        #         raise Exception('Attribute %s is already defined in class' % attr)

    def make_fields(n):
        # return dict(n)
        return Field(n)

    def make_params(n):
        return n

    null = toktype('NONE') >> const("None")
    true = toktype('TRUE') >> const("True")
    false = toktype('FALSE') >> const("False")
    number = toktype('INT') >> make_int
    real = toktype('REAL') >> make_real
    string = toktype('STRING') >> make_string

    value = forward_decl()
    name = toktype('NAME')
    field = toktype('FIELD') + maybe(op_('(') + many(value) + op_(')')) >> tuple
    member = string + op_(':') + value >> tuple
    attrs = forward_decl()
    params = forward_decl()
    models = many(name + op_('::') + many(attrs)) >> make_all_models
    attrs.define(name + op_(':') + field + many(params) >> make_fields)
    params.define(name + op_('=') + value >> tuple)
    value.define(null | true | false | name | number | real | string)

    parser_text = models
    parser_file = parser_text + skip(finished)
    return parser_file.parse(seq)
def decode_tokens(s: typing.List[Token]):
    def _to_int(token):
        _log.debug('_to_int: %r', token)
        return int(token)

    str_strip_re = re.compile(rb'^\d+:')

    def _to_string(s):
        _log.debug('_to_string: %r', s)
        return str_strip_re.sub(b'', s)

    def _to_list(_tokens):
        _log.debug('_to_list: %r', _tokens)
        return _tokens

    def _to_dict(n):
        _log.debug('_to_dict: %r', n)
        return dict(_to_list(n))

    def token_value(x):
        return x.value

    def token_type(t):
        return p.some(lambda x: x.name == t) >> token_value

    def type_decl(type_name):
        return p.a(Token('Type', type_name)).named('Type({})'.format(type_name))

    value = p.forward_decl().named('Value')
    integer = token_type('Number')
    end = p.a(Token('End', b'e'))

    # String is special, has no type
    str_decl = (token_type('String') >> _to_string).named('String')
    dict_decl = (p.skip(type_decl(b'd')) + p.many(value + value) + p.skip(end)
                 >> _to_dict).named('Dict')
    list_decl = (p.skip(type_decl(b'l')) + p.many(value) + p.skip(end)
                 >> _to_list).named('List')
    integer_decl = (p.skip(type_decl(b'i')) + integer + p.skip(end)
                    >> _to_int).named('Integer')

    value.define(integer_decl | dict_decl | list_decl | str_decl)
    bencode_decl = (value + p.skip(p.finished)).named('Bencode')
    return bencode_decl.parse(s)
def parse(seq):
    """Sequence(Token) -> object"""
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]

    def make_object(n):
        return dict(make_array(n))

    def make_number(n):
        try:
            return int(n)
        except ValueError:
            return float(n)

    def unescape(s):
        std = {
            '"': '"', '\\': '\\', '/': '/', 'b': '\b',
            'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
        }

        def sub(m):
            if m.group('standard') is not None:
                return std[m.group('standard')]
            else:
                return chr(int(m.group('unicode'), 16))

        return re_esc.sub(sub, s)

    def make_string(n):
        return unescape(n[1:-1])

    null = n('null') >> const(None)
    true = n('true') >> const(True)
    false = n('false') >> const(False)
    number = toktype('Number') >> make_number
    string = toktype('String') >> make_string

    value = forward_decl()
    member = string + op_(':') + value >> tuple
    object = (op_('{') + maybe(member + many(op_(',') + member)) + op_('}')
              >> make_object)
    array = (op_('[') + maybe(value + many(op_(',') + value)) + op_(']')
             >> make_array)
    value.define(null | true | false | object | array | number | string)
    json_text = object | array
    json_file = json_text + skip(finished)

    return json_file.parse(seq)
def parse(seq):
    """Sequence(Token) -> object"""
    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    _id = some(lambda t: t.type in ['Name', 'Number', 'String']) >> tokval
    keyword = lambda s: a(Token('Name', s)) >> tokval
    separator = some(lambda t: t.type == 'Separator') >> tokval

    def make_separator(sep):
        return Separator(sep[0:3], sep[3:-3].strip())

    #
    # parts of syntax
    #
    option_stmt = (_id + maybe(op_('=') + _id) >> create_mapper(Attr))
    option_list = (maybe(op_('[') + option_stmt + many(op_(',') + option_stmt) + op_(']'))
                   >> create_mapper(oneplus_to_list, default_value=[]))

    # attributes statement::
    #    default_shape = box;
    #    default_fontsize = 16;
    #
    attribute_stmt = (_id + op_('=') + _id >> create_mapper(Attr))

    # node statement::
    #    A;
    #    B [attr = value, attr = value];
    #
    node_stmt = (_id + option_list >> create_mapper(Node))

    # separator statement::
    #    === message ===
    #    ... message ...
    #
    separator_stmt = (separator >> make_separator)

    # edge statement::
    #    A -> B;
    #    C -> D {
    #      D -> E;
    #    }
    #
    edge_block = forward_decl()
    edge_relation = (op('<<--') | op('<--') | op('<<-') | op('<-') |
                     op('->') | op('->>') | op('-->') | op('-->>') |
                     op('=>'))
    edge_stmt = (_id + edge_relation + _id + many(edge_relation + _id) +
                 option_list + maybe(edge_block) >> create_mapper(Edge))
    edge_block_inline_stmt_list = (many(edge_stmt + skip(maybe(op(';'))) |
                                        separator_stmt))
    edge_block.define(op_('{') + edge_block_inline_stmt_list + op_('}') >> Statements)

    # group statement::
    #    group {
    #      A;
    #    }
    #
    group_inline_stmt_list = (many((attribute_stmt | node_stmt) + skip(maybe(op(';')))))
    group_stmt = (skip(keyword('group')) + skip(maybe(_id)) +
                  op_('{') + group_inline_stmt_list + op_('}') >> Group)

    # combined fragment (alt, loop) statement::
    #    loop {
    #      A -> B;
    #    }
    #    alt {
    #      D -> E;
    #    }
    #
    fragment_stmt = forward_decl()
    fragment_inline_stmt = (attribute_stmt | fragment_stmt | edge_stmt | node_stmt)
    fragment_inline_stmt_list = (many(fragment_inline_stmt + skip(maybe(op(';')))))
    fragment_types = (keyword('alt') | keyword('loop'))
    fragment_stmt.define(fragment_types + maybe(_id) +
                         op_('{') + fragment_inline_stmt_list + op_('}')
                         >> create_mapper(Fragment))

    # extension statement (class, plugin)::
    #    class red [color = red];
    #    plugin attributes [name = Name];
    #
    extension_stmt = ((keyword('class') | keyword('plugin')) + _id + option_list
                      >> create_mapper(Extension))

    # diagram statement::
    #    seqdiag {
    #      A -> B;
    #    }
    #
    diagram_id = ((keyword('diagram') | keyword('seqdiag')) + maybe(_id) >> list)
    diagram_inline_stmt = (extension_stmt | attribute_stmt | fragment_stmt |
                           group_stmt | edge_stmt | separator_stmt | node_stmt)
    diagram_inline_stmt_list = (many(diagram_inline_stmt + skip(maybe(op(';')))))
    diagram = (maybe(diagram_id) + op_('{') + diagram_inline_stmt_list + op_('}')
               >> create_mapper(Diagram))

    dotfile = diagram + skip(finished)
    return dotfile.parse(seq)
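# As a reading aid, a sketch of seqdiag input this grammar covers -- edges
# with options, separators, and alt/loop fragments (identifiers invented):
#
#   seqdiag {
#       A -> B [label = hello];
#       === phase two ===
#       alt {
#           B -> C;
#       }
#   }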
if __name__ == "__main__": input_ts = dict(zip(['A_TS', 'B_TS'], generate_two_dummy_data_sets())) integer = p.oneplus(p.some(lambda c: c.isdigit())) >> (lambda toks: int(''.join(toks))) def to_number(toks): if not toks[1]: return float(toks[0]) return float("%s.%s".format(toks)) number = integer + p.maybe(p.skip(p.a('.')) + integer) >> to_number timeseries_name = p.oneplus(p.some(lambda c: c.isupper() or c == '_')) >> (lambda toks: ''.join(toks)) timeseries_expr = timeseries_name >> (lambda name: nodes.TimeSeriesNode(input_ts[name])) scalar_expr = number >> (lambda n: nodes.TimeSeriesNode(ScalarTimeSeries(n))) expr = p.forward_decl() expr_rest = p.forward_decl() expr.define( timeseries_expr + expr_rest >> (lambda x: x[1](x[0])) | scalar_expr + expr_rest >> (lambda x: x[1](x[0])) ) # Holy cow, is this there a better way to get around left-recursion? def generate_arithmetic_node_function(node_type): # the left and right names here are misleading and wrong def outer(right): def inner(left): if left is not None and right is not None: #print "Making " + str(node_type) + " with " + str(right) + " and " + str(left) return node_type(left, right)
def parse(seq):
    """Sequence(Token) -> object"""
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    node_type_keywords = [
        'start', 'startCompensation', 'startConditional', 'startConditionalNon',
        'startError', 'startEscalation', 'startEscalationNon', 'startMessage',
        'startMessageNon', 'startMultiple', 'startMultipleNon',
        'startParallelMultiple', 'startParallelMultipleNon', 'startSignal',
        'startSignalNon', 'startTimer', 'startTimerNon',
        'end', 'endCancel', 'endCompensation', 'endError', 'endEscalation',
        'endMessage', 'endMultiple', 'endSignal', 'endTerminate',
        'intermediate', 'catchCancel', 'catchCompensation', 'throwCompensation',
        'catchError', 'catchEscalation', 'catchEscalationNon', 'throwEscalation',
        'catchLink', 'throwLink', 'catchMessage', 'catchMessageNon',
        'throwMessage', 'catchMultiple', 'catchMultipleNon', 'throwMultiple',
        'catchParallelMultiple', 'catchParallelMultipleNon', 'catchSignal',
        'catchSignalNon', 'throwSignal', 'conditional', 'conditionalNon',
        'timer', 'timerNon',
        'task', 'businessRuleTask', 'manualTask', 'receiveTask', 'scriptTask',
        'sendTask', 'serviceTask', 'userTask',
        'call', 'businessRuleCall', 'manualCall', 'scriptCall', 'userCall',
        'process', 'adhoc', 'transaction',
        'event', 'eventCompensation', 'eventConditional', 'eventConditionalNon',
        'eventError', 'eventEscalation', 'eventEscalationNon', 'eventMessage',
        'eventMessageNon', 'eventMultiple', 'eventMultipleNon',
        'eventParallelMultiple', 'eventParallelMultipleNon', 'eventSignal',
        'eventSignalNon', 'eventTimer', 'eventTimerNon',
        'inclusive', 'exclusive', 'parallel', 'complex', 'eventBased',
        'eventBasedStart', 'eventBasedParallelStart',
        'data', 'dataCollection', 'dataInput', 'dataInputCollection',
        'dataOutput', 'dataOutputCollection', 'dataStore'
    ]
    node_type = some(lambda t: t.value in node_type_keywords).named('type') >> tokval

    id_types = ['Name', 'Number', 'String']
    id = some(lambda t: t.type in id_types).named('id') >> tokval
    make_graph_attr = lambda args: DefAttrs('graph', [Attr(*args)])

    node_id = id
    a_list = (id + maybe(op_('=') + id) + skip(maybe(op(';'))) >> unarg(Attr))
    attr_list = (many(op_('[') + many(a_list) + op_(']')) >> flatten)
    attr_stmt = ((n('_') | n('node') | n('edge')) + attr_list >> unarg(DefAttrs))
    graph_attr = id + op_('=') + id >> make_graph_attr
    node_stmt = node_type + node_id + attr_list >> unarg(Node)

    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    lane = forward_decl()
    pool = forward_decl()
    edge_type = some(lambda t: t.type == 'EdgeOp') >> tokval
    edge_rhs = (edge_type + node_id) >> (lambda t: [t[0], t[1]])
    edge_stmt = (node_id + oneplus(edge_rhs) + attr_list >> unarg(Edge))
    stmt = (attr_stmt | edge_stmt | lane | pool | graph_attr | node_stmt)
    stmt_list = many(stmt + skip(maybe(op(';'))))
    pool.define(skip(n('pool')) + maybe(id) + op_('{') + stmt_list + op_('}')
                >> unarg(Pool))
    lane.define(skip(n('lane')) + maybe(id) + op_('{') + stmt_list + op_('}')
                >> unarg(Lane))
    graph = (maybe(n('graph')) + maybe(id) + op_('{') + stmt_list + op_('}')
             >> unarg(Graph))
    dotfile = graph + skip(finished)

    try:
        return dotfile.parse(seq)
    except NoParseError as e:
        print(e)
        return None
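# A sketch of the input side, assuming the lexer emits 'EdgeOp' tokens for
# the arrows (the arrow spelling below is an assumption):
#
#   graph example {
#       pool Orders {
#           lane Clerk {
#               start begin;
#               task review [label = "check order"];
#               end done;
#               begin -> review -> done;
#           }
#       }
#   }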
not_in = op('not') + op('in') >> const(lambda x, y: x not in y)

true = kw('True') >> token_value >> make_bool
false = kw('False') >> token_value >> make_bool
name = token_type('name') >> token_value >> make_name
number = token_type('number') >> token_value >> make_number
string = token_type('string') >> token_value >> make_string

# grammar rules
mul_op = mul | div
add_op = add | sub
cmp_op = lt | le | eq | ne | ge | gt | in_ | not_in

# forward declarations
nexpr = forward_decl()
bexpr = forward_decl()
sexpr = forward_decl()
expr = forward_decl()
lambda_ = forward_decl()
application = forward_decl()
renaming = forward_decl()
set_context = forward_decl()

# string expression
strexpr = string + many(add_op + string) >> u(concatenate)

# numerical expression
numeric = application | lambda_ | number | name | (op_('(') + nexpr + op_(')'))
factor = numeric + many(power + numeric) >> u(make_expression)
    ]
    useless = ["comment", "space"]
    tokenizer = make_tokenizer(specs)
    return tuple(token for token in tokenizer(characters)
                 if token.type not in useless)

# Match a particular string
op = lambda value: some(lambda token: token.value == value)
# Match a particular string and discard the result
op_ = lambda value: skip(some(lambda token: token.value == value))

expression = forward_decl()

real = some(lambda x: x.type == "float") >> (lambda x: float(x.value))
integer = some(lambda x: x.type == "integer") >> (lambda x: int(x.value))
number = integer | real
symbol = some(lambda x: x.type == "name") >> (lambda name: sp.Symbol(name.value))

def make_function(matches):
    arguments = matches[1]
    combined = [] if arguments is None else [arguments[0]] + arguments[1]
    function = sp.__dict__[str(matches[0])]
    return function(*combined)

function = (symbol + op_("(") + maybe(expression + many(op_(",") + expression)) +
            op_(")") >> make_function)
from funcparserlib import parser as p

from lib import ast
from . import tokenizer as t


def build_simple_parser(token_name, ast_class):
    return (p.some(lambda token: token.type == token_name) >>
            (lambda token: ast_class(repeat=token.value)))


p_inc = build_simple_parser(token_name='inc', ast_class=ast.Inc)
p_dec = build_simple_parser(token_name='dec', ast_class=ast.Dec)
p_right = build_simple_parser(token_name='right', ast_class=ast.Right)
p_left = build_simple_parser(token_name='left', ast_class=ast.Left)
p_input = build_simple_parser(token_name='input', ast_class=ast.Input)
p_output = build_simple_parser(token_name='output', ast_class=ast.Output)

p_simple_expression = p_dec | p_inc | p_right | p_left | p_input | p_output

p_loop_expression = p.forward_decl()
p_expression = p.forward_decl()

p_loop_expression.define(
    (p.skip(p.a(t.t_loop_start())) + p.maybe(p_expression) + p.skip(p.a(t.t_loop_end())))
    >> (lambda contains: ast.Loop(contains=(contains if contains else list()))))

p_expression.define(p.oneplus(p_simple_expression | p_loop_expression))

p_program = (p_expression >> (lambda contains: ast.Program(contains=contains)))
def pure_parse(seq):
    'Sequence(Token) -> object'
    # http://pure-lang.googlecode.com/svn/docs/pure-syntax/pure-syntax.pdf&pli=1
    # This is some beautiful code... whatever, it's fast

    def make_number(n):
        if 'L' in n:
            return int(n.rstrip('L'))  # Python 2 long literal
        try:
            return int(n)
        except ValueError:
            return float(n)

    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]

    def make_name(s):
        return s

    def make_funcapp(n):
        head, tail = n
        if len(tail) == 0:
            return head
        else:
            return (head, tail)

    number = sometok('number') >> make_number
    var = sometok('name') >> make_name
    atom = var | number

    pure = forward_decl()
    funcapp = forward_decl()
    array = forward_decl()
    expr = op_('(') + funcapp + op_(')')
    funcapp.define(var + many(expr | array | atom) >> make_funcapp)
    first = (funcapp | array | atom)
    array.define(op_('[') + first + many(op_(',') + first) + op_(']') >> make_array)

    # First order objects
    # first_order.define(
    #     funcapp |
    #     # array |
    #     atom
    # )

    @with_forward_decls
    def pure():
        return (first + skip(finished)) | first

    primary = pure
    return primary.parse(tokenize(seq))
def parse(seq):
    """Sequence(Token) -> object"""
    Attr = namedtuple('Attr', 'name value')

    keyword = lambda s: a(Token(u'keyword', s))
    sep = lambda s: a(Token(u'Sep', s))
    context_open = skip(keyword(u'{'))
    context_close = skip(keyword(u'}'))
    call_open = skip(keyword(u'('))
    call_close = skip(keyword(u')'))
    def_keyword = skip(keyword(u'def'))
    context_keyword = skip(keyword(u'context'))
    eq_keyword = a(Token(u'Eq', '='))
    environment_keyword = skip(keyword(u'environment'))
    delegation_keyword = skip(keyword(u'.'))
    comma = sep(u',')

    ## Helpers
    # skipped values
    def make_number(n):
        try:
            return int(n)
        except ValueError:
            return float(n)

    def unescape_string(value):
        return unescape(value[1:-1])

    def make_string(value):
        return value

    unarg = lambda f: lambda args: f(*args)
    a_ = lambda x: skip(a(x))
    isContextOpenClose = lambda s: some(s.value in [context_open, context_close])
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda type: some(lambda x: x.type == type) >> tokval

    # types
    eq = toktype(u'Eq') >> make_string
    id = toktype(u'Name') >> make_string
    string = toktype(u'String') >> unescape_string
    number = toktype(u'Number') >> make_number
    value = id | number
    name = lambda s: a(Token(u'Name', s)) >> tokval

    accessor = forward_decl()
    argument = accessor | value | string
    function = id
    argument_list = (argument + maybe(skip(eq_keyword) + argument) +
                     skip(maybe(comma)) >> unarg(Attr))

    # rules
    assignment = id + skip(eq) + argument >> Assignment
    functionCall = (function + call_open + many(argument_list) + call_close
                    >> FunctionCall)
    environmentCall = (environment_keyword + delegation_keyword + id +
                       call_open + many(argument_list) + call_close
                       >> EnvironmentCall)
    accessor.define(id + delegation_keyword + id >> Accessor)

    context_block = many(assignment | functionCall)
    contextDefinition = (def_keyword + context_keyword + id + context_open +
                         maybe(context_block) + context_close >> ContextDefinition)

    enviro_block = many(assignment | contextDefinition)
    environment = (environment_keyword + context_open + maybe(enviro_block) +
                   context_close >> Environment)

    simulacion_line = assignment | environment | environmentCall | functionCall
    simulacion_program = many(simulacion_line) + skip(finished)
    return simulacion_program.parse(seq)
def parse(seq, region='', account_id='', visitors=[]):
    """Parse the given sequence of tokens into a StateMachine object

    Args:
        seq (list): List of lexer.Token tokens to parse
        region (string): AWS Region where Lambdas and Activities are located
        account_id (string): AWS Account ID where Lambdas and Activities are located
        visitors (list[ast.StateVisitor]): List of StateVisitors that can be used to modify Task states

    Returns:
        sfn.StateMachine: StateMachine object
    """
    state = forward_decl()

    # Primitives
    array = op_('[') + maybe(string + many(op_(',') + string)) + op_(']') >> make_array
    block = block_s + many(state) + block_e
    comment_block = block_s + maybe(string) + many(state) + block_e

    parameter_kv = name + maybe(op_('.') + e('$')) + op_(':') + json_text
    parameter_block = (n('parameters') + op_(':') +
                       block_s + parameter_kv + many(parameter_kv) + block_e
                       >> make(ASTModParameters))

    retry_block = (n('retry') + (array | string) +
                   integer_pos + integer_nn + number >> make(ASTModRetry))
    catch_block = (n('catch') + (array | string) + op_(':') +
                   maybe(string) + block >> make(ASTModCatch))

    # Simple States
    # DP Note: The 'next' modifier is not allowed in general usage, must use the 'Goto'
    #          state to create that modifier. If 'next' should be allowed from any state
    #          just add it to 'state_modifier' and 'transform_modifier'
    state_modifier = (
        (n('timeout') + op_(':') + integer_pos >> make(ASTModTimeout)) |
        (n('heartbeat') + op_(':') + integer_pos >> make(ASTModHeartbeat)) |
        (n('input') + op_(':') + string >> make(ASTModInput)) |
        (n('result') + op_(':') + string >> make(ASTModResult)) |
        (n('output') + op_(':') + string >> make(ASTModOutput)) |
        (n('data') + op_(':') + block_s + json_text + block_e >> make(ASTModData)) |
        parameter_block |
        retry_block |
        catch_block)
    state_modifiers = state_modifier + many(state_modifier) >> make(ASTModifiers)
    state_block = maybe(block_s + maybe(string) + maybe(state_modifiers) + block_e)

    pass_ = n('Pass') + op_('(') + op_(')') + state_block >> make(ASTStatePass)
    success = n('Success') + op_('(') + op_(')') + state_block >> make(ASTStateSuccess)
    fail = (n('Fail') + op_('(') + string + op_(',') + string + op_(')') +
            state_block >> make(ASTStateFail))
    wait_types = n('seconds') | n('seconds_path') | n('timestamp') | n('timestamp_path')
    wait = (n('Wait') + op_('(') + wait_types + op_('=') +
            (integer_pos | timestamp_or_string) + op_(')') +
            state_block >> make(ASTStateWait))
    task = (name + maybe(op_('.') + name) + op_('(') + maybe(string) + op_(')') +
            state_block >> make(ASTStateTask))
    simple_state = pass_ | success | fail | wait | task

    # Flow Control States
    transform_modifier = (
        (n('input') + op_(':') + string >> make(ASTModInput)) |
        (n('result') + op_(':') + string >> make(ASTModResult)) |
        (n('output') + op_(':') + string >> make(ASTModOutput)))
    transform_modifiers = transform_modifier + many(transform_modifier) >> make(ASTModifiers)
    transform_block = maybe(n_('transform') + op_(':') +
                            block_s + maybe(transform_modifiers) + block_e)

    while_ = (n('while') + comparison + op_(':') +
              comment_block + transform_block >> make(ASTStateWhile))

    if_else = (n('if') + comparison + op_(':') + comment_block +
               many(n_('elif') + comparison + op_(':') + block) +
               maybe(n_('else') + op_(':') + block) +
               transform_block) >> make(ASTStateIfElse)

    switch_case = n('case') + (boolean | number | timestamp_or_string) + op_(':') + block
    switch = (n('switch') + string + op_(':') +
              block_s + maybe(string) + many(switch_case) +
              maybe(n('default') + op_(':') + block) + block_e +
              transform_block) >> make(ASTStateSwitch)
    choice_state = while_ | if_else | switch

    error_modifier = ((retry_block | catch_block) +
                      many(retry_block | catch_block) >> make(ASTModifiers))
    error_block = maybe(n_('error') + op_(':') + block_s + maybe(error_modifier) + block_e)
    parallel = (n('parallel') + op_(':') + comment_block +
                many(n('parallel') + op_(':') + block) +
                transform_block + error_block) >> make(ASTStateParallel)

    goto = n('goto') + string >> make(ASTStateGoto)

    state.define(simple_state | choice_state | parallel | goto)

    # State Machine
    version = maybe(n('version') + op_(':') + string >> make(ASTModVersion))
    timeout = maybe(n('timeout') + op_(':') + integer_pos >> make(ASTModTimeout))
    machine = (maybe(string) + version + timeout + many(state) + end
               >> make(ASTStepFunction))

    try:
        # DP NOTE: calling run() directly to have better control of error handling
        (tree, _) = machine.run(seq, State())

        link_branch(tree)
        check_names(tree)
        resolve_arns(tree, region, account_id)
        verify_goto_targets(tree)

        for visitor in visitors:
            visitor.visit(tree)

        function = StepFunction(tree)
        # import code
        # code.interact(local=locals())
        return function
    except NoParseError as ex:
        max = ex.state.max
        tok = seq[max] if len(seq) > max else Token('EOF', '<EOF>')

        if tok.code == 'ERRORTOKEN':
            msg = "Unterminated quote"
        else:
            msg = "Invalid syntax"

        # DP ???: Should the actual token be used in the error message?
        raise CompileError.from_token(tok, msg)
def parse(seq):
    """Sequence(Token) -> object"""
    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    _id = some(lambda t: t.type in ['Name', 'Number', 'String']) >> tokval
    keyword = lambda s: a(Token('Name', s)) >> tokval

    def make_node_list(node_list, attrs):
        return Statements([Node(node, attrs) for node in node_list])

    def make_edge(first, edge_type, second, followers, attrs):
        edges = [Edge(first, edge_type, second, attrs)]
        from_node = second
        for edge_type, to_node in followers:
            edges.append(Edge(from_node, edge_type, to_node, attrs))
            from_node = to_node
        return Statements(edges)

    #
    # parts of syntax
    #
    node_list = (_id + many(op_(',') + _id) >> create_mapper(oneplus_to_list))
    option_stmt = (_id + maybe(op_('=') + _id) >> create_mapper(Attr))
    option_list = (maybe(op_('[') + option_stmt + many(op_(',') + option_stmt) + op_(']'))
                   >> create_mapper(oneplus_to_list, default_value=[]))

    # node (node list) statement::
    #    A;
    #    B [attr = value, attr = value];
    #    C, D [attr = value, attr = value];
    #
    node_stmt = (node_list + option_list >> create_mapper(make_node_list))

    # edge statement::
    #    A -> B;
    #    A <- B;
    #
    edge_relation = (op('->') | op('--') | op('<-') | op('<->') |
                     op('>-') | op('-<') | op('>-<'))
    edge_stmt = (node_list + edge_relation + node_list +
                 many(edge_relation + node_list) + option_list
                 >> create_mapper(make_edge))

    # attributes statement::
    #    default_shape = box;
    #    default_fontsize = 16;
    #
    attribute_stmt = (_id + op_('=') + _id >> create_mapper(Attr))

    # extension statement (class, plugin)::
    #    class red [color = red];
    #    plugin attributes [name = Name];
    #
    extension_stmt = ((keyword('class') | keyword('plugin')) + _id + option_list
                      >> create_mapper(Extension))

    # group statement::
    #    group {
    #      A;
    #    }
    #
    group_stmt = forward_decl()
    group_inline_stmt = (edge_stmt | group_stmt | attribute_stmt | node_stmt)
    group_inline_stmt_list = (many(group_inline_stmt + skip(maybe(op(';')))))
    group_stmt.define(skip(keyword('group')) + maybe(_id) +
                      op_('{') + group_inline_stmt_list + op_('}')
                      >> create_mapper(Group))

    # diagram statement::
    #    blockdiag {
    #      A;
    #    }
    #
    diagram_id = ((keyword('diagram') | keyword('blockdiag')) + maybe(_id) >> list)
    diagram_inline_stmt = (extension_stmt | group_inline_stmt)
    diagram_inline_stmt_list = (many(diagram_inline_stmt + skip(maybe(op(';')))))
    diagram = (maybe(diagram_id) + op_('{') + diagram_inline_stmt_list + op_('}')
               >> create_mapper(Diagram))

    dotfile = diagram + skip(finished)
    return dotfile.parse(seq)
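# And the corresponding blockdiag source shape -- node lists, chained
# edges, attributes and groups (names invented):
#
#   blockdiag {
#       default_shape = box;
#       A, B [color = red];
#       A -> B -> C;
#       group {
#           C;
#       }
#   }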
def parse( seq: List[Token], gen_actor: Callable[[str, str], Actor], exported_tco: bool = True, custom_action_parser_pfx: Optional[Parser] = None, custom_action_parser_reg: Optional[Parser] = None, custom_query_parser_op: Optional[Parser] = None, custom_query_parser_pfx: Optional[Parser] = None, custom_query_parser_reg: Optional[Parser] = None ) -> Tuple[List[RootNode], List[Actor]]: actors: Dict[Tuple[str, str], Actor] = {} nid = 0 def next_id() -> int: nonlocal nid rv, nid = nid, nid + 1 return rv def check_function(p, type_, start, end): if len(p) == 1: p = p[0] if isinstance(p, dict): actor = (p.pop('.actor_name'), p.pop('.actor_secondary')) name = p.pop('.name') negated = p.pop('.negated') params = pdict = p prepare_params = False else: actor_name, actor_secondary, name, params = p actor = (actor_name, actor_secondary or '') negated = False prepare_params = True function_name = f'EventFlow{type_.capitalize()}{name}' if actor not in actors: actors[actor] = gen_actor(*actor) mp = getattr(actors[actor], ['actions', 'queries'][type_ == 'query']) if function_name not in mp: emit_warning(f'no {type_} with name "{function_name}" found, using empty call', start, end) if type_ == 'action': actors[actor].register_action(Action(actor, function_name, [])) else: actors[actor].register_query(Query(actor, function_name, [], IntType, False)) function = mp[function_name] if prepare_params: try: pdict = function.prepare_param_dict([p for name, p in params if name is None]) for name, p in params: if name is not None: if name in pdict: emit_warning(f'keyword argument name {name} matches positional argument name', start, end) pdict[name] = p except AssertionError as e: emit_error(str(e), start, end) raise LogError() return function_name, function, pdict, negated @__wrap_result def make_action(n, start, end): action_name, action, pdict, _ = check_function(n, 'action', start, end) return (), (ActionNode(f'Event{next_id()}', action, pdict),) @__wrap_result def make_case(n, start, end): if isinstance(n, tuple) and len(n) == 2: return ([x.value for x in n[0]], n[1]) return n def _get_query_num_values(query, start, end): num_values = query.num_values if num_values == 999999999: emit_warning(f'maximum value for {query.name} unknown; assuming 50', start, end, print_source=False) emit_warning(f'setting a maximum value in functions.csv may reduce generated bfevfl size', start, end) num_values = 50 return num_values @__wrap_result def make_switch(n, start, end): p, branches = n[:-1], n[-1] cases = branches[0] + branches[2] default = branches[1] query_name, query, pdict, _ = check_function(p, 'query', start, end) sw = SwitchNode(f'Event{next_id()}', query, pdict) entrypoints = [] enum_values = {} if query.rv.type.startswith('enum['): enum_values_list = query.rv.type[5:-1].split(',') enum_values = {v.strip(): i for i, v in enumerate(enum_values_list)} for values, block in cases: eps, node, connector = block entrypoints.extend(eps) sw.add_out_edge(node) connector.add_out_edge(sw.connector) for value in values: if isinstance(value, str): if not enum_values: emit_error(f'Query "{query.name}" does not return an enum', start, end) raise LogError() if value not in enum_values: emit_error(f'Enum "{value}" is not a valid return value of "{query.name}"', start, end) raise LogError() value = enum_values[value] sw.add_case(node, value) num_values = _get_query_num_values(query, start, end) default_values = set(range(num_values)) - set(sum((v for v, n in cases), [])) if default_values: if default is not None: _, default, connector 
= default connector.add_out_edge(sw.connector) default_branch = default or sw.connector sw.add_out_edge(default_branch) for value in default_values: sw.add_case(default_branch, value) elif default: emit_warning(f'default branch for {query_name} call is dead code, ignoring', start, end) return entrypoints, (sw,) @__wrap_result def make_bool_function(p, start, end): query_name, query, pdict, negated = check_function(p, 'query', start, end) num_values = _get_query_num_values(query, start, end) if num_values > 2: emit_warning(f'call to {query_name} treated as boolean function but may not be', start, end) return ((query, pdict), [({0}, query.inverted != negated), (set(range(1, num_values)), (not query.inverted) != negated)]) @__wrap_result def make_in(p, start, end): p, values = p[:-1], p[-1] query_name, query, pdict, _ = check_function(p, 'query', start, end) num_values = _get_query_num_values(query, start, end) matched = set() unmatched = set(range(num_values)) enum_values = {} if query.rv.type.startswith('enum['): enum_values_list = query.rv.type[5:-1].split(',') enum_values = {v.strip(): i for i, v in enumerate(enum_values_list)} for value in values: if isinstance(value.value, str): if not enum_values: emit_error(f'Query "{query.name}" does not return an enum', start, end) raise LogError() if value.value not in enum_values: emit_error(f'Enum "{value.value}" is not a valid return value of "{query.name}"', start, end) raise LogError() value.value = enum_values[value.value] if 0 > value.value or num_values <= value.value: emit_warning('{value.value} never returned by {query_name}, ignored', start, end) continue matched.add(value.value) unmatched.remove(value.value) if not matched or not unmatched: emit_warning(f'always true or always false check', start, end) return ((query, pdict), [(matched, True), (unmatched, False)]) @__wrap_result def make_cmp(p, start, end): p, op, value = p[:-2], p[-2], p[-1] query_name, query, pdict, _ = check_function(p, 'query', start, end) num_values = _get_query_num_values(query, start, end) if isinstance(value.value, str): if query.rv.type.startswith('enum['): enum_values_list = query.rv.type[5:-1].split(',') value.value = enum_values_list.index(value.value) if value.value == -1: emit_error(f'Enum "{value.value}" is not a valid return value of "{query.name}"', start, end) raise LogError() else: emit_error(f'Query "{query.name}" does not return an enum', start, end) raise LogError() if op == '==' or op == '!=': matched = {value.value} if 0 <= value.value < num_values else set() unmatched = set(i for i in range(num_values) if i != value.value) elif op == '<' or op == '>=': matched = set(range(min(num_values, value.value))) unmatched = set(range(value.value, num_values)) else: matched = set(range(min(num_values, value.value + 1))) unmatched = set(range(value.value + 1, num_values)) if op in ('!=', '>=', '>'): matched, unmatched = unmatched, matched if not matched or not unmatched: emit_warning(f'always true or always false check', start, end) return ((query, pdict), [(matched, True), (unmatched, False)]) def _predicate_replace(values, old, new): for i in range(len(values)): if values[i][1] == old: values[i] = (values[i][0], new) @__wrap_result def make_or(p, start, end): left, right = p if isinstance(right, TypedValue): left, right = right, left if isinstance(left, TypedValue): if isinstance(right, TypedValue): return TypedValue(type=BoolType, value=left.value or right.value) if not left.value: return right else: return TypedValue(type=BoolType, value=True) # todo: 
        # todo: can probably optimize for smaller output
        _predicate_replace(left[1], False, right)
        return left

    @__wrap_result
    def make_and(p, start, end):
        left, right = p
        if isinstance(right, TypedValue):
            left, right = right, left
        if isinstance(left, TypedValue):
            if isinstance(right, TypedValue):
                return TypedValue(type=BoolType, value=left.value and right.value)
            if left.value:
                return right
            else:
                return TypedValue(type=BoolType, value=False)
        # todo: can probably optimize for smaller output
        _predicate_replace(left[1], True, right)
        return left

    @__wrap_result
    def make_not(p, start, end):
        if isinstance(p, TypedValue):
            p.value = not p.value
            return p
        _predicate_replace(p[1], True, None)
        _predicate_replace(p[1], False, True)
        _predicate_replace(p[1], None, False)
        return p

    def _expand_table(table, current, next_):
        ((query, pdict), values) = table
        sw = SwitchNode(f'Event{next_id()}', query, pdict)
        for match, action in values:
            if isinstance(action, tuple):
                to = _expand_table(action, current, next_)
            elif action:
                to = current
            else:
                to = next_
            for value in match:
                sw.add_case(to, value)
            sw.add_out_edge(to)
        return sw

    @__wrap_result
    def make_ifelse(n, start, end):
        if_, block, elifs, else_ = n
        cond_branches = [(if_, block)] + elifs + ([(None, else_)] if else_ else [])
        entrypoints = []
        next_ = next_connector = ConnectorNode(f'Connector{next_id()}')
        for table, body in cond_branches[::-1]:
            eps, node, branch_connector = body
            entrypoints.extend(eps)
            if table is None:
                next_ = node
                next_connector = branch_connector
            elif isinstance(table, TypedValue):
                if table.value:
                    next_ = node
                    next_connector = branch_connector
                else:
                    continue
            else:
                next_ = _expand_table(table, node, next_)
                branch_connector.add_out_edge(next_.connector)
                next_connector.add_out_edge(next_.connector)
                next_connector = next_.connector
        return entrypoints, (next_,)

    @__wrap_result
    def make_fork(n_, start, end):
        for entrypoints, node, connector in n_:
            for ep in entrypoints:
                __replace_node(ep, connector, None)
            __replace_node(node, connector, None)
        eps = __flatten([ep for ep, _, _ in n_])
        n = [x for _, x, _ in n_]
        fork_id, join_id = next_id(), next_id()
        join = JoinNode(f'Event{join_id}')
        fork = ForkNode(f'Event{fork_id}', join, n)
        for node in n:
            fork.add_out_edge(node)
        return eps, (fork, join)

    @__wrap_result
    def make_while(n, start, end):
        table, (eps, node, connector) = n
        next_connector = ConnectorNode(f'Connector{next_id()}')
        if isinstance(table, TypedValue):
            if table.value:
                # while true
                connector.add_out_edge(node)
                return eps, (node, connector)
            else:
                # while false
                if eps:
                    emit_error('entrypoints in while-false not supported', start, end)
                    raise LogError()
                return [], (connector, connector)
        else:
            next_ = _expand_table(table, node, next_connector)
            connector.add_out_edge(next_)
            return eps, (next_, next_connector)

    @__wrap_result
    def make_do_while(n, start, end):
        (eps, node, connector), table = n
        next_connector = ConnectorNode(f'Connector{next_id()}')
        if isinstance(table, TypedValue):
            if table.value:
                # do while true
                connector.add_out_edge(node)
                return eps, (node, connector)
        else:
            next_ = _expand_table(table, node, next_connector)
            connector.add_out_edge(next_)
            return eps, (node, next_connector)

    @__wrap_result
    def make_subflow_param(n, start, end):
        return (n,)

    @__wrap_result
    def make_subflow(n, start, end):
        ns, name, params = n
        param_dict = {k[0][0]: k[0][1] for k in params}
        return (), (SubflowNode(f'Event{next_id()}', ns or '', name, param_dict),)

    @__wrap_result
    def make_none(_, start, end):
        return None

    @__wrap_result
    def make_return(_, start, end):
        return (), (TerminalNode,)
    @__wrap_result
    def named_value(n, start, end):
        return n

    @__wrap_result
    def unnamed_value(n, start, end):
        return (None, n)

    @__wrap_result
    def make_param(n, start, end):
        return n

    def verify_params(name, root, params_list):
        for param, type_, value in params_list:
            if value is not None and type_ != value.type:
                emit_error(f'variable definition for {param} in {name} is of type {type_} but has default value of type {value.type}')
                raise LogError()
        params = {param: type_ for param, type_, value in params_list}
        for node in find_postorder(root):
            if isinstance(node, (ActionNode, SwitchNode)):
                for param, value in node.params.items():
                    if isinstance(value.value, Argument):
                        if value.value not in params:
                            emit_error(f'variable {value.value} not defined in flow {name}')
                            raise LogError()
                        expected_type = params[value.value]
                        actual_type = value.type
                        if param.startswith('EntryVariableKey'):
                            if param[16:].startswith('Int_'):
                                actual_type = IntType
                            elif param[16:].startswith('Bool_'):
                                actual_type = BoolType
                            elif param[16:].startswith('Float_'):
                                actual_type = FloatType
                            elif param[16:].startswith('String_'):
                                actual_type = StringType
                        else:
                            value.type = ArgumentType
                        if actual_type != AnyType and expected_type != actual_type:
                            emit_error(f'variable {value.value} has the wrong type, defined to be {expected_type} but used as {actual_type}')
                            raise LogError()
            if isinstance(node, SubflowNode):
                for param, value in node.params.items():
                    if isinstance(value.value, Argument):
                        if value.value not in params:
                            emit_error(f'variable {value.value} not defined in flow {name}')
                            raise LogError()
                        value.type = params[value.value]

    @__wrap_result
    def make_flow(n, start, end):
        local, name, params, body = n
        entrypoints, body_root, body_connector = body
        verify_params(name, body_root, params)
        vardefs = [RootNode.VarDef(name=n, type=t, initial_value=v.value if v else None)
                   for n, t, v in params]
        valueless_vardefs = [RootNode.VarDef(name=n, type=t, initial_value=None)
                             for n, t, v in params]
        node = RootNode(name, local is not None, False, vardefs)
        for e in entrypoints:
            e.vardefs = valueless_vardefs[:]
        node.add_out_edge(body_root)
        body_connector.add_out_edge(TerminalNode)
        return list(entrypoints) + [node]

    @__wrap_result
    def link_ep_block(n, start, end):
        connector = ConnectorNode(f'Connector{next_id()}')
        ep, block_info = n
        block_info = [x for x in block_info if x is not None]
        if block_info:
            eps, block = (__flatten(p) for p in zip(*(x for x in block_info if x is not None)))
        else:
            eps, block = [], ()
        if not block:
            if ep is not None:
                ep_node = RootNode(ep, True, True, [])
                ep_node.add_out_edge(connector)
                eps.append(ep_node)
            return (eps, connector, connector)
        for n1, n2 in zip(block, block[1:] + [connector]):
            n1_conn = n1
            if isinstance(n1, tuple):
                n1, n1_conn = n1
            if isinstance(n2, tuple):
                n2, _ = n2
            if isinstance(n1, SwitchNode):
                n1.connector.add_out_edge(n2)
            else:
                n1_conn.add_out_edge(n2)
        if ep is not None:
            ep_node = RootNode(ep, True, True, [])
            ep_node.add_out_edge(block[0])
            eps.append(ep_node)
        return (eps, block[0], connector)

    @__wrap_result
    def link_block(n, start, end):
        connector = ConnectorNode(f'Connector{next_id()}')
        n = [n[0]] + n[1]
        eps, blocks, connectors = zip(*n)
        eps = __flatten(eps)
        for connector, block in zip(connectors[:-1], blocks[1:]):
            connector.add_out_edge(block)
        return (eps, blocks[0], connectors[-1])

    @__wrap_result
    def collect_flows(n, start, end):
        if n is None:
            return []
        else:
            return __flatten([x for x in n if x is not None])

    block = forward_decl()
    # pass = PASS NL
    pass_ = (__tokkw('pass') + __tokop('NL')) >> make_none
    # return = RETURN NL
    return_ = (__tokkw('return') + __tokop('NL')) >> make_return
    # id_value = id ASSIGN value | value
    id_value = ((id_ + __tokop('ASSIGN') + __value) >> named_value) | (__value >> unnamed_value)
    # function_params = [id_value {COMMA id_value}]
    function_params = maybe(id_value + many(__tokop('COMMA') + id_value)) >> __make_array
    # actor_name = id [ AT id ]
    actor_name = id_ + maybe(__tokop('AT') + id_)
    # function_name = id
    function_name = id_
    # base_function = actor_name DOT action_name LPAREN function_params RPAREN
    base_function = (
        actor_name + __tokop('DOT') + function_name
        + __tokop('LPAREN') + function_params + __tokop('RPAREN')
    )
    # function = custom_query_parser_reg | base_function | custom_query_parser_pfx | LPAREN function RPAREN
    function = forward_decl()
    if custom_query_parser_reg is not None:
        function_ = custom_query_parser_reg | base_function
    else:
        function_ = base_function
    if custom_query_parser_pfx is not None:
        function_ = function_ | custom_query_parser_pfx
    function_ = function_ | (__tokop('LPAREN') + function + __tokop('RPAREN'))
    function.define(function_)
    # action = (custom_action_parser_reg | base_function | custom_action_parser_pfx) NL
    if custom_action_parser_reg is not None:
        action = custom_action_parser_reg | base_function
    else:
        action = base_function
    if custom_action_parser_pfx is not None:
        action = action | custom_action_parser_pfx
    action = action + __tokop('NL') >> make_action
    # int_or_enum = INT | QUOTE_ID
    int_or_enum = (__toktype('INT') >> __int) | (__toktype('QUOTE_ID') >> __identifier)
    # __intlist = int_or_enum {COMMA int_or_enum} [COMMA] | LPAREN __intlist RPAREN
    __intlist = forward_decl()
    __intlist.define((int_or_enum + many(__tokop('COMMA') + int_or_enum) + maybe(__tokop('COMMA')) >> __make_array) |
                     (__tokop('LPAREN') + __intlist + __tokop('RPAREN')))
    # case = CASE __intlist block
    case = __tokkw('case') + __intlist + block >> make_case
    # default = DEFAULT block
    default = __tokkw('default') + block >> make_case
    # cases = { case } [ default ] { case } | pass
    cases = many(case) + maybe(default) + many(case) | pass_
    # switch = SWITCH function COLON NL INDENT cases DEDENT
    switch = __tokkw('switch') + function + __tokop('COLON') + __tokop('NL') + \
             __tokop('INDENT') + cases + __tokop('DEDENT') >> make_switch

    predicate = forward_decl()
    predicate0 = forward_decl()
    predicate1 = forward_decl()
    predicate2 = forward_decl()
    predicate3 = forward_decl()
    # bool_function = function
    bool_function = function >> make_bool_function
    # in_predicate = function IN __intlist
    in_predicate = function + __tokkw('in') + __intlist >> make_in
    # not_in_predicate = function NOT IN __intlist
    not_in_predicate = function + __tokkw('not') + __tokkw('in') + __intlist >> make_in >> make_not
    # cmp_predicate = function CMP int_or_enum
    cmp_predicate = function + __toktype('CMP') + int_or_enum >> make_cmp
    # not_predicate = NOT predicate0
    not_predicate = __tokkw('not') + predicate0 >> make_not
    # const_predicate = TRUE | FALSE
    const_predicate = (__tokkw_keep('true') | __tokkw_keep('false')) >> __bool
    # paren_predicate = LPAREN predicate RPAREN
    paren_predicate = __tokop('LPAREN') + predicate + __tokop('RPAREN')
    # predicate0 = not_predicate | const_predicate | bool_function | paren_predicate
    predicate0.define(not_predicate | const_predicate | bool_function | paren_predicate)
    # predicate1 = custom_query_parser_op | in_predicate | not_in_predicate | cmp_predicate | predicate0
    predicate1_ = in_predicate | not_in_predicate | cmp_predicate | predicate0
    if custom_query_parser_op is not None:
        predicate1_ = (custom_query_parser_op >> make_bool_function) | predicate1_
    predicate1.define(predicate1_)
    # and_predicate = predicate1 AND predicate2
    and_predicate = predicate1 + __tokkw('and') + predicate2 >> make_and
    # predicate2 = and_predicate | predicate1
    predicate2.define(and_predicate | predicate1)
    # or_predicate = predicate2 OR predicate3
    or_predicate = predicate2 + __tokkw('or') + predicate3 >> make_or
    # predicate3 = or_predicate | predicate2
    predicate3.define(or_predicate | predicate2)
    # predicate = predicate3
    predicate.define(predicate3)
    # if = IF predicate block
    if_ = __tokkw('if') + predicate + block
    # elif = ELIF predicate block
    elif_ = __tokkw('elif') + predicate + block
    # else = ELSE block
    else_ = __tokkw('else') + block
    # ifelse = if { elif } [ else ]
    ifelse = if_ + many(elif_) + maybe(else_) >> make_ifelse
    # branches = { BRANCH block }
    # branchless case handled implicitly by lack of INDENT
    branches = many(__tokkw('branch') + block)
    # fork = FORK COLON NL INDENT branches DEDENT
    fork = __tokkw('fork') + __tokop('COLON') + __tokop('NL') + \
           __tokop('INDENT') + branches + __tokop('DEDENT') >> make_fork
    # while = WHILE predicate block
    while_ = __tokkw('while') + predicate + block >> make_while
    # do_while = DO block WHILE predicate NL
    do_while = __tokkw('do') + block + __tokkw('while') + predicate + __tokop('NL') >> make_do_while
    # flow_name = [id COLON COLON] id
    flow_name = maybe(id_ + __tokop('COLON') + __tokop('COLON')) + id_
    # subflow_param = id ASSIGN nonenum_value
    subflow_param = id_ + __tokop('ASSIGN') + __nonenum_value >> make_subflow_param
    # subflow_params = [subflow_param { COMMA subflow_param }]
    subflow_params = maybe(subflow_param + many(__tokop('COMMA') + subflow_param)) >> __make_array
    # run = RUN flow_name LPAREN subflow_params RPAREN NL
    run = (
        __tokkw('run') + flow_name + __tokop('LPAREN') + subflow_params
        + __tokop('RPAREN') + __tokop('NL')
    ) >> make_subflow
    # stmt = action | switch | ifelse | fork | while_ | do_while | run | pass_ | return | NL
    stmt = action | switch | ifelse | fork | while_ | do_while | run | pass_ | return_ | (__tokop('NL') >> make_none)
    # entrypoint = ENTRYPOINT id COLON NL
    entrypoint = __tokkw('entrypoint') + id_ + __tokop('COLON') + __tokop('NL')
    # stmts = stmt { stmt }
    stmts = stmt + many(stmt) >> __make_array
    # ep_block_body = [entrypoint] stmts
    ep_block_body = maybe(entrypoint) + stmts >> link_ep_block
    # block_body = ep_block_body { ep_block_body }
    block_body = ep_block_body + many(ep_block_body) >> link_block
    # block = COLON NL INDENT block_body DEDENT
    block.define(__tokop('COLON') + __tokop('NL') + __tokop('INDENT') + block_body + __tokop('DEDENT'))
    # type = INT | FLOAT | STR | BOOL | ANY
    type_atom = (__tokkw_keep('int') | __tokkw_keep('float') | __tokkw_keep('str')
                 | __tokkw_keep('bool') | __tokkw_keep('any')) >> __type
    # flow_param = ID COLON TYPE [ASSIGN base_value]
    flow_param = id_ + __tokop('COLON') + type_atom + maybe(__tokop('ASSIGN') + __base_value) >> make_param
    # flow_params = [flow_param { COMMA flow_param }]
    flow_params = maybe(flow_param + many(__tokop('COMMA') + flow_param)) >> __make_array
    # flow = [LOCAL] FLOW ID LPAREN flow_params RPAREN block
    flow = (
        maybe(a(Token('ID', 'local'))) + __tokkw('flow') + id_
        + __tokop('LPAREN') + flow_params + __tokop('RPAREN') + block
    ) >> make_flow
    # file = { flow | NL }
    evfl_file = many(flow | (__tokop('NL') >> make_none)) >> collect_flows

    parser = evfl_file + skip(finished)
    roots: List[RootNode] = parser.parse(seq).value
    local_roots = {r.name: r for r in roots if r.local}
    exported_roots = {r.name: r for r in roots if not r.local}
    for n in roots:
        __collapse_connectors(n)
    __process_local_calls(roots, local_roots, exported_roots, exported_tco)
    for n in roots:
        __replace_node(n, TerminalNode, None)
    return list(exported_roots.values()), list(actors.values())
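# The __tok* helpers referenced throughout the grammar above are defined
# elsewhere in the module. A minimal sketch of the assumed pattern follows;
# the names and the 'ID' token type are illustrative, not the project's exact
# definitions:
from funcparserlib.lexer import Token
from funcparserlib.parser import a, skip, some

def __toktype(type_):
    # match any token of the given lexer type, yielding its value
    return some(lambda t: t.type == type_) >> (lambda t: t.value)

def __tokop(type_):
    # match a token of the given type and drop it from the result
    return skip(some(lambda t: t.type == type_))

def __tokkw(value):
    # match a keyword token by exact value and drop it
    return skip(a(Token('ID', value)))

def __tokkw_keep(value):
    # match a keyword token by exact value but keep it in the result
    return a(Token('ID', value))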
not_in = op('not') + op('in') >> const(lambda x, y: x not in y)
true = kw('True') >> token_value >> make_bool
false = kw('False') >> token_value >> make_bool
name = token_type('name') >> token_value >> make_name
number = token_type('number') >> token_value >> make_number
string = token_type('string') >> token_value >> make_string

# grammar rules
mul_op = mul | div
add_op = add | sub
cmp_op = lt | le | eq | ne | ge | gt | in_ | not_in

# forward declarations
nexpr = forward_decl()
bexpr = forward_decl()
sexpr = forward_decl()
expr = forward_decl()
lambda_ = forward_decl()
application = forward_decl()
renaming = forward_decl()
set_context = forward_decl()

# string expression
strexpr = string + many(add_op + string) >> u(concatenate)

# numerical expression
numeric = application | lambda_ | number | name | (op_('(') + nexpr + op_(')'))
factor = numeric + many(power + numeric) >> u(make_expression)
def parser(last_error=None):
    last_error = LastError() if last_error is None else last_error

    def apl(f):
        return lambda x: f(*x)

    def delim(t):
        return skip(_tok(t))

    symbol = _tok(Token.SYMBOL) >> _gen(Symbol)
    string = _tok(Token.STRING) >> _gen(String)
    placeholder = _tok(Token.PLACEHOLDER) >> _gen(Placeholder)
    keyword = _tok(Token.KEYWORD) >> _gen(Keyword)

    # Note: the tokenizer guarantees that the value consists of dots and digits
    # TODO: convert exceptions
    number = _tok(Token.NUMBER) >> _gen(Number, literal_eval)

    expr = forward_decl()
    implicit_tuple = forward_decl()

    list_ = (
        (_tok(Token.OPEN_BRACKET) + many(expr | keyword) + _tok(Token.CLOSE_BRACKET))
        >> apl(_list)
    )
    dict_ = (
        error_ctx(_tok(Token.OPEN_BRACE) + many(keyword + expr) + _tok(Token.CLOSE_BRACE),
                  last_error, DICT_ERROR)
        >> apl(_dict)
    )
    inline_args = many(expr | keyword)
    explicit_tuple = (
        error_ctx(_tok(Token.OPEN_PAREN) + symbol + inline_args + _tok(Token.CLOSE_PAREN),
                  last_error, EXPLICIT_TUPLE_ERROR)
        >> apl(_tuple)
    )
    indented_arg = (
        oneplus(implicit_tuple | expr + delim(Token.NEWLINE)) >> _maybe_join
    )
    indented_kwarg = (
        (keyword + expr + delim(Token.NEWLINE))
        | (keyword + delim(Token.NEWLINE) + delim(Token.INDENT)
           + indented_arg + delim(Token.DEDENT))
    )
    indented_args_kwargs = (
        (many(indented_kwarg) + many(indented_arg))
        >> apl(lambda pairs, args: list(chain(*(pairs + [args]))))
    )
    implicit_tuple.define(
        error_ctx(symbol + inline_args + delim(Token.NEWLINE)
                  + maybe(delim(Token.INDENT) + indented_args_kwargs + delim(Token.DEDENT)),
                  last_error, IMPLICIT_TUPLE_ERROR)
        >> apl(_implicit_tuple)
    )
    expr.define(symbol | string | number | explicit_tuple | list_ | dict_ | placeholder)
    body = (
        (many(implicit_tuple) + _tok(Token.EOF)) >> apl(_module)
    )
    return body
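# _tok and _gen above are module helpers defined elsewhere; a minimal sketch
# of their assumed shapes (the real definitions likely differ, e.g. in error
# handling):
from funcparserlib.parser import some

def _tok(type_):
    # match one lexer token of the given type, keeping the token object
    return some(lambda t: t.type == type_)

def _gen(node_cls, coerce=lambda v: v):
    # build an AST node of the given class from a matched token's value
    return lambda t: node_cls(coerce(t.value))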
xor_op = op('^') >> make_name
or_op = op('|') >> make_name
land_op = op('and') >> make_name
lor_op = op('or') >> make_name
operator_name = (op('>>=') | op('<<=') | op('*=') | op('/=') | op('%=')
                 | op('+=') | op('-=') | op('&=') | op('|=') | op('+') | op('-')
                 | op('~') | op('not') | op('?') | op('!') | op('>>') | op('<<')
                 | op('*') | op('/') | op('%') | op('<') | op('<=') | op('>=')
                 | op('>') | op('as?') | op('as!') | op('is') | op('==') | op('!=')
                 | op('~=') | op('===') | op('&') | op('^') | op('|')
                 | op('and') | op('or')) >> make_name

# Nonterminal symbols.
fun_decl = forward_decl()
struct_decl = forward_decl()
enum_decl = forward_decl()
protocol_decl = forward_decl()
expr = forward_decl()
select_expr = forward_decl()
operand = forward_decl()
if_expr = forward_decl()
switch_expr = forward_decl()
type_signature = forward_decl()
type_name = forward_decl()
pattern = forward_decl()
stmt = forward_decl()

def make_stmt_list(args):
def tokval(t: Token) -> Any:
    return t.value

ident = some(lambda t: t.type == 'Ident') >> tokval
binder = some(lambda t: t.type == 'Ident' and t.value in _binder_names) >> tokval
binop = some(lambda t: t.type == 'Ident' and t.value in bin_ops) >> tokval
lparen = skip(some(lambda t: t.type == 'LParen'))
rparen = skip(some(lambda t: t.type == 'RParen'))
dot = skip(some(lambda t: t.type == 'Dot'))
comma = skip(some(lambda t: t.type == 'Ident' and t.value == ","))

expr = forward_decl()
expr_cont_quantified = (binder + ident + dot + expr + rparen) >> (lambda xs: EQuantified(*xs))
expr_cont_expr1 = (expr + (
    ((binop + expr + rparen) >> (lambda xs: lambda e1: EApply(EApply(EIdent(xs[0]), e1), xs[1])))
    | ((expr + rparen) >> (lambda e2: lambda e1: EApply(e1, e2)))
)) >> (lambda xs: xs[1](xs[0]))
expr.define((ident >> EIdent) | (lparen + (expr_cont_quantified | expr_cont_expr1)))

def sep_by(p1: Parser, p2: Parser) -> Parser:
    # assumed definition (the body is missing in the source): one p1 followed
    # by any number of p2-separated p1s
    return p1 + many(p2 + p1)
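# Usage sketch for sep_by under the assumed definition above: a
# comma-separated sequence of expressions.
expr_list = sep_by(expr, comma)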
def parse(seq, translate=lambda x, y: y):
    """Parse the given sequence of tokens into a StateMachine object

    Args:
        seq (list): List of lexer.Token tokens to parse
        translate (function): Translation function applied to Lambda/Activity names.
                              Arguments are ("Lambda"|"Activity", arn)

    Returns:
        sfn.StateMachine: StateMachine object
    """
    state = forward_decl()

    # Primitives
    array = op_('[') + maybe(string + many(op_(',') + string)) + op_(']') >> make_array
    block = block_s + many(state) + block_e
    comment_block = block_s + maybe(string) + many(state) + block_e
    retry_block = n('retry') + (array | string) + integer_pos + integer_nn + number >> make(ASTModRetry)
    catch_block = n('catch') + (array | string) + op_(':') + maybe(string) + block >> make(ASTModCatch)

    # Simple States
    state_modifier = ((n('timeout') + op_(':') + integer_pos >> make(ASTModTimeout)) |
                      (n('heartbeat') + op_(':') + integer_pos >> make(ASTModHeartbeat)) |
                      (n('input') + op_(':') + string >> make(ASTModInput)) |
                      (n('result') + op_(':') + string >> make(ASTModResult)) |
                      (n('output') + op_(':') + string >> make(ASTModOutput)) |
                      (n('data') + op_(':') + block_s + json_text() + block_e >> make(ASTModData)) |
                      retry_block |
                      catch_block)
    state_modifiers = state_modifier + many(state_modifier) >> make(ASTModifiers)
    state_block = maybe(block_s + maybe(string) + maybe(state_modifiers) + block_e)
    pass_ = n('Pass') + op_('(') + op_(')') + state_block >> make(ASTStatePass)
    success = n('Success') + op_('(') + op_(')') + state_block >> make(ASTStateSuccess)
    fail = n('Fail') + op_('(') + string + op_(',') + string + op_(')') + state_block >> make(ASTStateFail)
    task = (n('Lambda') | n('Activity')) + op_('(') + string + op_(')') + state_block >> make(ASTStateTask)
    wait_types = n('seconds') | n('seconds_path') | n('timestamp') | n('timestamp_path')
    wait = n('Wait') + op_('(') + wait_types + op_('=') + (integer_pos | timestamp_or_string) + op_(')') + state_block >> make(ASTStateWait)
    simple_state = pass_ | success | fail | task | wait

    # Flow Control States
    transform_modifier = ((n('input') + op_(':') + string >> make(ASTModInput)) |
                          (n('result') + op_(':') + string >> make(ASTModResult)) |
                          (n('output') + op_(':') + string >> make(ASTModOutput)))
    transform_modifiers = transform_modifier + many(transform_modifier) >> make(ASTModifiers)
    transform_block = maybe(n_('transform') + op_(':') + block_s + maybe(transform_modifiers) + block_e)
    while_ = n('while') + comparison + op_(':') + comment_block + transform_block >> make(ASTStateWhile)
    if_else = (n('if') + comparison + op_(':') + comment_block +
               many(n_('elif') + comparison + op_(':') + block) +
               maybe(n_('else') + op_(':') + block) +
               transform_block) >> make(ASTStateIfElse)
    switch_case = n('case') + (boolean | number | timestamp_or_string) + op_(':') + block
    switch = (n('switch') + string + op_(':') + block_s + maybe(string) +
              many(switch_case) +
              maybe(n('default') + op_(':') + block) +
              block_e + transform_block) >> make(ASTStateSwitch)
    choice_state = while_ | if_else | switch
    error_modifier = (retry_block | catch_block) + many(retry_block | catch_block) >> make(ASTModifiers)
    error_block = maybe(n_('error') + op_(':') + block_s + maybe(error_modifier) + block_e)
    parallel = (n('parallel') + op_(':') + comment_block +
                many(n('parallel') + op_(':') + block) +
                transform_block + error_block) >> make(ASTStateParallel)
    state.define(simple_state | choice_state | parallel)

    # State Machine
    version = maybe(n('version') + op_(':') + string >> make(ASTModVersion))
    timeout = maybe(n('timeout') + op_(':') + integer_pos >> make(ASTModTimeout))
    machine = maybe(string) + version + timeout + many(state) + end >> make(ASTStepFunction)

    try:
        # DP NOTE: calling run() directly to have better control of error handling
        (tree, _) = machine.run(seq, State())
        link_branch(tree)
        check_names(tree)
        resolve_arns(tree, translate)
        function = StepFunction(tree)
        #import code
        #code.interact(local=locals())
        return function
    except NoParseError as e:
        max = e.state.max
        tok = seq[max] if len(seq) > max else Token('EOF', '<EOF>')
        if tok.code == 'ERRORTOKEN':
            msg = "Unterminated quote"
        else:
            msg = "Invalid syntax"
        # DP ???: Should the actual token be used in the error message?
        raise CompileError.from_token(tok, msg)
REAL: Final = literal("REAL") | literal("EXP")
INT: Final = literal("INT") | literal("HEX") | literal("OCT") | literal("BIN")
BOOL: Final = literal("BOOL")
STR: Final = literal("STR")
NONE: Final = literal("NONE")
LITERAL: Final = NONE | BOOL | REAL | INT | STR
NAME: Final = some(make_toktype_predicate("NAME"))
LIST_MAKER: Final = forward_decl()
DICT_MAKER: Final = forward_decl()
ATOM: Final = LITERAL | LIST_MAKER | DICT_MAKER
EXPR: Final = forward_decl()
ATOM_EXPR: Final = forward_decl()
LOOKUP_ATTR: Final = DOT + NAME >> lookup_attr
LOOKUP_ITEM: Final = LSQB + EXPR + RSQB >> lookup_item
TRAILER: Final = many(LOOKUP_ATTR | LOOKUP_ITEM)
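# `literal` and `make_toktype_predicate` are helpers from elsewhere in the
# module; a speculative sketch consistent with their use above:
from funcparserlib.parser import some

def make_toktype_predicate(toktype):
    # predicate over tokens of a given lexer type, as used for NAME above
    return lambda tok: tok.type == toktype

def literal(toktype):
    # assumed: match one literal token of the given lexer type
    return some(make_toktype_predicate(toktype))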
## [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
## [87] CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A
## [88] Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
## [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
digitsxml = re.compile(ur'[\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29]')
letters = re.compile(ur'[\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3\u4E00-\u9FA5\u3007\u3021-\u3029]')
letterxml = p.some(letters.match)
combiningchars = re.compile(ur'[\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A]')
combiningcharxml = p.some(combiningchars.match)
digitxml = p.some(digitsxml.match)
expression = p.forward_decl()
quote = p.skip(p.a("'"))
singlequoted = quote + (p.oneplus(p.a("'") + p.a("'") | p.some(lambda x: x != "'")) >> join) + quote
identifier = (letterxml + (p.many(letterxml | digitxml | p.a('_') | p.a('.') | combiningcharxml) >> join) >> join) >> string.upper >> tag(u'identifier')
iri = p.oneplus(p.some(lambda x: x not in u"'#")) >> join >> tag(u'iri')

## Whitespace ::= #x20 | #x09 | #x0a | #x0d
whitespace = oneof(u'\x20\x09\x0a\x0d')
spaces = p.skip(p.many(whitespace))
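# `oneof`, `join`, and `tag` are helpers defined elsewhere in this module; a
# minimal sketch of their assumed shapes (Python 2, matching the ur'' literals
# above):
def oneof(chars):
    # parser matching any single character from the given set
    return p.some(lambda c: c in chars)

def join(chars):
    # glue a sequence of parsed characters back into a single string
    return u''.join(chars)

def tag(name):
    # pair the parsed value with a syntactic tag
    return lambda value: (name, value)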
def parse(seq):
    """Sequence(Token) -> object"""
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]

    def make_object(n):
        return dict(make_array(n))

    def make_number(n):
        try:
            return int(n)
        except ValueError:
            return float(n)

    def unescape(s):
        std = {
            '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f',
            'n': '\n', 'r': '\r', 't': '\t',
        }

        def sub(m):
            if m.group('standard') is not None:
                return std[m.group('standard')]
            else:
                return chr(int(m.group('unicode'), 16))

        return re_esc.sub(sub, s)

    def make_string(n):
        return unescape(n[1:-1])

    null = n('null') >> const(None)
    true = n('true') >> const(True)
    false = n('false') >> const(False)
    number = toktype('Number') >> make_number
    string = toktype('String') >> make_string
    value = forward_decl()
    member = string + op_(':') + value >> tuple
    object = (
        op_('{') + maybe(member + many(op_(',') + member)) + op_('}')
        >> make_object)
    array = (
        op_('[') + maybe(value + many(op_(',') + value)) + op_(']')
        >> make_array)
    value.define(
        null
        | true
        | false
        | object
        | array
        | number
        | string)
    json_text = object | array
    json_file = json_text + skip(finished)

    return json_file.parse(seq)
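# A minimal, self-contained tokenizer for the JSON parser above. The regexps
# are simplified assumptions; the real module defines its own token specs and
# the re_esc escape regexp used by unescape().
from funcparserlib.lexer import make_tokenizer

def tokenize(s):
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('String', (r'"(\\.|[^"\\])*"',)),
        ('Number', (r'-?\d+(\.\d+)?([Ee][+-]?\d+)?',)),
        ('Name', (r'[A-Za-z_]\w*',)),
        ('Op', (r'[{}\[\]:,]',)),
    ]
    t = make_tokenizer(specs)
    # drop whitespace tokens before parsing
    return [x for x in t(s) if x.type != 'Space']

# parse(tokenize('{"a": [1, 2, null]}')) would yield {'a': [1, 2, None]}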
@star_args
def make_param_type(type, param):
    params = param if isinstance(param, list) else [param]
    return ParameterizedType(type, params)

@star_args
def make_param_list(head, tail):
    return [head] + tail

identifier = tok('identifier')
class_type = identifier + many(op('.') + identifier) >> make_class_type
type_expr = forward_decl()
simple_expr = class_type
paren_param_list = (
    op('(') + type_expr + many(op(',') + type_expr) + op(')')
    >> make_param_list)
bracket_param_list = (
    op('[') + type_expr + many(op(',') + type_expr) + op(']')
    >> make_param_list)
of_param_list = op('of') + (paren_param_list | type_expr)
param_type = (
    simple_expr + (of_param_list | bracket_param_list)
    >> make_param_type)
type_expr.define(param_type | class_type)
type_file = type_expr + skip(finished)
def base_parser(validate_field, validate_entry):
    """Return the base parser which all other parsers are based on.

    The validate_field and validate_entry arguments are predicates that
    decide the allowable field and entry names for a grammar.
    """
    # Simple expressions
    integer = token_type('number')
    name = token_type('name')
    variable = name

    # Braced expressions e.g. '{braced}'
    non_braced = if_token_type('content', always_true)
    braced_expr = parser.forward_decl()
    braced_expr.define(
        (if_token_type('lbrace', lambda v: True)
         + parser.many(braced_expr | non_braced)
         + if_token_type('rbrace', lambda v: True)) >> make_braced_expr)
    braced_expr = braced_expr >> remove_outer_braces

    # String expressions, e.g. '"This " # var # " that"'
    string_expr = \
        full_delimited_list(
            parser.some(lambda x: x.type == 'string') >> make_string
            | parser.some(lambda x: x.type == 'name') >> token_value,
            'concat'
        ) >> join_string_expr('')

    # The value of a field
    value = braced_expr | integer | string_expr | variable

    # Make sure we only parse valid fields
    valid_field = if_token_type('name', validate_field)
    field = valid_field + skip('equals') + value >> make_field
    assignment = token_type('name') + skip('equals') + value

    # A regular comment: any text outside of entries
    comment = token_type('comment')

    # @string
    string_entry = simple_entry(assignment, is_string_entry, make_string_entry)
    # @comment
    comment_entry = simple_entry(token_type('content'), is_comment_entry, make_comment_entry)
    # @preamble
    preamble_entry = simple_entry(token_type('content'), is_preamble_entry, make_preamble_entry)

    # Make sure we only parse valid entries
    valid_entry = if_token_type('name', validate_entry) >> token_value

    # @article etc.
    entry = skip('entry') \
        + valid_entry \
        + skip('lbrace') \
        + (token_type('name') | token_type('number')) + skip('comma') \
        + parser.maybe(delimited_list(field, 'comma')) \
        + parser.maybe(skip('comma')) \
        + skip('rbrace') \
        >> make_entry

    return parser.many(string_entry | comment_entry | preamble_entry | entry | comment) \
        + parser.skip(parser.finished)
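# Sketch of assumed usage: accept every field and entry name, then run the
# resulting parser over the token stream produced by the package's BibTeX
# lexer (not shown here).
accept_all = base_parser(lambda name: True, lambda name: True)
# entries = accept_all.parse(tokens)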
def parse(seq):
    """Sequence(Token) -> object"""
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id_types = ['Name', 'Number', 'String']
    id = some(lambda t: t.type in id_types).named('id') >> tokval
    make_graph_attr = lambda args: DefAttrs('graph', [Attr(*args)])
    make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs)

    node_id = id  # + maybe(port)
    a_list = (
        id + maybe(op_('=') + id) + skip(maybe(op(',')))
        >> unarg(Attr))
    attr_list = (
        many(op_('[') + many(a_list) + op_(']'))
        >> flatten)
    attr_stmt = (
        (n('graph') | n('node') | n('edge')) + attr_list
        >> unarg(DefAttrs))
    graph_attr = id + op_('=') + id >> make_graph_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    subgraph = forward_decl()
    edge_rhs = skip(op('->') | op('--')) + (subgraph | node_id)
    edge_stmt = (
        (subgraph | node_id) + oneplus(edge_rhs) + attr_list
        >> unarg(make_edge))
    stmt = (
        attr_stmt
        | edge_stmt
        | subgraph
        | graph_attr
        | node_stmt
    )
    stmt_list = many(stmt + skip(maybe(op(';'))))
    subgraph.define(
        skip(n('subgraph')) + maybe(id) + op_('{') + stmt_list + op_('}')
        >> unarg(SubGraph))
    graph = (
        maybe(n('strict')) + maybe(n('graph') | n('digraph')) + maybe(id)
        + op_('{') + stmt_list + op_('}')
        >> unarg(Graph))
    dotfile = graph + skip(finished)

    return dotfile.parse(seq)
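# A rough tokenizer sketch for the DOT parser above; the specs are assumptions
# inferred from the grammar (the real project ships its own lexer):
from funcparserlib.lexer import make_tokenizer

def tokenize_dot(s):
    specs = [
        ('Space', (r'[ \t\r\n]+',)),
        ('Name', (r'[A-Za-z_][A-Za-z_0-9]*',)),
        ('Number', (r'-?\d+(\.\d+)?',)),
        ('String', (r'"[^"]*"',)),
        # edge operators must come before the single-character operators
        ('Op', (r'->|--|[{};,=\[\]]',)),
    ]
    t = make_tokenizer(specs)
    return [x for x in t(s) if x.type != 'Space']

# parse(tokenize_dot('digraph g { a -> b; }')) builds a Graph AST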
# assumed enclosing signature: the source fragment begins inside this function
def to_simple_type(s):
    if s == 'int':
        return int
    elif s == 'str':
        return str
    elif s == 'float':
        return float
    elif s == 'bool':
        return bool
    elif s in ['obj', 'object']:
        return typing.Any
    elif s in ['None', 'NoneType']:
        return type(None)
    return s

element = forward_decl()
elements = forward_decl()
skip_of = skip_str('of')
any_class = some(lambda x: re.match('[a-zA-Z0-9_]+', x.string)) >> to_simple_type
set_parser = (a_str('set') + skip_of + element) >> compile_set_type
list_parser = (a_str('list') + skip_of + element) >> compile_list_type
dict_parser = (a_str('dict') + skip_of + skip_str('{') + element + skip_str(',')
               + element + skip_str('}')) >> compile_dict_type
tuple_parser = (a_str('tuple') + skip_of + skip_str('(') + elements + skip_str(')')) >> compile_tuple_type
element.define(set_parser | list_parser | dict_parser | tuple_parser | any_class)
elements.define((many(element + skip_str(',')) + element) >> (lambda x: x[0] + [x[1]]))
type_contract_parser = skip_str('(') + maybe(elements) + skip_str(')') + skip_str('->') + element
docstring_description = many(some(lambda token: '>>>' not in token.line)) >> (lambda tokens: ' '.join(token.string for token in tokens))
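# a_str/skip_str are assumed helpers matching tokenize-style tokens (which
# carry .string and .line attributes, as used above) by their source text;
# a minimal sketch:
from funcparserlib.parser import skip, some

def a_str(text):
    # match a single token whose source text equals `text`
    return some(lambda tok: tok.string == text)

def skip_str(text):
    # same match, but drop the token from the parse result
    return skip(a_str(text))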
def parse(tokens):
    name = some(t('NAME')) >> tokval
    raw_string = some(t('STRING')) >> tokval >> strip('"')
    num = some(t('NUMBER')) >> tokval >> float
    true = const('true') >> always(True)
    false = const('false') >> always(False)
    null_ = const('null') >> always(None)
    enum_item = (num | true | false | null_ | name | raw_string)
    enum = many(enum_item + skip(op("|"))) + enum_item >> append \
        >> anno("enum")
    boolean = const("bool") >> always(None) >> anno("boolean")
    null = const("null") >> always(None) >> anno("null")
    num_range = skip(op('{')) + maybe(num) + \
        skip(op(",")) + maybe(num) + skip(op('}')) >> tuple
    regexp = some(t("REGEXP")) >> tokval >> strip("/")
    string = ((skip(const("str")) + maybe(num_range) + maybe(regexp))
              | (maybe(num_range) + (regexp))) \
        >> anno("string")
    _format = skip(op("%")) + name >> anno("format")
    num_range_step = skip(op('{')) + maybe(num) + \
        skip(op(",")) + maybe(num) + \
        maybe(skip(op(",")) + num) + skip(op('}')) >> tuple
    number = skip(const("num")) + maybe(num_range_step) >> anno("number")
    integer = skip(const("int")) + maybe(num_range_step) >> anno("integer")
    schema = forward_decl()
    array = skip(op('[')) \
        + (many(schema + skip(op(","))) + maybe(schema) >> append) \
        + skip(op(']')) + maybe(num_range) + maybe(op("!")) >> anno("array")
    indent = some(t("INDENT")) >> tokval >> anno("indent")
    dedent = some(t("DEDENT")) >> tokval
    nl = some(t('NL'))
    definition = op("@") + name
    key = (((name | string) + maybe(op("?"))) | definition) + skip(op(":"))
    ref = skip(op("@")) + (name | name + skip(op(":")) + name) >> anno("ref")
    ref_declaration = skip(op("@")) + name + raw_string \
        >> (lambda name_url: (name_url[0], "extref", name_url[1]))
    base_schema = ref | string | number | integer | boolean | null | _format \
        | array
    oneof = oneplus(base_schema + skip(op("|"))) + base_schema \
        >> append >> anno("oneof")
    anyof = oneplus(base_schema + skip(op("/"))) + base_schema \
        >> append >> anno("anyof")
    allof = oneplus(base_schema + skip(op("&"))) + base_schema \
        >> append >> anno("allof")
    simple_schema = anyof | oneof | allof | base_schema | enum | array
    dots = op("...") >> always((None, "open", None))
    refid = skip(op("@")) + raw_string >> (lambda x: (None, "id", x))
    obj = forward_decl()
    nested_obj = skip(nl) + skip(indent) + obj + skip(dedent)
    obj.define(oneplus(((key + ((simple_schema + skip(nl)) | nested_obj))
                        | ((dots | refid | ref_declaration) + skip(nl))) >> list)
               >> list2dict >> anno("object"))
    schema.define(obj | simple_schema)
    exprs = skip(maybe(nl)) + schema + skip(maybe(nl)) + skip(finished)
    return exprs.parse(list(tokens))