def Eat(self, token_type):
  # type: (Id_t) -> None
  """Assert that we're at the current token and advance.

  Raises a parse error (via p_die) naming both the expected and the actual
  token ID when the current token doesn't match.
  """
  if not self.AtToken(token_type):
    p_die('Parser expected %s, got %s', NewStr(Id_str(token_type)),
          NewStr(Id_str(self.op_id)), word=self.cur_word)
  self.Next()
def TranslateBracket(func_name, token_dict):
  """Emit a re2c-based C function that matches [[ ]] bracket operators.

  Unlike the simple lexers, the generated function must match the WHOLE
  input string: the trailing (p == end) check rejects partial matches, and
  the '*' rule below returns early on any unexpected char (including NUL).
  """
  print(r"""
static inline int %s(const unsigned char* s, int len) {
  const unsigned char* p = s;  /* modified by re2c */
  const unsigned char* end = s + len;

  const unsigned char* YYMARKER;
  int id;

  for (;;) {
    /*!re2c
""" % func_name)

  for pat in sorted(token_dict):
    id_ = token_dict[pat]
    re2c_pat = TranslateConstant(pat)
    id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
    print(' %-30s { id = id__%s; break; }' % (re2c_pat, id_name))

  # EARLY RETURN: Do NOT advance past other chars, including the NUL
  # terminator.
  print(' %-30s { return id__Undefined_Tok; }' % '*')

  print("""
    */
  }
  // must be an exact match
  return (p == end) ? id : id__Undefined_Tok;
}
""")
def testBraceRangeLexer(self):
  """Drain BraceRangeLexer('1..3'), logging every token through Eol_Tok."""
  lexer = match.BraceRangeLexer('1..3')
  id_, val = lexer.Next()
  log('%s %r', Id_str(id_), val)
  while id_ != Id.Eol_Tok:
    id_, val = lexer.Next()
    log('%s %r', Id_str(id_), val)
def PrintAst(node, flag):
  # type: (command_t, arg_types.main) -> None
  """Print the command AST in the format selected by --ast-format.

  'none' prints only a stderr notice; 'text'/'html' and their 'abbrev-'
  variants choose the renderer and the tree flavor.
  """
  if flag.ast_format == 'none':
    stderr_line('AST not printed.')
    if 0:
      # Debug aid (disabled): dump the 10 most common token IDs the lexer
      # returned, plus a total count.
      from _devbuild.gen.id_kind_asdl import Id_str
      from frontend.lexer import ID_HIST
      for id_, count in ID_HIST.most_common(10):
        print('%8d %s' % (count, Id_str(id_)))
      print()
      total = sum(ID_HIST.values())
      print('%8d total tokens returned' % total)

  else:  # text output
    f = mylib.Stdout()

    afmt = flag.ast_format  # note: mycpp rewrite to avoid 'in'
    if afmt in ('text', 'abbrev-text'):
      ast_f = fmt.DetectConsoleOutput(f)
    elif afmt in ('html', 'abbrev-html'):
      ast_f = fmt.HtmlOutput(f)
    else:
      raise AssertionError()

    if 'abbrev-' in afmt:
      tree = node.AbbreviatedTree()
    else:
      tree = node.PrettyTree()

    ast_f.FileHeader()
    fmt.PrintTree(tree, ast_f)
    ast_f.FileFooter()
    ast_f.write('\n')
def _TestlistComp(self, p_node, id0):
  # type: (PNode, Id_t) -> expr_t
  """
  testlist_comp:
    (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )

  id0 is the token that opened the construct: Op_LParen for generator
  expressions / parenthesized expressions / tuples, Op_LBracket for list
  comprehensions / list literals.
  """
  assert p_node.typ == grammar_nt.testlist_comp
  children = p_node.children
  n = len(children)

  # Comprehension form: (x for ...) or [x for ...]
  if n > 1 and children[1].typ == grammar_nt.comp_for:
    elt = self.Expr(children[0])
    comp = self._CompFor(children[1])
    if id0 == Id.Op_LParen:
      return expr.GeneratorExp(elt, [comp])
    if id0 == Id.Op_LBracket:
      return expr.ListComp(elt, [comp])
    raise AssertionError()

  if id0 == Id.Op_LParen:
    if len(children) == 1:  # parenthesized expression like (x+1) or (x)
      return self.Expr(children[0])

    # (1,) (1, 2) etc.
    if children[1].tok.id == Id.Arith_Comma:
      return self._Tuple(children)

    raise NotImplementedError('testlist_comp')

  if id0 == Id.Op_LBracket:
    # List literal: elements at even indices, commas at odd.
    elts = []  # type: List[expr_t]
    for i in xrange(0, n, 2):  # skip commas
      elts.append(self.Expr(children[i]))

    return expr.List(elts, expr_context_e.Store)  # unused expr_context_e

  raise AssertionError(Id_str(id0))
def TranslateSimpleLexer(func_name, lexer_def):
  """Emit a re2c-based C lexer function for one lexer mode.

  The generated function scans `line` from `start_pos` and writes the
  matched token id and end position through the out-params.  lexer_def is a
  sequence of (is_regex, pattern, id) rules.
  """
  print(r"""
static inline void %s(const unsigned char* line, int line_len, int start_pos, int* id, int* end_pos) {
  assert(start_pos <= line_len);  /* caller should have checked */

  const unsigned char* p = line + start_pos;  /* modified by re2c */

  /* Echo and History lexer apparently need this, but others don't */
  const unsigned char* YYMARKER;

  for (;;) {
    /*!re2c
""" % func_name)

  for is_regex, pat, id_ in lexer_def:
    if is_regex:
      re2c_pat = TranslateRegex(pat)
    else:
      re2c_pat = TranslateConstant(pat)
    id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
    print(' %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))

  # EARLY RETURN: Do NOT advance past the NUL terminator.
  print(' %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
      r'"\x00"')

  print("""
    */
  }
  *end_pos = p - line;  /* relative */
}
""")
def _ProcParams(self, p_node):
  # type: (PNode) -> proc_sig_t
  """
  proc_params: proc_param (',' proc_param)* [',' '@' Expr_Name]

  Collects named params, an optional @rest param, and an optional &block
  param into a closed proc signature.
  """
  children = p_node.children
  n = len(children)

  params = []  # type: List[param]
  rest = None  # type: Optional[Token]
  block = None  # type: Optional[Token]

  i = 0
  while i < n:
    p = children[i]
    if ISNONTERMINAL(p.typ):  # a proc_param node
      name, default_val = self._ProcParam(p)
      # No type_expr for procs
      type_ = None  # type: type_expr_t
      params.append(param(name, type_, default_val))
    else:  # terminal: '@' rest or '&' block, each followed by a name
      if p.tok.id == Id.Expr_At:  # @args
        i += 1
        rest = children[i].tok
      elif p.tok.id == Id.Arith_Amp:  # &block
        i += 1
        block = children[i].tok
      else:
        raise AssertionError(Id_str(p.tok.id))
    i += 2  # step over the separating comma as well
  return proc_sig.Closed(params, rest, block)
def _ProcParam(self, pnode):
  # type: (PNode) -> Tuple[Token, expr_t]
  """
  proc_param: [':'] Expr_Name ['=' expr]

  Returns (name token, default value expression or None).

  NOTE(review): when the ':' prefix is present, tok0 is the colon token and
  is what gets returned — presumably the caller only needs a location, but
  verify this is intentional (a later revision returns the name token
  separately).
  """
  assert pnode.typ == grammar_nt.proc_param

  children = pnode.children
  tok0 = children[0].tok
  n = len(children)

  i = 0
  if tok0.id == Id.Arith_Colon:
    # TODO: Set a flag for :out param
    i += 1

  child = children[i]
  if child.tok.id == Id.Expr_Name:
    default_val = None  # type: expr_t
    i += 1
    if n > i and children[i].tok.id == Id.Arith_Equal:  # proc p(x = 1+2*3)
      i += 1
      default_val = self.Expr(children[i])
    return tok0, default_val

  raise AssertionError(Id_str(tok0.id))
def EvalCharLiteralForRegex(tok):
  # type: (Token) -> Optional[class_literal_term_t]
  """Evaluate a char-literal token inside a regex character class.

  Produces a ByteSet or CodePoint term, or None for a bare name like
  [b B], which is NOT mutated.  Similar logic as below.
  """
  tok_id = tok.id
  raw = tok.val

  if tok_id == Id.Char_OneChar:
    # Single backslash escape; look up the C meaning of the escaped char.
    return class_literal_term.ByteSet(
        consts.LookupCharC(raw[1]), tok.span_id)

  if tok_id == Id.Char_Hex:
    # Hex escape: the digits follow a 2-char prefix.
    byte_val = int(raw[2:], 16)
    return class_literal_term.ByteSet(chr(byte_val), tok.span_id)

  if tok_id in (Id.Char_Unicode4, Id.Char_Unicode8):
    # Unicode escape: the digits follow a 2-char prefix.
    code_point = int(raw[2:], 16)
    return class_literal_term.CodePoint(code_point, tok.span_id)

  if tok_id == Id.Expr_Name:
    return None  # [b B] is NOT mutated

  raise AssertionError(Id_str(tok_id))
def _ProcParam(self, pnode):
  # type: (PNode) -> param
  """
  proc_param: [':'] Expr_Name ['=' expr]

  Builds a param record: optional ':' prefix token, name token, no type
  (procs are untyped), and an optional default-value expression.
  """
  assert pnode.typ == grammar_nt.proc_param

  children = pnode.children
  tok0 = children[0].tok
  n = len(children)

  prefix_tok = None  # type: Token
  i = 0
  if tok0.id == Id.Arith_Colon:  # optional ':' out-param prefix
    prefix_tok = tok0
    i += 1

  child = children[i]
  if child.tok.id == Id.Expr_Name:
    name_tok = child.tok
    default_val = None  # type: expr_t
    i += 1
    if i < n and children[i].tok.id == Id.Arith_Equal:  # proc p(x = 1+2*3)
      i += 1
      default_val = self.Expr(children[i])
    # No type_expr for procs
    type_ = None  # type: type_expr_t
    return param(prefix_tok, name_tok, type_, default_val)

  raise AssertionError(Id_str(tok0.id))
def func_item(self, node):
  # type: (PNode) -> command_t
  """
  func_item: (
    ('var' | 'const') name_type_list '=' testlist  # oil_var_decl

    # TODO: for, if/switch, with, break/continue/return, try/throw, etc.
  | 'while' test suite
  | 'for' name_type_list 'in' test suite
  | flow_stmt
  | 'set' place_list (augassign | '=') testlist  # oil_place_mutation
    # x  f(x)  etc.
    #
    # And x = 1.  Python uses the same "hack" to fit within pgen2.  It also
    # supports a = b = 1, which we don't want.
    #
    # And echo 'hi' 'there'
    #
    # TODO: expr_to_ast needs to validate this
  | testlist (['=' testlist] | tea_word*)
  )

  Dispatches on the leading keyword token and builds the corresponding
  command node.
  """
  if node.tok.id == Id.Expr_While:
    return command.While(self.Expr(node.children[1]),
                         self._Suite(node.children[2]))
  elif node.tok.id == Id.Expr_For:
    return command.For(
        self._NameTypeList(node.children[1]),  # loop variable(s)
        self.Expr(node.children[3]),  # iterated expression
        self._Suite(node.children[4])  # loop body
    )
  elif node.tok.id == Id.Expr_Break:
    return command.Break()
  elif node.tok.id == Id.Expr_Continue:
    return command.Continue()
  elif node.tok.id == Id.Expr_Return:
    # 'return' [testlist]
    if len(node.children) == 1:
      return command.Return()
    else:
      return command.Return(self.Expr(node.children[1]))
  elif node.tok.id == Id.Expr_Name:
    # TODO: turn echo 'hi' into AST
    return command.NoOp()
  else:
    raise NotImplementedError(Id_str(node.tok.id))
def _Atom(self, children):
  # type: (List[PNode]) -> expr_t
  """Handles alternatives of 'atom' where there is more than one child.

  Dispatches on the opening token: '(' tuple/paren/comprehension,
  '[' list, '{' dict, '/' regex literal, 'func' stub.
  """
  tok = children[0].tok
  id_ = tok.id
  n = len(children)

  if id_ == Id.Op_LParen:
    # atom: '(' [yield_expr|testlist_comp] ')' | ...
    if n == 2:  # () is a tuple
      assert children[1].tok.id == Id.Op_RParen, children[1]
      return expr.Tuple([], expr_context_e.Store)

    return self._TestlistComp(children[1], id_)

  if id_ == Id.Op_LBracket:
    # atom: ... | '[' [testlist_comp] ']' | ...
    if n == 2:  # []
      assert children[1].tok.id == Id.Op_RBracket, children[1]
      return expr.List([], expr_context_e.Store)  # unused expr_context_e

    return self._TestlistComp(children[1], id_)

  if id_ == Id.Op_LBrace:
    # atom: ... | '{' [Op_Newline] [dict] '}'
    i = 1
    if children[i].tok.id == Id.Op_Newline:  # skip the optional newline
      i += 1
    return self._Dict(children[i])

  if id_ == Id.Arith_Slash:
    # /.../ regex literal
    r = self._Regex(children[1])
    flags = []  # type: List[Token]
    # TODO: Parse translation preference.
    trans_pref = None  # type: Token
    return expr.RegexLiteral(children[0].tok, r, flags, trans_pref)

  if id_ == Id.Expr_Func:
    # STUB.  This should really be a Func, not Lambda.
    return expr.Lambda([], expr.Implicit())

  raise NotImplementedError(Id_str(id_))
def _ProcParam(self, pnode):
  # type: (PNode) -> Tuple[Token, expr_t]
  """Transform a proc_param into (name token, optional default expr).

  Handled shape: Expr_Name ['=' expr]

  NOTE(review): the original docstring showed the func_param grammar
  (Expr_Name [type_expr] ['=' expr] | '...' Expr_Name), but the assertion
  below requires grammar_nt.proc_param, and neither type_expr nor '...'
  is handled here.
  """
  assert pnode.typ == grammar_nt.proc_param

  children = pnode.children
  tok0 = children[0].tok
  n = len(children)

  if tok0.id == Id.Expr_Name:
    default_val = None  # type: expr_t
    if n > 1 and children[1].tok.id == Id.Arith_Equal:  # proc p(x = 1+2*3)
      default_val = self.Expr(children[2])
    return tok0, default_val

  raise AssertionError(Id_str(tok0.id))
def _Classify(gr, tok):
  # type: (Grammar, token) -> int
  """Map a lexer token to the integer label the pgen2 grammar expects.

  We have to match up what ParserGenerator.make_grammar() did when
  calling make_label() and make_first().  See classify() in
  opy/pgen2/driver.py.

  Raises a parse error (p_die) if the token has no label in this grammar.
  """
  # 'x' and 'for' are both tokenized as Expr_Name.  This handles the 'for'
  # case.
  if tok.id == Id.Expr_Name:
    if tok.val in gr.keywords:
      return gr.keywords[tok.val]

  # This handles 'x'.
  typ = tok.id
  if typ in gr.tokens:
    return gr.tokens[typ]

  # Unknown_Tok adds no information, so omit the Id name in that case.
  type_str = '' if tok.id == Id.Unknown_Tok else (' (%s)' % Id_str(tok.id))
  p_die('Unexpected token in expression mode%s', type_str, token=tok)
def _FuncParam(self, pnode):
  # type: (PNode) -> param
  """
  func_param: Expr_Name [type_expr] ['=' expr] | '...' Expr_Name

  NOTE(review): type_ is never populated from the [type_expr] child — in
  the n > 2 branch, children[1] presumably holds the type (f(x Int = ...))
  but only the default value is extracted.  Confirm whether this is a
  known TODO.
  """
  assert pnode.typ == grammar_nt.func_param

  children = pnode.children
  tok0 = children[0].tok
  n = len(children)

  if tok0.id == Id.Expr_Name:
    default_val = None  # type: expr_t
    type_ = None  # type: type_expr_t
    if n > 1 and children[1].tok.id == Id.Arith_Equal:  # f(x = 1+2*3)
      default_val = self.Expr(children[2])
    elif n > 2 and children[2].tok.id == Id.Arith_Equal:  # f(x Int = 1+2*3)
      default_val = self.Expr(children[3])
    return param(tok0, type_, default_val)

  raise AssertionError(Id_str(tok0.id))
def _Trailer(self, base, p_trailer):
  # type: (expr_t, PNode) -> expr_t
  """
  trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
           | '.' NAME | '->' NAME | '::' NAME
           )

  Wraps `base` in a call, subscript, or attribute-access node depending on
  the trailer's leading token.
  """
  children = p_trailer.children
  op_tok = children[0].tok

  # TODO: Need to process ALL the trailers, e.g. f(x, y)[1, 2](x, y)

  if op_tok.id == Id.Op_LParen:  # function call
    arglist = arg_list()
    if len(children) == 2:  # ()
      return expr.FuncCall(base, arglist)

    p = children[1]  # the X in ( X )
    assert p.typ == grammar_nt.arglist  # f(x, y)
    self._Arglist(p.children, arglist)
    return expr.FuncCall(base, arglist)

  if op_tok.id == Id.Op_LBracket:  # subscript
    p_args = children[1]
    assert p_args.typ == grammar_nt.subscriptlist

    indices = []  # type: List[expr_t]
    n = len(p_args.children)
    for i in xrange(0, n, 2):  # was children[::2]
      a = p_args.children[i]
      indices.append(self._Subscript(a.children))
    return subscript(base, indices)

  if op_tok.id in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_DColon):
    attr = children[1].tok  # will be Id.Expr_Name
    return attribute(base, op_tok, attr, expr_context_e.Store)

  raise AssertionError(Id_str(op_tok.id))
def PrintAst(nodes, opts):
  # type: (List[command_t], Any) -> None
  """Print a list of parsed commands in the format chosen by opts.ast_format.

  A multi-command list is wrapped in a CommandList node first.  'none'
  prints only a stderr notice; 'text'/'html' and their 'abbrev-' variants
  select the renderer and tree flavor.
  """
  if len(nodes) == 1:
    node = nodes[0]
  else:
    node = command.CommandList(nodes)

  if opts.ast_format == 'none':
    print('AST not printed.', file=sys.stderr)
    if 0:
      # Debug aid (disabled): dump the 10 most common token IDs the lexer
      # returned, plus a total count.
      from _devbuild.gen.id_kind_asdl import Id_str
      from frontend.lexer import ID_HIST
      for id_, count in ID_HIST.most_common(10):
        print('%8d %s' % (count, Id_str(id_)))
      print()
      total = sum(ID_HIST.values())
      print('%8d total tokens returned' % total)

  else:  # text output
    f = mylib.Stdout()

    if opts.ast_format in ('text', 'abbrev-text'):
      ast_f = fmt.DetectConsoleOutput(f)
    elif opts.ast_format in ('html', 'abbrev-html'):
      ast_f = fmt.HtmlOutput(f)
    else:
      raise AssertionError()

    if 'abbrev-' in opts.ast_format:
      tree = node.AbbreviatedTree()
    else:
      tree = node.PrettyTree()

    ast_f.FileHeader()
    fmt.PrintTree(tree, ast_f)
    ast_f.FileFooter()
    ast_f.write('\n')
def _Atom(self, children):
  # type: (List[PNode]) -> expr_t
  """Handles alternatives of 'atom' where there is more than one child.

  Dispatches on the opening token: '(' tuple/paren/comprehension,
  '[' list, '%{' dict, '/' regex literal.
  """
  tok = children[0].tok
  id_ = tok.id
  n = len(children)

  if id_ == Id.Op_LParen:
    # atom: '(' [yield_expr|testlist_comp] ')' | ...
    if n == 2:  # () is a tuple
      assert children[1].tok.id == Id.Op_RParen, children[1]
      return expr.Tuple([], expr_context_e.Store)

    return self._TestlistComp(children[1], id_)

  if id_ == Id.Op_LBracket:
    # atom: ... | '[' [testlist_comp] ']' | ...
    if n == 2:  # []
      assert children[1].tok.id == Id.Op_RBracket, children[1]
      return expr.List([], expr_context_e.Store)  # unused expr_context_e

    return self._TestlistComp(children[1], id_)

  if id_ == Id.Left_PercentBrace:  # %{ ... } dict literal
    return self._Dict(children[1])

  if id_ == Id.Arith_Slash:
    # /.../ regex literal
    r = self._Regex(children[1])
    flags = []  # type: List[Token]
    # TODO: Parse translation preference.
    trans_pref = None  # type: Token
    return expr.RegexLiteral(children[0].tok, r, flags, trans_pref)

  raise NotImplementedError(Id_str(id_))
def _ReAtom(self, p_atom):
  # type: (PNode) -> re_t
  """Transform one re_atom node of the regex grammar into a re_t.

  Nonterminal children (class literals, string literals, var subs) are
  unwrapped directly; terminal children dispatch on the token id
  (punctuation, names, '@' splice, '~' negation, '(' group).
  """
  assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

  children = p_atom.children
  typ = children[0].typ

  if ISNONTERMINAL(typ):
    p_child = p_atom.children[0]
    if typ == grammar_nt.class_literal:
      return re.ClassLiteral(False, self._ClassLiteral(p_child))

    if typ == grammar_nt.braced_var_sub:
      return cast(braced_var_sub, p_child.children[1].tok)

    if typ == grammar_nt.dq_string:
      return cast(double_quoted, p_child.children[1].tok)

    if typ == grammar_nt.sq_string:
      return cast(single_quoted, p_child.children[1].tok)

    if typ == grammar_nt.simple_var_sub:
      return simple_var_sub(children[0].tok)

    if typ == grammar_nt.char_literal:
      return children[0].tok

    raise NotImplementedError(typ)

  else:
    tok = children[0].tok

    # Special punctuation
    if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
      return speck(tok.id, tok.span_id)

    # TODO: d digit can turn into PosixClass and PerlClass right here!
    # It's parsing.
    if tok.id == Id.Expr_Name:
      return self._NameInRegex(None, tok)

    if tok.id == Id.Expr_Symbol:
      # Validate symbols here, like we validate PerlClass, etc.
      if tok.val in ('%start', '%end', 'dot'):
        return tok
      p_die("Unexpected token %r in regex", tok.val, token=tok)

    if tok.id == Id.Expr_At:
      # | '@' Expr_Name
      return re.Splice(children[1].tok)

    if tok.id == Id.Arith_Tilde:
      # | '~' [Expr_Name | class_literal]
      typ = children[1].typ
      if ISNONTERMINAL(typ):
        # Fix: removed an unused local that aliased children[1].children.
        return re.ClassLiteral(True, self._ClassLiteral(children[1]))
      else:
        return self._NameInRegex(tok, children[1].tok)

    if tok.id == Id.Op_LParen:
      # | '(' regex ['as' name_type] ')'
      # TODO: Add variable
      return re.Group(self._Regex(children[1]))

    if tok.id == Id.Arith_Colon:
      # | ':' '(' regex ')'
      raise NotImplementedError(Id_str(tok.id))

    raise NotImplementedError(Id_str(tok.id))
def main(argv):
  """Code generator entry point for the Id/Kind specs.

  Actions:
    c           #define constants for the C lexer
    cpp         generate id_kind_asdl.h / .cc
    mypy        generate the mypy-compatible Python module
    cpp-consts  generate consts.h / consts.cc lookup tables
    py-consts   generate Python lookup tables
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Remove duplication in core/meta.py
  ID_TO_KIND = {}
  BOOL_ARG_TYPES = {}
  TEST_UNARY_LOOKUP = {}
  TEST_BINARY_LOOKUP = {}
  TEST_OTHER_LOOKUP = {}

  ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

  id_kind_def.AddKinds(ID_SPEC)
  id_kind_def.AddBoolKinds(ID_SPEC)  # must come second
  id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP,
                               TEST_BINARY_LOOKUP, TEST_OTHER_LOOKUP)

  # NOTE: Python 2 dict.items() returns a list, so in-place sort() works.
  ids = ID_SPEC.id_str2int.items()
  ids.sort(key=lambda pair: pair[1])  # Sort by ID

  if action == 'c':
    for name, id_int in ids:
      print('#define id__%s %s' % (name, id_int))

  elif action == 'cpp':
    from asdl import gen_cpp

    schema_ast = _CreateModule(ID_SPEC, ids)

    out_prefix = argv[2]

    with open(out_prefix + '.h', 'w') as f:
      f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

namespace id_kind_asdl {
""")
      v = gen_cpp.ClassDefVisitor(f, {}, e_suffix=False,
                                  simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write("""
}  // namespace id_kind_asdl

#endif  // ID_KIND_ASDL_H
""")

    with open(out_prefix + '.cc', 'w') as f:
      f.write("""\
#include <assert.h>
#include "id_kind_asdl.h"

namespace id_kind_asdl {

""")
      v = gen_cpp.MethodDefVisitor(f, {}, e_suffix=False,
                                   simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write('} // namespace id_kind_asdl\n')

  elif action == 'mypy':
    from asdl import gen_python

    schema_ast = _CreateModule(ID_SPEC, ids)
    #print(schema_ast)

    f = sys.stdout

    f.write("""\
from asdl import pybase

""")
    # Minor style issue: we want Id and Kind, not Id_e and Kind_e
    v = gen_python.GenMyPyVisitor(f, None, e_suffix=False,
                                  simple_int_sums=['Id'])
    v.VisitModule(schema_ast)

  elif action == 'cpp-consts':
    # Generate C++ lookup tables from the Python consts module.
    from frontend import consts
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    LIST_INT = ['STRICT_ALL', 'OIL_BASIC', 'OIL_ALL', 'DEFAULT_TRUE']
    # TODO: These could be changed to numbers
    LIST_STR = [
        'SET_OPTION_NAMES', 'SHOPT_OPTION_NAMES', 'VISIBLE_SHOPT_NAMES',
        'PARSE_OPTION_NAMES'
    ]

    prefix = argv[2]

    with open(prefix + '.h', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#ifndef LOOKUP_H
#define LOOKUP_H

#include "mylib.h"
#include "id_kind_asdl.h"
#include "option_asdl.h"
#include "runtime_asdl.h"
#include "types_asdl.h"

namespace consts {
""")
      for name in LIST_INT:
        out('extern List<int>* %s;', name)
      for name in LIST_STR:
        out('extern List<Str*>* %s;', name)

      out("""\

extern int NO_INDEX;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

option_asdl::builtin_t LookupNormalBuiltin(Str* s);
option_asdl::builtin_t LookupAssignBuiltin(Str* s);
option_asdl::builtin_t LookupSpecialBuiltin(Str* s);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

} // namespace consts

#endif // LOOKUP_H
""")

    with open(prefix + '.cc', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#include "consts.h"

namespace Id = id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

int NO_INDEX = 0;  // duplicated from frontend/consts.py

namespace consts {
""")

      # Note: could use opt_num:: instead of raw ints
      for name in LIST_INT:
        val = getattr(consts, name)
        val_str = ', '.join(str(i) for i in val)
        out('List<int>* %s = new List<int>({%s});', name, val_str)

      for name in LIST_STR:
        val = getattr(consts, name)
        val_str = '/* TODO */'
        out('List<Str*>* %s = new List<Str*>({%s});', name, val_str)

      out("""\

int RedirDefaultFd(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(consts.REDIR_DEFAULT_FD):
        a = Id_str(id_).replace('.', '::')
        b = consts.REDIR_DEFAULT_FD[id_]
        out(' case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(consts.REDIR_ARG_TYPES):
        a = Id_str(id_).replace('.', '::')
        # redir_arg_type_e::Path, etc.
        b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
            '.', '_e::')
        out(' case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(BOOL_ARG_TYPES):
        a = Id_str(id_).replace('.', '::')
        # bool_arg_type_e::Str, etc.
        b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
        out(' case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(ID_TO_KIND):
        a = Id_str(id_).replace('.', '::')
        b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
        out(' case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
builtin_t LookupNormalBuiltin(Str* s) {
  assert(0);
}
builtin_t LookupAssignBuiltin(Str* s) {
  assert(0);
}
builtin_t LookupSpecialBuiltin(Str* s) {
  assert(0);
}
""")

      out("""\
} // namespace consts
""")

  elif action == 'py-consts':
    # It's kind of weird to use the generated code to generate more code.
    # Can we do this instead with the parsed module for "id" and "types.asdl"?
    from frontend import consts
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

    if 0:
      # Disabled: these tables are generated for C++ only (cpp-consts).
      print('')
      print('REDIR_DEFAULT_FD = {')
      for id_ in sorted(consts.REDIR_DEFAULT_FD):
        v = consts.REDIR_DEFAULT_FD[id_]
        print(' %s: %s,' % (Id_str(id_), v))
      print('}')

      print('')
      print('REDIR_ARG_TYPES = {')
      for id_ in sorted(consts.REDIR_ARG_TYPES):
        v = consts.REDIR_ARG_TYPES[id_]
        # HACK
        v = redir_arg_type_str(v).replace('.', '_e.')
        print(' %s: %s,' % (Id_str(id_), v))
      print('}')

    print('')
    print('BOOL_ARG_TYPES = {')
    for id_ in sorted(BOOL_ARG_TYPES):
      v = BOOL_ARG_TYPES[id_]
      # HACK
      v = bool_arg_type_str(v).replace('.', '_e.')
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

    print('')
    print('TEST_UNARY_LOOKUP = {')
    for op_str in sorted(TEST_UNARY_LOOKUP):
      v = Id_str(TEST_UNARY_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('TEST_BINARY_LOOKUP = {')
    for op_str in sorted(TEST_BINARY_LOOKUP):
      v = Id_str(TEST_BINARY_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('TEST_OTHER_LOOKUP = {')
    for op_str in sorted(TEST_OTHER_LOOKUP):
      v = Id_str(TEST_OTHER_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('ID_TO_KIND = {')
    for id_ in sorted(ID_TO_KIND):
      v = Kind_str(ID_TO_KIND[id_])
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

  else:
    raise RuntimeError('Invalid action %r' % action)
def Expr(self, pnode):
  # type: (PNode) -> expr_t
  """Transform expressions (as opposed to statements).

  Dispatches on the PNode's nonterminal type, or on the token id for
  terminals.
  """
  typ = pnode.typ
  tok = pnode.tok
  children = pnode.children

  if ISNONTERMINAL(typ):

    #
    # Oil Entry Points / Additions
    #
    if typ == grammar_nt.oil_expr:  # for if/while
      # oil_expr: '(' testlist ')'
      return self.Expr(children[1])

    if typ == grammar_nt.command_expr:
      # return_expr: testlist end_stmt
      return self.Expr(children[0])

    #
    # Python-like Expressions / Operators
    #
    if typ == grammar_nt.atom:
      if len(children) == 1:
        return self.Expr(children[0])
      return self._Atom(children)

    if typ == grammar_nt.testlist:
      # testlist: test (',' test)* [',']
      return self._Tuple(children)

    if typ == grammar_nt.test:
      # test: or_test ['if' or_test 'else' test] | lambdef
      if len(children) == 1:
        return self.Expr(children[0])

      # TODO: Handle lambdef

      test = self.Expr(children[2])
      body = self.Expr(children[0])
      orelse = self.Expr(children[4])
      return expr.IfExp(test, body, orelse)

    if typ == grammar_nt.lambdef:
      # lambdef: '|' [name_type_list] '|' test
      n = len(children)
      if n == 4:  # params present between the pipes
        params = self._NameTypeList(children[1])
      else:
        params = []

      body = self.Expr(children[n-1])
      return expr.Lambda(params, body)

    #
    # Operators with Precedence
    #
    if typ == grammar_nt.or_test:
      # or_test: and_test ('or' and_test)*
      return self._AssocBinary(children)

    if typ == grammar_nt.and_test:
      # and_test: not_test ('and' not_test)*
      return self._AssocBinary(children)

    if typ == grammar_nt.not_test:
      # not_test: 'not' not_test | comparison
      if len(children) == 1:
        return self.Expr(children[0])

      op_tok = children[0].tok  # not
      return expr.Unary(op_tok, self.Expr(children[1]))

    elif typ == grammar_nt.comparison:
      if len(children) == 1:
        return self.Expr(children[0])

      return self._CompareChain(children)

    elif typ == grammar_nt.range_expr:
      n = len(children)
      if n == 1:
        return self.Expr(children[0])

      if n == 3:
        return expr.Range(
            self.Expr(children[0]),
            self.Expr(children[2])
        )

      raise AssertionError(n)

    elif typ == grammar_nt.expr:
      # expr: xor_expr ('|' xor_expr)*
      return self._AssocBinary(children)

    if typ == grammar_nt.xor_expr:
      # xor_expr: and_expr ('xor' and_expr)*
      return self._AssocBinary(children)

    if typ == grammar_nt.and_expr:  # a & b
      # and_expr: shift_expr ('&' shift_expr)*
      return self._AssocBinary(children)

    elif typ == grammar_nt.shift_expr:
      # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
      return self._AssocBinary(children)

    elif typ == grammar_nt.arith_expr:
      # arith_expr: term (('+'|'-') term)*
      return self._AssocBinary(children)

    elif typ == grammar_nt.term:
      # term: factor (('*'|'/'|'div'|'mod') factor)*
      return self._AssocBinary(children)

    elif typ == grammar_nt.factor:
      # factor: ('+'|'-'|'~') factor | power
      # the power would have already been reduced
      if len(children) == 1:
        return self.Expr(children[0])

      assert len(children) == 2
      op = children[0]
      e = children[1]

      assert isinstance(op.tok, Token)
      return expr.Unary(op.tok, self.Expr(e))

    elif typ == grammar_nt.power:
      # power: atom trailer* ['^' factor]
      node = self.Expr(children[0])
      if len(children) == 1:  # No trailers
        return node

      # Apply all the trailers (calls, subscripts, attributes) in order.
      n = len(children)
      i = 1
      while i < n and ISNONTERMINAL(children[i].typ):
        node = self._Trailer(node, children[i])
        i += 1

      if i != n:  # ['^' factor]
        op_tok = children[i].tok
        assert op_tok.id == Id.Arith_Caret, op_tok
        factor = self.Expr(children[i+1])
        node = expr.Binary(op_tok, node, factor)

      return node

    elif typ == grammar_nt.array_literal:
      left_tok = children[0].tok

      # Everything between the brackets is an array item.
      items = [self._ArrayItem(p) for p in children[1:-1]]
      return expr.ArrayLiteral(left_tok, items)

    elif typ == grammar_nt.oil_expr_sub:
      return self.Expr(children[0])

    #
    # Oil Lexer Modes
    #
    elif typ == grammar_nt.sh_array_literal:
      return cast(sh_array_literal, children[1].tok)

    elif typ == grammar_nt.sh_command_sub:
      return cast(command_sub, children[1].tok)

    elif typ == grammar_nt.braced_var_sub:
      return cast(braced_var_sub, children[1].tok)

    elif typ == grammar_nt.dq_string:
      return cast(double_quoted, children[1].tok)

    elif typ == grammar_nt.sq_string:
      return cast(single_quoted, children[1].tok)

    elif typ == grammar_nt.simple_var_sub:
      return simple_var_sub(children[0].tok)

    else:
      nt_name = self.number2symbol[typ]
      raise AssertionError(
          "PNode type %d (%s) wasn't handled" % (typ, nt_name))

  else:  # Terminals should have a token
    id_ = tok.id

    if id_ == Id.Expr_Name:
      return expr.Var(tok)

    if id_ in (
        Id.Expr_DecInt, Id.Expr_BinInt, Id.Expr_OctInt, Id.Expr_HexInt,
        Id.Expr_Float):
      return expr.Const(tok)

    if id_ in (Id.Expr_Null, Id.Expr_True, Id.Expr_False):
      return expr.Const(tok)

    raise NotImplementedError(Id_str(id_))
def main(argv):
  """Code generator entry point (older id_kind variant).

  Actions:
    c          #define constants for the C lexer
    cpp        generate id_kind_asdl.h / .cc
    mypy       generate the mypy-compatible Python module
    cc-tables  generate lookup.h / lookup.cc
    py-tables  generate Python lookup tables
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Remove duplication in core/meta.py
  ID_TO_KIND = {}
  BOOL_ARG_TYPES = {}
  TEST_UNARY_LOOKUP = {}
  TEST_BINARY_LOOKUP = {}
  TEST_OTHER_LOOKUP = {}

  ID_SPEC = id_kind.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

  id_kind.AddKinds(ID_SPEC)
  id_kind.AddBoolKinds(ID_SPEC)  # must come second
  id_kind.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP, TEST_BINARY_LOOKUP,
                           TEST_OTHER_LOOKUP)

  # NOTE: Python 2 dict.items() returns a list, so in-place sort() works.
  ids = ID_SPEC.id_str2int.items()
  ids.sort(key=lambda pair: pair[1])  # Sort by ID

  if action == 'c':
    for name, id_int in ids:
      print('#define id__%s %s' % (name, id_int))

  elif action == 'cpp':
    from asdl import gen_cpp

    schema_ast = _CreateModule(ID_SPEC, ids)

    out_prefix = argv[2]

    with open(out_prefix + '.h', 'w') as f:
      f.write("""
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

namespace id_kind_asdl {
""")
      v = gen_cpp.ClassDefVisitor(f, {}, e_suffix=False,
                                  simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write("""
}  // namespace id_kind_asdl

#endif  // ID_KIND_ASDL_H
""")

    with open(out_prefix + '.cc', 'w') as f:
      f.write("""\
#include <assert.h>
#include "id_kind_asdl.h"

namespace id_kind_asdl {

""")
      v = gen_cpp.MethodDefVisitor(f, {}, e_suffix=False,
                                   simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write('} // namespace id_kind_asdl\n')

  elif action == 'mypy':
    from asdl import gen_python

    schema_ast = _CreateModule(ID_SPEC, ids)
    #print(schema_ast)

    f = sys.stdout

    f.write("""\
from asdl import pybase
from typing import List

""")
    # Minor style issue: we want Id and Kind, not Id_e and Kind_e
    v = gen_python.GenMyPyVisitor(f, None, e_suffix=False,
                                  simple_int_sums=['Id'])
    v.VisitModule(schema_ast)

  elif action == 'cc-tables':
    # Generate the C++ Id -> Kind lookup function.
    from frontend.lookup import REDIR_DEFAULT_FD, REDIR_ARG_TYPES
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    prefix = argv[2]

    with open(prefix + '.h', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#ifndef LOOKUP_H
#define LOOKUP_H

#include "id_kind_asdl.h"

namespace lookup {

id_kind_asdl::Kind LookupKind(id_kind_asdl::Id_t id);

} // namespace lookup

#endif // LOOKUP_H
""")

    with open(prefix + '.cc', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#include "lookup.h"

namespace Id = id_kind_asdl::Id;
using id_kind_asdl::Kind;

namespace lookup {
""")

      out('Kind LookupKind(id_kind_asdl::Id_t id) {')
      out('  // relies on "switch lowering"')
      out('  switch (id) {')
      for id_ in sorted(ID_TO_KIND):
        a = Id_str(id_).replace('.', '::')
        b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
        out(' case %s: return %s;' % (a, b))
      out("""\
  }
}

} // namespace lookup
""")

  elif action == 'py-tables':
    # It's kind of weird to use the generated code to generate more code.
    # Can we do this instead with the parsed module for "id" and "types.asdl"?
    from frontend.lookup import REDIR_DEFAULT_FD, REDIR_ARG_TYPES
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

    print('')
    print('REDIR_DEFAULT_FD = {')
    for id_ in sorted(REDIR_DEFAULT_FD):
      v = REDIR_DEFAULT_FD[id_]
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

    print('')
    print('REDIR_ARG_TYPES = {')
    for id_ in sorted(REDIR_ARG_TYPES):
      v = REDIR_ARG_TYPES[id_]
      # HACK
      v = redir_arg_type_str(v).replace('.', '_e.')
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

    print('')
    print('BOOL_ARG_TYPES = {')
    for id_ in sorted(BOOL_ARG_TYPES):
      v = BOOL_ARG_TYPES[id_]
      # HACK
      v = bool_arg_type_str(v).replace('.', '_e.')
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

    print('')
    print('TEST_UNARY_LOOKUP = {')
    for op_str in sorted(TEST_UNARY_LOOKUP):
      v = Id_str(TEST_UNARY_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('TEST_BINARY_LOOKUP = {')
    for op_str in sorted(TEST_BINARY_LOOKUP):
      v = Id_str(TEST_BINARY_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('TEST_OTHER_LOOKUP = {')
    for op_str in sorted(TEST_OTHER_LOOKUP):
      v = Id_str(TEST_OTHER_LOOKUP[op_str])
      print(' %r: %s,' % (op_str, v))
    print('}')

    print('')
    print('ID_TO_KIND = {')
    for id_ in sorted(ID_TO_KIND):
      v = Kind_str(ID_TO_KIND[id_])
      print(' %s: %s,' % (Id_str(id_), v))
    print('}')

  else:
    raise RuntimeError('Invalid action %r' % action)
def _ReAtom(self, p_atom):
  # type: (PNode) -> re_t
  """Transform one 're_atom' parse-tree node into a regex AST node.

  Grammar fragment (see the Oil grammar for the full production):
      re_atom: ( char_literal | ... )

  The node is either a nonterminal child (class literal, quoted string,
  var sub, char literal) or a single token (punctuation, names, grouping).
  Raises NotImplementedError for atoms not yet supported, and p_die() for
  tokens that are syntactically invalid inside a regex.
  """
  assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

  children = p_atom.children
  typ = children[0].typ

  if ISNONTERMINAL(typ):
    p_child = p_atom.children[0]
    if typ == grammar_nt.class_literal:
      # [a-z ...] — negated=False here; negation is handled by '~' below.
      return re.ClassLiteral(False, self._ClassLiteral(p_child))

    if typ == grammar_nt.braced_var_sub:
      # The token was "casted" to carry the word_part; undo it here.
      return cast(braced_var_sub, p_child.children[1].tok)

    if typ == grammar_nt.dq_string:
      return cast(double_quoted, p_child.children[1].tok)

    if typ == grammar_nt.sq_string:
      return cast(single_quoted, p_child.children[1].tok)

    if typ == grammar_nt.simple_var_sub:
      return simple_var_sub(children[0].tok)

    if typ == grammar_nt.char_literal:
      return children[0].tok

    raise NotImplementedError(typ)

  else:
    tok = children[0].tok

    # Special punctuation
    if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
      return speck(tok.id, tok.span_id)

    # TODO: d digit can turn into PosixClass and PerlClass right here!
    # It's parsing.
    if tok.id == Id.Expr_Name:
      return self._NameInRegex(None, tok)

    if tok.id == Id.Expr_Symbol:
      # Validate symbols here, like we validate PerlClass, etc.
      if tok.val in ('%start', '%end', 'dot'):
        return tok
      p_die("Unexpected token %r in regex", tok.val, token=tok)

    if tok.id == Id.Expr_At:
      # | '@' Expr_Name — splice another regex by name
      return re.Splice(children[1].tok)

    if tok.id == Id.Arith_Tilde:
      # | '~' [Expr_Name | class_literal] — negation
      typ = children[1].typ
      if ISNONTERMINAL(typ):
        return re.ClassLiteral(True, self._ClassLiteral(children[1]))
      else:
        return self._NameInRegex(tok, children[1].tok)

    if tok.id == Id.Op_LParen:
      # | '(' regex ')'
      # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
      # Capture.
      return re.Group(self._Regex(children[1]))

    if tok.id == Id.Arith_Less:
      # | '<' regex [':' name_type] '>' — a named (or unnamed) capture
      regex = self._Regex(children[1])
      n = len(children)
      if n == 5:  # five children means the optional ':' name_type is present
        # TODO: Add type expression
        # YES
        #   < d+ '.' d+ : ratio Float >
        #   < d+ : month Int >
        # INVALID
        #   < d+ : month List[int] >
        name_tok = children[3].children[0].tok
      else:
        name_tok = None
      return re.Capture(regex, name_tok)

    if tok.id == Id.Arith_Colon:
      # | ':' '(' regex ')'
      raise NotImplementedError(Id_str(tok.id))

    raise NotImplementedError(Id_str(tok.id))
def TranslateOshLexer(lexer_def):
  """Print (to stdout) re2c C source for a MatchOshToken() function.

  lexer_def maps each lex_mode enum value to a list of
  (is_regex, pattern, token_id) tuples; each mode becomes one switch
  case containing a re2c block.  NOTE: uses dict.iteritems(), so this
  generator runs under Python 2.
  """
  # https://stackoverflow.com/questions/12836171/difference-between-an-inline-function-and-static-inline-function
  # Has to be 'static inline' rather than 'inline', otherwise the
  # _bin/oil.ovm-dbg build fails (but the _bin/oil.ovm doesn't!).
  # Since we reference this function in exactly one translation unit --
  # fastlex.c, the difference is moot, and we just satisfy the compiler.
  print(r"""
/* Common stuff */

/*!re2c
  re2c:define:YYCTYPE = "unsigned char";
  re2c:define:YYCURSOR = p;
  re2c:yyfill:enable = 0;  // generated code doesn't ask for more input
*/

static inline void MatchOshToken(int lex_mode, const unsigned char* line,
                                 int line_len, int start_pos, int* id,
                                 int* end_pos) {
  assert(start_pos <= line_len);  /* caller should have checked */

  const unsigned char* p = line + start_pos;  /* modified by re2c */
  //printf("p: %p q: %p\n", p, q);

  const unsigned char* YYMARKER;  /* why do we need this? */
  switch (lex_mode)  {
""")

  # TODO: Should be ordered by most common?  Or will profile-directed feedback
  # help?
  for state, pat_list in lexer_def.iteritems():
    # e.g. lex_mode.DQ => lex_mode__DQ
    print('  case %s:' % lex_mode_str(state).replace('.', '__'))
    print('    for (;;) {')
    print('      /*!re2c')

    for is_regex, pat, id_ in pat_list:
      if is_regex:
        re2c_pat = TranslateRegex(pat)
      else:
        re2c_pat = TranslateConstant(pat)
      id_name = Id_str(id_).split('.')[-1]  # e.g. Undefined_Tok
      print('      %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))

    # EARLY RETURN: Do NOT advance past the NUL terminator.
    print('      %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' % \
        r'"\x00"')

    print('      */')
    print('    }')
    print('    break;')
    print()

  # This is literal code without generation:
  # (a bare string expression — a no-op kept as an example of the output shape)
  """
  case lex_mode__OUTER:
    for (;;) {
      /*!re2c
      literal_chunk = [a-zA-Z0-9_/.-]+;
      var_like    = [a-zA-Z_][a-zA-Z0-9_]* "=";  // might be NAME=val
      comment     = [ \t\r]* "#" [^\000\r\n]*;
      space       = [ \t\r]+;
      nul         = "\000";

      literal_chunk { *id = id__Lit_Chars; break; }
      var_like      { *id = id__Lit_VarLike; break; }

      [ \t\r]* "\n" { *id = id__Op_Newline; break; }
      space         { *id = id__WS_Space; break; }

      nul           { *id = id__Eof_Real; break; }

      // anything else
      *             { *id = id__Lit_Other; break; }

      */
    }
    *end_pos = p - line;
    break;

  case lex_mode__COMMENT:
    *id = id__Lit_Other;
    *end_pos = 6;
    break;
  """

  print("""\
  default:
    assert(0);

  }
  *end_pos = p - line;  /* relative */
}
""")
def main(argv):
  """Generate Id/Kind and consts tables in one of several output formats.

  argv[1] selects the action: 'c', 'cpp', 'mypy', 'cpp-consts', or
  'py-consts'; some actions read an output path prefix from argv[2].
  NOTE: `ids.sort(...)` on the result of .items() implies Python 2.
  """
  try:
    action = argv[1]
  except IndexError:
    raise RuntimeError('Action required')

  # TODO: Remove duplication in core/meta.py
  ID_TO_KIND = {}
  BOOL_ARG_TYPES = {}
  TEST_UNARY_LOOKUP = {}
  TEST_BINARY_LOOKUP = {}
  TEST_OTHER_LOOKUP = {}

  ID_SPEC = id_kind_def.IdSpec(ID_TO_KIND, BOOL_ARG_TYPES)

  id_kind_def.AddKinds(ID_SPEC)
  id_kind_def.AddBoolKinds(ID_SPEC)  # must come second
  id_kind_def.SetupTestBuiltin(ID_SPEC, TEST_UNARY_LOOKUP, TEST_BINARY_LOOKUP,
                               TEST_OTHER_LOOKUP)

  ids = ID_SPEC.id_str2int.items()
  ids.sort(key=lambda pair: pair[1])  # Sort by ID

  if action == 'c':
    # C #defines, one per token id.
    for name, id_int in ids:
      print('#define id__%s %s' % (name, id_int))

  elif action == 'cpp':
    # C++ header/implementation pair via the ASDL code generators.
    from asdl import gen_cpp

    schema_ast = _CreateModule(ID_SPEC, ids)

    out_prefix = argv[2]

    with open(out_prefix + '.h', 'w') as f:
      f.write("""\
#ifndef ID_KIND_ASDL_H
#define ID_KIND_ASDL_H

namespace id_kind_asdl {
""")
      v = gen_cpp.ClassDefVisitor(f, e_suffix=False, simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write("""
}  // namespace id_kind_asdl

#endif  // ID_KIND_ASDL_H
""")

    with open(out_prefix + '.cc', 'w') as f:
      f.write("""\
#include <assert.h>
#include "id_kind_asdl.h"

namespace id_kind_asdl {

""")
      v = gen_cpp.MethodDefVisitor(f, e_suffix=False, simple_int_sums=['Id'])
      v.VisitModule(schema_ast)
      f.write('} // namespace id_kind_asdl\n')

  elif action == 'mypy':
    # Typed Python bindings, written to stdout.
    from asdl import gen_python

    schema_ast = _CreateModule(ID_SPEC, ids)
    #print(schema_ast)

    f = sys.stdout

    f.write("""\
from asdl import pybase

""")
    # Minor style issue: we want Id and Kind, not Id_e and Kind_e
    v = gen_python.GenMyPyVisitor(f, e_suffix=False, simple_int_sums=['Id'])
    v.VisitModule(schema_ast)

  elif action == 'cpp-consts':
    # C++ lookup functions (switch-based) for the consts tables.
    from frontend import consts
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    LIST_INT = [
        'STRICT_ALL', 'OIL_BASIC', 'OIL_ALL', 'DEFAULT_TRUE',
        'PARSE_OPTION_NUMS', 'SHOPT_OPTION_NUMS', 'SET_OPTION_NUMS',
    ]
    # TODO: These could be changed to numbers
    LIST_STR = ['VISIBLE_SHOPT_NAMES']

    prefix = argv[2]

    with open(prefix + '.h', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#ifndef CONSTS_H
#define CONSTS_H

#include "mylib.h"
#include "id_kind_asdl.h"
#include "option_asdl.h"
#include "runtime_asdl.h"
#include "types_asdl.h"

namespace consts {
""")
      for name in LIST_INT:
        out('extern List<int>* %s;', name)
      for name in LIST_STR:
        out('extern List<Str*>* %s;', name)

      out("""\

extern int NO_INDEX;

int RedirDefaultFd(id_kind_asdl::Id_t id);
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id);
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id);
id_kind_asdl::Kind GetKind(id_kind_asdl::Id_t id);

option_asdl::builtin_t LookupNormalBuiltin(Str* s);
option_asdl::builtin_t LookupAssignBuiltin(Str* s);
option_asdl::builtin_t LookupSpecialBuiltin(Str* s);

bool IsControlFlow(Str* s);
bool IsKeyword(Str* s);

Str* LookupCharC(Str* c);
Str* LookupCharPrompt(Str* c);

Str* OptionName(option_asdl::option_t opt_num);

Tuple2<runtime_asdl::state_t, runtime_asdl::emit_t> IfsEdge(runtime_asdl::state_t state, runtime_asdl::char_kind_t ch);

}  // namespace consts

#endif  // CONSTS_H
""")

    with open(prefix + '.cc', 'w') as f:
      def out(fmt, *args):
        print(fmt % args, file=f)

      out("""\
#include "consts.h"

namespace Id = id_kind_asdl::Id;
using id_kind_asdl::Kind;
using types_asdl::redir_arg_type_e;
using types_asdl::bool_arg_type_e;
using option_asdl::builtin_t;

namespace consts {

int NO_INDEX = 0;  // duplicated from frontend/consts.py
""")
      # Note: could use opt_num:: instead of raw ints
      for name in LIST_INT:
        val = getattr(consts, name)
        val_str = ', '.join(str(i) for i in val)
        out('List<int>* %s = new List<int>({%s});', name, val_str)

      for name in LIST_STR:
        val = getattr(consts, name)
        val_str = '/* TODO */'  # string lists not generated yet
        out('List<Str*>* %s = new List<Str*>({%s});', name, val_str)

      out("""\
int RedirDefaultFd(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(consts.REDIR_DEFAULT_FD):
        a = Id_str(id_).replace('.', '::')
        b = consts.REDIR_DEFAULT_FD[id_]
        out('  case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
types_asdl::redir_arg_type_t RedirArgType(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(consts.REDIR_ARG_TYPES):
        a = Id_str(id_).replace('.', '::')
        # redir_arg_type_e::Path, etc.
        b = redir_arg_type_str(consts.REDIR_ARG_TYPES[id_]).replace(
            '.', '_e::')
        out('  case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
types_asdl::bool_arg_type_t BoolArgType(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(BOOL_ARG_TYPES):
        a = Id_str(id_).replace('.', '::')
        # bool_arg_type_e::Str, etc.
        b = bool_arg_type_str(BOOL_ARG_TYPES[id_]).replace('.', '_e::')
        out('  case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      out("""\
Kind GetKind(id_kind_asdl::Id_t id) {
  // relies on "switch lowering"
  switch (id) {
""")
      for id_ in sorted(ID_TO_KIND):
        a = Id_str(id_).replace('.', '::')
        b = Kind_str(ID_TO_KIND[id_]).replace('.', '::')
        out('  case %s: return %s;' % (a, b))
      out("""\
  }
}
""")

      b = builtin_def.BuiltinDict()
      GenBuiltinLookup(b, 'LookupNormalBuiltin', 'normal', f)
      GenBuiltinLookup(b, 'LookupAssignBuiltin', 'assign', f)
      GenBuiltinLookup(b, 'LookupSpecialBuiltin', 'special', f)

      # TODO: Fill these in
      out("""\
bool IsControlFlow(Str* s) {
  assert(0);
}

bool IsKeyword(Str* s) {
  assert(0);
}
""")

      GenCharLookup('LookupCharC', consts._ONE_CHAR_C, f, required=True)
      GenCharLookup('LookupCharPrompt', consts._ONE_CHAR_PROMPT, f)

      # OptionName() is a bit redundant with ADSL's option_str(), but we can
      # remove that.
      out("""\
Str* OptionName(option_asdl::option_t opt_num) {
  const char* s;
  switch (opt_num) {
""")
      # These are the only ones we use
      set_opts = [(opt.index, opt.name) for opt in option_def.All()
                  if opt.builtin == 'set']
      for index, name in set_opts:
        out('  case %s:' % index)
        out('    s = "%s";' % name)
        out('    break;')

      out("""\
  default:
    assert(0);
  }
  return new Str(s);  // TODO-intern
}
""")

      #
      # Generate a tightly packed 2D array for C, from a Python dict.
      #

      edges = consts._IFS_EDGES
      max_state = max(edge[0] for edge in edges)
      max_char_kind = max(edge[1] for edge in edges)

      edge_array = []
      for i in xrange(max_state + 1):
        # unused cells get -1
        edge_array.append(['-1'] * (max_char_kind + 1))

      for i in xrange(max_state + 1):
        for j in xrange(max_char_kind + 1):
          entry = edges.get((i, j))
          if entry is not None:
            # pack (new_state, action) into 32 bits
            edge_array[i][j] = '(%d<<16)|%d' % entry

      parts = []
      for i in xrange(max_state + 1):
        parts.append('  {')
        parts.append(', '.join('%10s' % cell for cell in edge_array[i]))
        parts.append(' },\n')

      out("""\
int _IFS_EDGE[%d][%d] = {
%s
};
""" % (max_state + 1, max_char_kind + 1, ''.join(parts)))

      out("""\
// Note: all of these are integers, e.g. state_i, emit_i, char_kind_i
using runtime_asdl::state_t;
using runtime_asdl::emit_t;
using runtime_asdl::char_kind_t;

Tuple2<state_t, emit_t> IfsEdge(state_t state, runtime_asdl::char_kind_t ch) {
  int cell = _IFS_EDGE[state][ch];
  state_t new_state = cell >> 16;
  emit_t emit = cell & 0xFFFF;
  return Tuple2<state_t, emit_t>(new_state, emit);
}
""")

      out("""\
}  // namespace consts
""")

  elif action == 'py-consts':
    # It's kind of weird to use the generated code to generate more code.
    # Can we do this instead with the parsed module for "id" and "types.asdl"?

    from frontend import consts
    from _devbuild.gen.id_kind_asdl import Id_str, Kind_str
    from _devbuild.gen.types_asdl import redir_arg_type_str, bool_arg_type_str

    print("""
from _devbuild.gen.id_kind_asdl import Id, Kind
from _devbuild.gen.types_asdl import redir_arg_type_e, bool_arg_type_e
""")

    # NOTE(review): the mangled original puts these dumps under 'if 0:'; the
    # exact extent of the disabled block is ambiguous -- here ALL table dumps
    # are treated as disabled (the 'py-tables' action emits them for real).
    # TODO confirm against upstream.
    if 0:
      print('')
      print('REDIR_DEFAULT_FD = {')
      for id_ in sorted(consts.REDIR_DEFAULT_FD):
        v = consts.REDIR_DEFAULT_FD[id_]
        print('  %s: %s,' % (Id_str(id_), v))
      print('}')

      print('')
      print('REDIR_ARG_TYPES = {')
      for id_ in sorted(consts.REDIR_ARG_TYPES):
        v = consts.REDIR_ARG_TYPES[id_]
        # HACK
        v = redir_arg_type_str(v).replace('.', '_e.')
        print('  %s: %s,' % (Id_str(id_), v))
      print('}')

      print('')
      print('BOOL_ARG_TYPES = {')
      for id_ in sorted(BOOL_ARG_TYPES):
        v = BOOL_ARG_TYPES[id_]
        # HACK
        v = bool_arg_type_str(v).replace('.', '_e.')
        print('  %s: %s,' % (Id_str(id_), v))
      print('}')

      print('')
      print('TEST_UNARY_LOOKUP = {')
      for op_str in sorted(TEST_UNARY_LOOKUP):
        v = Id_str(TEST_UNARY_LOOKUP[op_str])
        print('  %r: %s,' % (op_str, v))
      print('}')

      print('')
      print('TEST_BINARY_LOOKUP = {')
      for op_str in sorted(TEST_BINARY_LOOKUP):
        v = Id_str(TEST_BINARY_LOOKUP[op_str])
        print('  %r: %s,' % (op_str, v))
      print('}')

      print('')
      print('TEST_OTHER_LOOKUP = {')
      for op_str in sorted(TEST_OTHER_LOOKUP):
        v = Id_str(TEST_OTHER_LOOKUP[op_str])
        print('  %r: %s,' % (op_str, v))
      print('}')

      print('')
      print('ID_TO_KIND = {')
      for id_ in sorted(ID_TO_KIND):
        v = Kind_str(ID_TO_KIND[id_])
        print('  %s: %s,' % (Id_str(id_), v))
      print('}')

  else:
    raise RuntimeError('Invalid action %r' % action)
def PrettyId(id_):
  # type: (Id_t) -> str
  """Render an Id for type errors shown in the UI.

  Currently yields the raw enum name, e.g. 'Id.BoolUnary_v'.
  """
  pretty = Id_str(id_)
  return NewStr(pretty)
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.

  Reads tokens in lex_mode_e.Expr and feeds them to p.addtoken() until the
  grammar accepts.  For the sub-language openers (@( ... ), $( ... ), "...",
  ${...}, '...'/c'...') it recursively invokes the word/command parsers and
  pushes their result as a single "casted dummy" token.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  last_token = None  # type: Optional[token]

  balance = 0  # to ignore newlines

  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(lex_mode_e.Expr)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if lookup.LookupKind(tok.id) == Kind.Ignored:
      continue

    # For var x = {
    #   a: 1, b: 2
    # }
    if balance > 0 and tok.id == Id.Op_Newline:
      #log('*** SKIPPING NEWLINE')
      continue

    balance += _OTHER_BALANCE.get(tok.id, 0)
    #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    # pgen2 token labels are limited to one byte
    assert tok.id < 256, Id_str(tok.id)

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    # addtoken() returns True when the grammar's start symbol is complete.
    if p.addtoken(tok.id, tok, ilabel):
      return tok

    #
    # Mututally recursive calls into the command/word parsers.
    #

    if mylib.PYTHON:
      if tok.id == Id.Left_AtParen:
        left_tok = tok
        lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

        # Blame the opening token
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)
        words = []
        while True:
          w = w_parser.ReadWord(lex_mode_e.ShCommand)
          if 0:
            log('w = %s', w)

          if isinstance(w, word__Token):
            word_id = word_.CommandId(w)
            if word_id == Id.Right_ShArrayLiteral:
              break
            elif word_id == Id.Op_Newline:  # internal newlines allowed
              continue
            else:
              # Token
              p_die('Unexpected token in array literal: %r', w.token.val,
                    word=w)

          assert isinstance(w, word__Compound)  # for MyPy
          words.append(w)

        words2 = braces.BraceDetectAll(words)
        words3 = word_.TildeDetectAll(words2)

        typ = Id.Expr_CastedDummy

        lit_part = sh_array_literal(left_tok, words3)
        opaque = cast(token, lit_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        tok = w.token
        ilabel = _Classify(gr, tok)
        done = p.addtoken(tok.id, tok, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarParen:
        left_token = tok

        lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                     Id.Eof_RParen)
        node = c_parser.ParseCommandSub()
        # A little gross: Copied from osh/word_parse.py
        right_token = c_parser.w_parser.cur_token

        cs_part = command_sub(left_token, node)
        cs_part.spids.append(left_token.span_id)
        cs_part.spids.append(right_token.span_id)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, cs_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        ilabel = _Classify(gr, right_token)
        done = p.addtoken(right_token.id, right_token, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DoubleQuote:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        parts = []  # type: List[word_part_t]
        # the closing quote becomes next iteration's token
        last_token = w_parser.ReadDoubleQuoted(left_token, parts)
        expr_dq_part = double_quoted(left_token, parts)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, expr_dq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarBrace:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        part, last_token = w_parser.ReadBracedBracedVarSub(left_token)

        # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
        typ = Id.Expr_CastedDummy
        opaque = cast(token, part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      # '' and c''
      if tok.id in (Id.Left_SingleQuoteRaw, Id.Left_SingleQuoteC):
        if tok.id == Id.Left_SingleQuoteRaw:
          sq_mode = lex_mode_e.SQ_Raw
        else:
          sq_mode = lex_mode_e.SQ_C

        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        tokens = []  # type: List[token]
        # raw strings ('') take backslashes literally
        no_backslashes = (left_token.val == "'")
        last_token = w_parser.ReadSingleQuoted(sq_mode, left_token, tokens,
                                               no_backslashes)
        sq_part = single_quoted(left_token, tokens)

        typ = Id.Expr_CastedDummy
        opaque = cast(token, sq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression
        continue

  else:
    # NOTE: while/else -- unreachable for 'while True' since the loop only
    # exits via return or an exception.
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id, tok)