Code Example #1
File: parse_lib.py  Project: dotmpe/oil
    def __init__(self, arena, parse_opts, aliases, oil_grammar):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases

        self.e_parser = expr_parse.ExprParser(self, oil_grammar)
        # NOTE: The transformer is really a pure function.
        if oil_grammar:
            self.tr = expr_to_ast.Transformer(oil_grammar)
            if mylib.PYTHON:
                names = MakeGrammarNames(oil_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None
            if mylib.PYTHON:  # TODO: Simplify
                names = {}

        if mylib.PYTHON:
            self.p_printer = expr_parse.ParseTreePrinter(
                names)  # print raw nodes

        self.parsing_expr = False  # "single-threaded" state

        # Completion state lives here since it may span multiple parsers.
        self.trail = _NullTrail()  # type: _BaseTrail
        self.one_pass_parse = False
Code Example #2
File: parse_lib.py  Project: roryokane/oil
    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 oil_grammar,
                 trail=None,
                 one_pass_parse=False):
        # type: (Arena, OilParseOptions, Dict[str, Any], Grammar, Optional[_BaseTrail], bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases

        self.e_parser = expr_parse.ExprParser(self, oil_grammar)
        # NOTE: The transformer is really a pure function.
        if oil_grammar:
            self.tr = expr_to_ast.Transformer(oil_grammar)
            names = MakeGrammarNames(oil_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None
            names = {}

        self.parsing_expr = False  # "single-threaded" state

        # Completion state lives here since it may span multiple parsers.
        self.trail = trail or _NullTrail()
        self.one_pass_parse = one_pass_parse

        self.p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
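Both versions of this constructor are called the same way; Code Example #5 below shows a real call site (parse_lib.ParseContext(arena, parse_opts, {}, gr)). A minimal sketch, assuming the core.alloc and frontend.parse_lib module paths from the Oil tree; passing None for oil_grammar takes the unit-test branch shown in __init__:

    # Sketch only (module paths assumed): build a ParseContext without a
    # grammar, which exercises the "hack for unit tests" branch above.
    from core import alloc
    from frontend import parse_lib

    arena = alloc.Arena()
    parse_opts = parse_lib.OilParseOptions()  # as constructed in Code Example #5
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, None)  # aliases={}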
Code Example #3
def ParseDemo(oil_grammar):
    # type: (Grammar) -> None

    arena = alloc.Arena()
    arena.PushSource(source__Stdin(''))

    parse_ctx = None  # type: ParseContext
    e_parser = expr_parse.ExprParser(parse_ctx, oil_grammar)

    line_lexer = lexer.LineLexer('', arena)
    line_reader = reader.StringLineReader('1 + 2*3', arena)
    lex = lexer.Lexer(line_lexer, line_reader)

    try:
        pnode, _ = e_parser.Parse(lex, arith_nt.arith_expr)
    except error.Parse as e:
        #ui.PrettyPrintError(e, arena)
        log("Parse Error (TODO: print it)")
        return

    # TODO: Fill this in.  Oil uses parse_lib.MakeGrammarNames()
    #
    # terminals: _Id_str?  Doesn't work in mycpp
    # nonterminals: gr.number2symbol.  Is this ever used at runtime?
    #
    # Dict[int,str] should really be a List[str] then?

    if 0:
        names = {}  # type: Dict[int, str]
        printer = expr_parse.ParseTreePrinter(names)
        printer.Print(pnode)
        # NOTE: Could also transform

    # This only works for Oil
    if 0:
        tr = expr_to_ast.Transformer(oil_grammar)
        node = tr.Expr(pnode)

        assert node is not None

        tree = node.AbbreviatedTree()
        fmt.PrintTree(tree, mylib.Stdout())
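ParseDemo expects a pre-built Grammar. A hedged sketch of driving it, assuming the pgen2 package layout; the marshal path here is a placeholder, not taken from the examples. Code Example #5 shows how the .marshal file is produced, and Code Example #7 uses the same loads() pattern:

    # Sketch only: load a marshalled grammar (path is an assumption) and
    # run the demo on the hard-coded '1 + 2*3' input.
    from pgen2 import grammar

    gr = grammar.Grammar()
    with open('_devbuild/gen/grammar.marshal', 'rb') as f:
        gr.loads(f.read())
    ParseDemo(gr)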
Code Example #4
File: pgen2_demo.py  Project: tekknolagi/oil
def ParseDemo(oil_grammar):
    # type: (Grammar) -> None

    arena = alloc.Arena()
    arena.PushSource(source__Stdin(''))

    parse_ctx = None  # type: ParseContext
    e_parser = expr_parse.ExprParser(parse_ctx, oil_grammar)

    line_lexer = lexer.LineLexer('', arena)
    line_reader = reader.StringLineReader('1 + 2*3', arena)
    lex = lexer.Lexer(line_lexer, line_reader)

    try:
        pnode, _ = e_parser.Parse(lex, arith_nt.arith_expr)
    except util.ParseError as e:
        #ui.PrettyPrintError(e, arena)
        print(e)
        return

    # TODO: Fill this in.  Oil uses parse_lib.MakeGrammarNames()
    names = {}  # type: Dict[int, str]
    printer = expr_parse.ParseTreePrinter(names)
    printer.Print(pnode)
Code Example #5
File: grammar_gen.py  Project: tekknolagi/oil
def main(argv):
  action = argv[1]
  argv = argv[2:]

  # Used at grammar BUILD time.
  OPS = {
      '.': Id.Expr_Dot,
      '->': Id.Expr_RArrow,
      '::': Id.Expr_DColon,

      '@': Id.Expr_At,
      '...': Id.Expr_Ellipsis,

      '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
  }

  # Note: We have two lists of ops because Id.Op_Semi is used, not
  # Id.Arith_Semi.
  for _, token_str, id_ in lex.EXPR_OPS:
    assert token_str not in OPS, token_str
    OPS[token_str] = id_

  # Tokens that look like / or ${ or @{
  triples = (
      meta.ID_SPEC.LexerPairs(Kind.Arith) +
      lex.OIL_LEFT_SUBS +
      lex.OIL_LEFT_UNQUOTED +
      lex.EXPR_WORDS
  )
  more_ops = {}
  for _, token_str, id_ in triples:
    assert token_str not in more_ops, token_str
    more_ops[token_str] = id_

  # Tokens that look like 'for'
  keyword_ops = {}
  for _, token_str, id_ in lex.EXPR_WORDS:  # for, in, etc.
    assert token_str not in keyword_ops, token_str
    keyword_ops[token_str] = id_

  if 0:
    from pprint import pprint
    pprint(OPS)
    print('---')
    pprint(more_ops)
    print('---')
    pprint(keyword_ops)
    print('---')

  tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    lex_ = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.VarDecl(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!

    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
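Note that main() receives the full argv, script name included: action = argv[1], and the remaining arguments shift down into the action's positional slots. A hypothetical driver (the grammar and output paths are assumptions, sketch only):

    # Sketch only: invoke the 'marshal' action, which compiles a .pgen2
    # grammar file to .marshal plus a generated _nt.py nonterminal module.
    import sys

    if __name__ == '__main__':
        sys.exit(main(['grammar_gen.py', 'marshal',
                       'oil_lang/grammar.pgen2', '_devbuild/gen']) or 0)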
Code Example #6
def main(argv):
  action = argv[1]
  argv = argv[2:]

  # Common initialization
  arith_ops = {}
  for _, token_str, id_ in meta.ID_SPEC.LexerPairs(Kind.Arith):
    arith_ops[token_str] = id_

  if 0:
    from pprint import pprint
    pprint(arith_ops)

  tok_def = OilTokenDef(arith_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    lex = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.OilAssign(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!

    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
Code Example #7
File: opy_main.py  Project: mrshu/oil
def OpyCommandMain(argv):
  """Dispatch to the right action."""

  # TODO: Use core/args.
  #opts, argv = Options().parse_args(argv)

  try:
    action = argv[0]
  except IndexError:
    raise args.UsageError('opy: Missing required subcommand.')

  argv = argv[1:]  # TODO: Should I do input.ReadRequiredArg()?
                   # That will shift the input.

  if action in (
      'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
      'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
    loader = pyutil.GetResourceLoader()
    f = loader.open(GRAMMAR_REL_PATH)
    contents = f.read()
    f.close()
    gr = grammar.Grammar()
    gr.loads(contents)

    # In Python 2 code, always use from __future__ import print_function.
    try:
      del gr.keywords["print"]
    except KeyError:
      pass

    symbols = Symbols(gr)
    pytree.Init(symbols)  # for type_repr() pretty printing
    transformer.Init(symbols)  # for _names and other dicts

    compiler = skeleton.Compiler(gr)
  else:
    # e.g. pgen2 doesn't use any of these.  Maybe we should make a different
    # tool.
    compiler = None

  # TODO: Also have a run_spec for 'opyc run'.
  compile_spec = args.OilFlags()
  compile_spec.Flag('-emit-docstring', args.Bool, default=True,
                    help='Whether to emit docstrings')
  compile_spec.Flag('-fast-ops', args.Bool, default=True,
                    help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
  compile_spec.Flag('-oil-subset', args.Bool, default=False,
                    help='Only allow the constructs necessary to implement '
                    'Oil. Example: using multiple inheritance will abort '
                    'compilation.')

  #
  # Actions
  #

  if action == 'pgen2':
    grammar_path = argv[0]
    marshal_path = argv[1]
    WriteGrammar(grammar_path, marshal_path)

  elif action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[1]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    printer = TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)
    n = CountTupleTree(tree)
    log('COUNT %d', n)

  elif action == 'lex':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      for typ, val, start, end, unused_line in tokens:
        print('%10s %10s %-10s %r' % (start, end, token.tok_name[typ], val))

  elif action == 'lex-names':  # Print all the NAME tokens.
    for py_path in argv:
      log('Lexing %s', py_path)
      with open(py_path) as f:
        tokens = tokenize.generate_tokens(f.readline)
        for typ, val, start, end, unused_line in tokens:
          if typ == token.NAME:
            print(val)

  elif action == 'parse':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      p = parse.Parser(gr, convert=skeleton.py2st)
      parse_tree = driver.PushTokens(p, tokens, gr, 'file_input')

    if isinstance(parse_tree, tuple):
      n = CountTupleTree(parse_tree)
      log('COUNT %d', n)

      printer = TupleTreePrinter(transformer._names)
      printer.Print(parse_tree)
    else:
      parse_tree.PrettyPrint(sys.stdout)
      log('\tChildren: %d' % len(parse_tree.children), file=sys.stderr)

  # Parse with an arbitrary grammar, but the Python lexer.
  elif action == 'parse-with':
    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f)

    f = cStringIO.StringIO(code_str)
    tokens = tokenize.generate_tokens(f.readline)
    p = parse.Parser(gr)  # no convert=
    try:
      pnode = driver.PushTokens(p, tokens, gr, start_symbol)
    except parse.ParseError as e:
      # Extract location information and show it.
      _, _, (lineno, offset) = e.opaque
      # extra line needed for '\n' ?
      lines = code_str.splitlines() + ['']

      line = lines[lineno-1]
      log('  %s', line)
      log('  %s^', ' '*offset)
      log('Parse Error: %s', e)
      return 1
    printer = expr_parse.ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  elif action == 'ast':  # output AST
    opt, i = compile_spec.Parse(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='ast')

  elif action == 'symbols':  # output symbols
    opt, i = compile_spec.Parse(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='symbols')

  elif action == 'cfg':  # output Control Flow Graph
    opt, i = compile_spec.Parse(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='cfg')

  elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
    # spec.Arg('action', ['foo', 'bar'])
    # But that leads to some duplication.

    opt, i = compile_spec.Parse(argv)

    py_path = argv[i]
    out_path = argv[i+1]

    with open(py_path) as f:
      co = compiler.Compile(f, opt, 'exec')

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = misc.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile-ovm':
    opt, i = compile_spec.Parse(argv)
    py_path = argv[i]
    out_path = argv[i+1]

    # Compile to Python bytecode (TODO: remove ovm_codegen.py)
    mode = 'exec'
    with open(py_path) as f:
      co = compiler.Compile(f, opt, mode)

    if 1:
      with open(out_path, 'wb') as out_f:
        oheap2.Write(co, out_f)
      return 0

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      if 1:
        out_f.write(co.co_code)
      else:
        h = misc.getPycHeader(py_path)
        out_f.write(h)
        marshal.dump(co, out_f)
    log('Wrote only the bytecode to %r', out_path)

  elif action == 'eval':  # Like compile, but parses to a code object and prints it
    opt, i = compile_spec.Parse(argv)
    py_expr = argv[i]
    f = skeleton.StringInput(py_expr, '<eval input>')
    co = compiler.Compile(f, opt, 'eval')

    v = dis_tool.Visitor()
    v.show_code(co)
    print()
    print('RESULT:')
    print(eval(co))

  elif action == 'repl':  # Like eval in a loop
    while True:
      py_expr = raw_input('opy> ')
      f = skeleton.StringInput(py_expr, '<REPL input>')

      # TODO: change this to 'single input'?  Why doesn't this work?
      co = compiler.Compile(f, opt, 'eval')

      v = dis_tool.Visitor()
      v.show_code(co)
      print(eval(co))

  elif action == 'dis-tables':
    out_dir = argv[0]
    pyc_paths = argv[1:]

    out = TableOutput(out_dir)

    for pyc_path in pyc_paths:
      with open(pyc_path) as f:
        magic, unixtime, timestamp, code = dis_tool.unpack_pyc(f)
        WriteDisTables(pyc_path, code, out)

    out.Close()

  elif action == 'dis':
    opt, i = compile_spec.Parse(argv)
    path = argv[i]
    v = dis_tool.Visitor()

    if path.endswith('.py'):
      with open(path) as f:
        co = compiler.Compile(f, opt, 'exec')

      log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
      v.show_code(co)

    else:  # assume pyc_path
      with open(path, 'rb') as f:
        v.Visit(f)

  elif action == 'dis-md5':
    pyc_paths = argv
    if not pyc_paths:
      raise args.UsageError('dis-md5: At least one .pyc path is required.')

    for path in pyc_paths:
      h = hashlib.md5()
      with open(path) as f:
        magic = f.read(4)
        h.update(magic)
        ignored_timestamp = f.read(4)
        while True:
          b = f.read(64 * 1024)
          if not b:
            break
          h.update(b)
      print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

  elif action == 'run':  # Compile and run, without writing pyc file
    # TODO: Add an option like -v in __main__

    #level = logging.DEBUG if args.verbose else logging.WARNING
    #logging.basicConfig(level=level)
    #logging.basicConfig(level=logging.DEBUG)

    opt, i = compile_spec.Parse(argv)

    py_path = argv[i]
    opy_argv = argv[i:]

    if py_path.endswith('.py'):
      with open(py_path) as f:
        co = compiler.Compile(f, opt, 'exec')
      num_ticks = execfile.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = execfile.run_code_object(co, opy_argv)

    else:
      raise args.UsageError('Invalid path %r' % py_path)

  elif action == 'run-ovm':  # Compile and run, without writing pyc file
    opt, i = compile_spec.Parse(argv)
    py_path = argv[i]
    opy_argv = argv[i+1:]

    if py_path.endswith('.py'):
      #mode = 'exec'
      mode = 'ovm'  # OVM bytecode is different!
      with open(py_path) as f:
        co = compiler.Compile(f, opt, mode)
      log('Compiled to %d bytes of OVM code', len(co.co_code))
      num_ticks = ovm.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = ovm.run_code_object(co, opy_argv)

    else:
      raise args.UsageError('Invalid path %r' % py_path)

  else:
    raise args.UsageError('Invalid action %r' % action)
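Unlike main() in Code Examples #5 and #6, OpyCommandMain takes argv without the program name, so action = argv[0]. A hedged example dispatching the 'parse-with' action, which parses a string with an arbitrary pgen2 grammar but the Python tokenizer; the grammar path and expression are assumptions:

    # Sketch only: argv[0] is the action, followed by that action's
    # positional arguments (grammar_path, start_symbol, code_str).
    OpyCommandMain(['parse-with', 'oil_lang/grammar.pgen2',
                    'eval_input', '1 + 2*3'])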