Python Parserの例、pgen2.parse.Parser Pythonの例

コード例 #1

0

ファイルを表示

def main(argv):
    grammar_path = 'tools/find/find.pgen2'
    tok_def = TokenDef()
    with open(grammar_path) as f:
        gr = pgen.MakeGrammar(f, tok_def=tok_def)

    p = parse.Parser(gr, convert=NoSingletonAction)
    tokens = Tokens(argv[1:])

    #print(list(tokens))
    start_symbol = 'start'
    pnode = driver.PushTokens(p, tokens, gr, start_symbol, opmap=OPMAP)

    names = {}
    for k, v in gr.number2symbol.items():
        # eval_input == 256.  Remove?
        assert k >= 256, (k, v)
        assert k not in names, k
        names[k] = v
    # TODO: These overlap
    for k, v in token.tok_name.items():
        if k != token.NT_OFFSET:
            assert k not in names, k
            names[k] = v
    for name, num in OPMAP.items():
        assert num not in names, num
        names[num] = name

    #print(pnode)
    printer = ParseTreePrinter(names)
    printer.Print(pnode)

    # TODO: Translate pnode into a tree like this.
    left = expr.True_()
    right = expr.PathTest(False, '*.py')
    ast_node = expr.Binary(op_e.And, left, right)
    ast_node.PrettyPrint()
    print()

コード例 #2

0

ファイルを表示

 def __init__(self, gr):
   # type: (Grammar) -> None
   self.gr = gr
   # Reused multiple times.
   self.push_parser = parse.Parser(gr, convert=NoSingletonAction)

コード例 #3

0

ファイルを表示

ファイル: expr_parse.py プロジェクト: roryokane/oil

 def __init__(self, parse_ctx, gr):
   # type: (ParseContext, Grammar) -> None
   self.parse_ctx = parse_ctx
   self.gr = gr
   # Reused multiple times.
   self.push_parser = parse.Parser(gr)

コード例 #4

0

ファイルを表示

def OpyCommandMain(argv):
  """Dispatch to the right action."""

  # TODO: Use core/arg_def.
  #opts, argv = Options().parse_args(argv)

  try:
    action = argv[0]
  except IndexError:
    raise error.Usage('opy: Missing required subcommand.')

  argv = argv[1:]  # TODO: Should I do input.ReadRequiredArg()?
                   # That will shift the input.

  if action in (
      'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
      'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
    loader = pyutil.GetResourceLoader()
    f = loader.open(GRAMMAR_REL_PATH)
    contents = f.read()
    f.close()
    gr = grammar.Grammar()
    gr.loads(contents)

    # In Python 2 code, always use from __future__ import print_function.
    try:
      del gr.keywords["print"]
    except KeyError:
      pass

    symbols = Symbols(gr)
    pytree.Init(symbols)  # for type_repr() pretty printing
    transformer.Init(symbols)  # for _names and other dicts

    compiler = skeleton.Compiler(gr)
  else:
    # e.g. pgen2 doesn't use any of these.  Maybe we should make a different
    # tool.
    compiler = None

  # TODO: Also have a run_spec for 'opyc run'.
  compile_spec = arg_def.OilFlags('opy')
  compile_spec.Flag('-emit-docstring', args.Bool, default=True,
                    help='Whether to emit docstrings')
  compile_spec.Flag('-fast-ops', args.Bool, default=True,
                    help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
  compile_spec.Flag('-oil-subset', args.Bool, default=False,
                    help='Only allow the constructs necessary to implement'
                    'Oil. Example: using multiple inheritance will abort '
                    'compilation.')

  #
  # Actions
  #

  if action == 'pgen2':
    grammar_path = argv[0]
    marshal_path = argv[1]
    WriteGrammar(grammar_path, marshal_path)

  elif action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[1]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    printer = TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)
    n = CountTupleTree(tree)
    log('COUNT %d', n)

  elif action == 'lex':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      for typ, val, start, end, unused_line in tokens:
        print('%10s %10s %-10s %r' % (start, end, token.tok_name[typ], val))

  elif action == 'lex-names':  # Print all the NAME tokens.
    for py_path in argv:
      log('Lexing %s', py_path)
      with open(py_path) as f:
        tokens = tokenize.generate_tokens(f.readline)
        for typ, val, start, end, unused_line in tokens:
          if typ == token.NAME:
            print(val)

  elif action == 'parse':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      p = parse.Parser(gr)
      pnode  = driver.PushTokens(p, tokens, gr, 'file_input')

    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  # Parse with an arbitrary grammar, but the Python lexer.
  elif action == 'parse-with':
    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f)

    f = cStringIO.StringIO(code_str)
    tokens = tokenize.generate_tokens(f.readline)
    p = parse.Parser(gr)  # no convert=
    try:
      pnode = driver.PushTokens(p, tokens, gr, start_symbol)
    except parse.ParseError as e:
      # Extract location information and show it.
      _, _, (lineno, offset) = e.opaque
      # extra line needed for '\n' ?
      lines = code_str.splitlines() + ['']

      line = lines[lineno-1]
      log('  %s', line)
      log('  %s^', ' '*offset)
      log('Parse Error: %s', e)
      return 1
    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  elif action == 'ast':  # output AST
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='ast')

  elif action == 'symbols':  # output symbols
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='symbols')

  elif action == 'cfg':  # output Control Flow Graph
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='cfg')

  elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
    # spec.Arg('action', ['foo', 'bar'])
    # But that leads to some duplication.

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    out_path = argv[i+1]

    with open(py_path) as f:
      co = compiler.Compile(f, opt, 'exec')

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = misc.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile-ovm':
    # NOTE: obsolete
    from ovm2 import oheap2
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    out_path = argv[i+1]

    # Compile to Python bytecode (TODO: remove ovm_codegen.py)
    mode = 'exec'
    with open(py_path) as f:
      co = compiler.Compile(f, opt, mode)

    if 1:
      with open(out_path, 'wb') as out_f:
        oheap2.Write(co, out_f)
      return 0

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      if 1:
        out_f.write(co.co_code)
      else:
        h = misc.getPycHeader(py_path)
        out_f.write(h)
        marshal.dump(co, out_f)
    log('Wrote only the bytecode to %r', out_path)

  elif action == 'eval':  # Like compile, but parses to a code object and prints it
    opt, i = compile_spec.ParseArgv(argv)
    py_expr = argv[i]
    f = skeleton.StringInput(py_expr, '<eval input>')
    co = compiler.Compile(f, opt, 'eval')

    v = dis_tool.Visitor()
    v.show_code(co)
    print()
    print('RESULT:')
    print(eval(co))

  elif action == 'repl':  # Like eval in a loop
    while True:
      py_expr = raw_input('opy> ')
      f = skeleton.StringInput(py_expr, '<REPL input>')

      # TODO: change this to 'single input'?  Why doesn't this work?
      co = compiler.Compile(f, opt, 'eval')

      v = dis_tool.Visitor()
      v.show_code(co)
      print(eval(co))

  elif action == 'dis-tables':
    out_dir = argv[0]
    pyc_paths = argv[1:]

    out = TableOutput(out_dir)

    for pyc_path in pyc_paths:
      with open(pyc_path) as f:
        magic, unixtime, timestamp, code = dis_tool.unpack_pyc(f)
        WriteDisTables(pyc_path, code, out)

    out.Close()

  elif action == 'dis':
    opt, i = compile_spec.ParseArgv(argv)
    path = argv[i]
    v = dis_tool.Visitor()

    if path.endswith('.py'):
      with open(path) as f:
        co = compiler.Compile(f, opt, 'exec')

      log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
      v.show_code(co)

    else:  # assume pyc_path
      with open(path, 'rb') as f:
        v.Visit(f)

  elif action == 'dis-md5':
    pyc_paths = argv
    if not pyc_paths:
      raise error.Usage('dis-md5: At least one .pyc path is required.')

    for path in pyc_paths:
      h = hashlib.md5()
      with open(path) as f:
        magic = f.read(4)
        h.update(magic)
        ignored_timestamp = f.read(4)
        while True:
          b = f.read(64 * 1024)
          if not b:
            break
          h.update(b)
      print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

  elif action == 'run':  # Compile and run, without writing pyc file
    # TODO: Add an option like -v in __main__

    #level = logging.DEBUG if args.verbose else logging.WARNING
    #logging.basicConfig(level=level)
    #logging.basicConfig(level=logging.DEBUG)

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    opy_argv = argv[i:]

    if py_path.endswith('.py'):
      with open(py_path) as f:
        co = compiler.Compile(f, opt, 'exec')
      num_ticks = execfile.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = execfile.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  elif action == 'run-ovm':  # Compile and run, without writing pyc file
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    opy_argv = argv[i+1:]

    if py_path.endswith('.py'):
      #mode = 'exec'
      mode = 'ovm'  # OVM bytecode is different!
      with open(py_path) as f:
        co = compiler.Compile(f, opt, mode)
      log('Compiled to %d bytes of OVM code', len(co.co_code))
      num_ticks = ovm.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = ovm.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  else:
    raise error.Usage('Invalid action %r' % action)

コード例 #5

0

ファイルを表示

def Compile(f, opt, gr, mode, print_action=None):
  """Run the full compiler pipeline.

  Args:
    f: file handle with input source code
    opt: Parsed command line flags
    gr: Grammar
    start_symbol: name of the grammar start symbol
    mode: 'exec', 'eval', or 'single', like Python's builtin compile()
    print_action: 'ast' or 'cfg'.  Print an intermediate representation.
    opt: Command line flags
  """
  filename = f.name

  tokens = tokenize.generate_tokens(f.readline)

  p = parse.Parser(gr)
  if mode == 'single':
    start_symbol = 'single_input'
  elif mode == 'exec':
    start_symbol = 'file_input'
  elif mode == 'eval':
    start_symbol = 'eval_input'

  parse_tree = driver.PushTokens(p, tokens, gr, start_symbol)

  parse_tuples = _ParseTreeToTuples(parse_tree)

  tr = transformer.Transformer()
  as_tree = tr.transform(parse_tuples)

  if print_action == 'ast':
      print(as_tree)
      return

  # NOTE: This currently does nothing!
  v = syntax.SyntaxErrorChecker()
  v.Dispatch(as_tree)

  s = symbols.SymbolVisitor()
  s.Dispatch(as_tree)

  if print_action == 'symbols':
      _PrintScopes(s.scopes)
      return

  graph = pyassem.FlowGraph()  # Mutated by code generator

  if mode == "single":  # Not used now?
      ctx = _ModuleContext(filename, opt, s.scopes)
      # NOTE: the name of the Frame is a comment, not exposed to users.
      frame = pyassem.Frame("<interactive>", filename)  # mutated
      gen = pycodegen.InteractiveCodeGenerator(ctx, frame, graph)
      gen.set_lineno(as_tree)

  elif mode == "exec":
      # TODO: Does this need to be made more efficient?
      p1 = future.FutureParser()
      p2 = future.BadFutureParser()
      p1.Dispatch(as_tree)
      p2.Dispatch(as_tree)

      ctx = _ModuleContext(filename, opt, s.scopes, futures=p1.get_features())
      frame = pyassem.Frame("<module>", filename)  # mutated

      gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

  elif mode == "eval":
      ctx = _ModuleContext(filename, opt, s.scopes)
      frame = pyassem.Frame("<expression>", filename)  # mutated
      gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

  else:
      raise AssertionError('Invalid mode %r' % mode)

  # NOTE: There is no Start() or FindLocals() at the top level.
  gen.Dispatch(as_tree)  # mutates graph
  gen.Finish()

  if print_action == 'cfg':
      print(graph)
      return

  co = pyassem.MakeCodeObject(frame, graph, opt)

  # NOTE: Could call marshal.dump here?
  return co