Beispiel #1
0
def WriteGrammar(grammar_path, marshal_path):
  """Used for py27.grammar.
  
  oil_lang/grammar.pgen2 uses oil_lang/grammar_gen.py
  """
  with open(grammar_path) as f:
    gr = pgen.MakeGrammar(f)

  with open(marshal_path, 'wb') as out_f:
    gr.dump(out_f)

  log('Compiled %s -> grammar tables in %s', grammar_path, marshal_path)
Beispiel #2
0
def main(argv):
    grammar_path = 'tools/find/find.pgen2'
    tok_def = TokenDef()
    with open(grammar_path) as f:
        gr = pgen.MakeGrammar(f, tok_def=tok_def)

    p = parse.Parser(gr, convert=NoSingletonAction)
    tokens = Tokens(argv[1:])

    #print(list(tokens))
    start_symbol = 'start'
    pnode = driver.PushTokens(p, tokens, gr, start_symbol, opmap=OPMAP)

    names = {}
    for k, v in gr.number2symbol.items():
        # eval_input == 256.  Remove?
        assert k >= 256, (k, v)
        assert k not in names, k
        names[k] = v
    # TODO: These overlap
    for k, v in token.tok_name.items():
        if k != token.NT_OFFSET:
            assert k not in names, k
            names[k] = v
    for name, num in OPMAP.items():
        assert num not in names, num
        names[num] = name

    #print(pnode)
    printer = ParseTreePrinter(names)
    printer.Print(pnode)

    # TODO: Translate pnode into a tree like this.
    left = expr.True_()
    right = expr.PathTest(False, '*.py')
    ast_node = expr.Binary(op_e.And, left, right)
    ast_node.PrettyPrint()
    print()
Beispiel #3
0
def main(argv):
  action = argv[1]
  argv = argv[2:]

  # Used at grammar BUILD time.
  OPS = {
      '.': Id.Expr_Dot,
      '->': Id.Expr_RArrow,
      '::': Id.Expr_DColon,

      '@': Id.Expr_At,
      '...': Id.Expr_Ellipsis,

      '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
  }

  # Note: We have two lists of ops because Id.Op_Semi is used, not
  # Id.Arith_Semi.
  for _, token_str, id_ in lex.EXPR_OPS:
    assert token_str not in OPS, token_str
    OPS[token_str] = id_

  # Tokens that look like / or ${ or @{
  triples = (
      meta.ID_SPEC.LexerPairs(Kind.Arith) +
      lex.OIL_LEFT_SUBS +
      lex.OIL_LEFT_UNQUOTED +
      lex.EXPR_WORDS
  )
  more_ops = {}
  for _, token_str, id_ in triples:
    assert token_str not in more_ops, token_str
    more_ops[token_str] = id_

  # Tokens that look like 'for'
  keyword_ops = {}
  for _, token_str, id_ in lex.EXPR_WORDS:  # for, in, etc.
    assert token_str not in keyword_ops, token_str
    keyword_ops[token_str] = id_

  if 0:
    from pprint import pprint
    pprint(OPS)
    print('---')
    pprint(more_ops)
    print('---')
    pprint(keyword_ops)
    print('---')

  tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    lex_ = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.VarDecl(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!

    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
Beispiel #4
0
def main(argv):
  action = argv[1]
  argv = argv[2:]

  # Common initialization
  arith_ops = {}
  for _, token_str, id_ in meta.ID_SPEC.LexerPairs(Kind.Arith):
    arith_ops[token_str] = id_

  if 0:
    from pprint import pprint
    pprint(arith_ops)

  tok_def = OilTokenDef(arith_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]

    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    lex = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.OilAssign(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!

    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
Beispiel #5
0
def OpyCommandMain(argv):
  """Dispatch to the right action."""

  # TODO: Use core/arg_def.
  #opts, argv = Options().parse_args(argv)

  try:
    action = argv[0]
  except IndexError:
    raise error.Usage('opy: Missing required subcommand.')

  argv = argv[1:]  # TODO: Should I do input.ReadRequiredArg()?
                   # That will shift the input.

  if action in (
      'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
      'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
    loader = pyutil.GetResourceLoader()
    f = loader.open(GRAMMAR_REL_PATH)
    contents = f.read()
    f.close()
    gr = grammar.Grammar()
    gr.loads(contents)

    # In Python 2 code, always use from __future__ import print_function.
    try:
      del gr.keywords["print"]
    except KeyError:
      pass

    symbols = Symbols(gr)
    pytree.Init(symbols)  # for type_repr() pretty printing
    transformer.Init(symbols)  # for _names and other dicts

    compiler = skeleton.Compiler(gr)
  else:
    # e.g. pgen2 doesn't use any of these.  Maybe we should make a different
    # tool.
    compiler = None

  # TODO: Also have a run_spec for 'opyc run'.
  compile_spec = arg_def.OilFlags('opy')
  compile_spec.Flag('-emit-docstring', args.Bool, default=True,
                    help='Whether to emit docstrings')
  compile_spec.Flag('-fast-ops', args.Bool, default=True,
                    help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
  compile_spec.Flag('-oil-subset', args.Bool, default=False,
                    help='Only allow the constructs necessary to implement'
                    'Oil. Example: using multiple inheritance will abort '
                    'compilation.')

  #
  # Actions
  #

  if action == 'pgen2':
    grammar_path = argv[0]
    marshal_path = argv[1]
    WriteGrammar(grammar_path, marshal_path)

  elif action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[1]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    printer = TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)
    n = CountTupleTree(tree)
    log('COUNT %d', n)

  elif action == 'lex':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      for typ, val, start, end, unused_line in tokens:
        print('%10s %10s %-10s %r' % (start, end, token.tok_name[typ], val))

  elif action == 'lex-names':  # Print all the NAME tokens.
    for py_path in argv:
      log('Lexing %s', py_path)
      with open(py_path) as f:
        tokens = tokenize.generate_tokens(f.readline)
        for typ, val, start, end, unused_line in tokens:
          if typ == token.NAME:
            print(val)

  elif action == 'parse':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      p = parse.Parser(gr)
      pnode  = driver.PushTokens(p, tokens, gr, 'file_input')

    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  # Parse with an arbitrary grammar, but the Python lexer.
  elif action == 'parse-with':
    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f)

    f = cStringIO.StringIO(code_str)
    tokens = tokenize.generate_tokens(f.readline)
    p = parse.Parser(gr)  # no convert=
    try:
      pnode = driver.PushTokens(p, tokens, gr, start_symbol)
    except parse.ParseError as e:
      # Extract location information and show it.
      _, _, (lineno, offset) = e.opaque
      # extra line needed for '\n' ?
      lines = code_str.splitlines() + ['']

      line = lines[lineno-1]
      log('  %s', line)
      log('  %s^', ' '*offset)
      log('Parse Error: %s', e)
      return 1
    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  elif action == 'ast':  # output AST
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='ast')

  elif action == 'symbols':  # output symbols
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='symbols')

  elif action == 'cfg':  # output Control Flow Graph
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='cfg')

  elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
    # spec.Arg('action', ['foo', 'bar'])
    # But that leads to some duplication.

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    out_path = argv[i+1]

    with open(py_path) as f:
      co = compiler.Compile(f, opt, 'exec')

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = misc.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile-ovm':
    # NOTE: obsolete
    from ovm2 import oheap2
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    out_path = argv[i+1]

    # Compile to Python bytecode (TODO: remove ovm_codegen.py)
    mode = 'exec'
    with open(py_path) as f:
      co = compiler.Compile(f, opt, mode)

    if 1:
      with open(out_path, 'wb') as out_f:
        oheap2.Write(co, out_f)
      return 0

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      if 1:
        out_f.write(co.co_code)
      else:
        h = misc.getPycHeader(py_path)
        out_f.write(h)
        marshal.dump(co, out_f)
    log('Wrote only the bytecode to %r', out_path)

  elif action == 'eval':  # Like compile, but parses to a code object and prints it
    opt, i = compile_spec.ParseArgv(argv)
    py_expr = argv[i]
    f = skeleton.StringInput(py_expr, '<eval input>')
    co = compiler.Compile(f, opt, 'eval')

    v = dis_tool.Visitor()
    v.show_code(co)
    print()
    print('RESULT:')
    print(eval(co))

  elif action == 'repl':  # Like eval in a loop
    while True:
      py_expr = raw_input('opy> ')
      f = skeleton.StringInput(py_expr, '<REPL input>')

      # TODO: change this to 'single input'?  Why doesn't this work?
      co = compiler.Compile(f, opt, 'eval')

      v = dis_tool.Visitor()
      v.show_code(co)
      print(eval(co))

  elif action == 'dis-tables':
    out_dir = argv[0]
    pyc_paths = argv[1:]

    out = TableOutput(out_dir)

    for pyc_path in pyc_paths:
      with open(pyc_path) as f:
        magic, unixtime, timestamp, code = dis_tool.unpack_pyc(f)
        WriteDisTables(pyc_path, code, out)

    out.Close()

  elif action == 'dis':
    opt, i = compile_spec.ParseArgv(argv)
    path = argv[i]
    v = dis_tool.Visitor()

    if path.endswith('.py'):
      with open(path) as f:
        co = compiler.Compile(f, opt, 'exec')

      log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
      v.show_code(co)

    else:  # assume pyc_path
      with open(path, 'rb') as f:
        v.Visit(f)

  elif action == 'dis-md5':
    pyc_paths = argv
    if not pyc_paths:
      raise error.Usage('dis-md5: At least one .pyc path is required.')

    for path in pyc_paths:
      h = hashlib.md5()
      with open(path) as f:
        magic = f.read(4)
        h.update(magic)
        ignored_timestamp = f.read(4)
        while True:
          b = f.read(64 * 1024)
          if not b:
            break
          h.update(b)
      print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

  elif action == 'run':  # Compile and run, without writing pyc file
    # TODO: Add an option like -v in __main__

    #level = logging.DEBUG if args.verbose else logging.WARNING
    #logging.basicConfig(level=level)
    #logging.basicConfig(level=logging.DEBUG)

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    opy_argv = argv[i:]

    if py_path.endswith('.py'):
      with open(py_path) as f:
        co = compiler.Compile(f, opt, 'exec')
      num_ticks = execfile.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = execfile.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  elif action == 'run-ovm':  # Compile and run, without writing pyc file
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    opy_argv = argv[i+1:]

    if py_path.endswith('.py'):
      #mode = 'exec'
      mode = 'ovm'  # OVM bytecode is different!
      with open(py_path) as f:
        co = compiler.Compile(f, opt, mode)
      log('Compiled to %d bytes of OVM code', len(co.co_code))
      num_ticks = ovm.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = ovm.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  else:
    raise error.Usage('Invalid action %r' % action)