コード例 #1
0
def TranslateEcholexer(echo_def):
  """Print the re2c source of the C function MatchEchoToken() to stdout.

  Args:
    echo_def: iterable of (is_regex, pat, token_id) triples.  Each triple
      becomes one re2c rule: regex patterns are converted with
      TranslateRegex(), all other patterns with TranslateConstant(), and the
      token id is rendered with meta.IdName().
  """
  # One rule per pattern; the pattern column is left-justified to 30 chars.
  rule_fmt = '      %-30s { *id = id__%s; break; }'
  # EARLY RETURN: Do NOT advance past the NUL terminator.
  eol_fmt = '      %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }'

  c_prologue = r"""
static inline void MatchEchoToken(unsigned char* line, int line_len,
                                  int start_pos, int* id, int* end_pos) {
  assert(start_pos <= line_len);  /* caller should have checked */

  unsigned char* p = line + start_pos;  /* modified by re2c */

  unsigned char* YYMARKER;  /* why do we need this? */

  for (;;) {
    /*!re2c
"""
  c_epilogue = """
    */
  }
  *end_pos = p - line;  /* relative */
}
"""

  print(c_prologue)

  for is_re, pattern, tok in echo_def:
    # Only the selected translator is looked up and called.
    translate = TranslateRegex if is_re else TranslateConstant
    re2c_rule = translate(pattern)
    print(rule_fmt % (re2c_rule, meta.IdName(tok)))

  # The NUL rule always comes after every rule from echo_def.
  print(eol_fmt % r'"\x00"')

  print(c_epilogue)
コード例 #2
0
def TranslateOshLexer(lexer_def):
  """Print the re2c source of the C function MatchOshToken() to stdout.

  The output is a shared re2c configuration comment followed by a function
  whose body is a switch over lex_mode, with one case per lexer state.

  Args:
    lexer_def: mapping of lexer state -> list of (is_regex, pat, token_id)
      triples.  Each state must have a `name` attribute, and the name of its
      class must end in a two-character suffix (e.g. '_e'), which is stripped
      to build the C case label '<prefix>__<name>'.  Regex patterns are
      converted with TranslateRegex(), all other patterns with
      TranslateConstant(); token ids are rendered with meta.IdName().
  """
  # https://stackoverflow.com/questions/12836171/difference-between-an-inline-function-and-static-inline-function
  # Has to be 'static inline' rather than 'inline', otherwise the
  # _bin/oil.ovm-dbg build fails (but the _bin/oil.ovm doesn't!).
  # Since we reference this function in exactly one translation unit --
  # fastlex.c, the difference is moot, and we just satisfy the compiler.

  print(r"""
/* Common stuff */

/*!re2c
  re2c:define:YYCTYPE = "unsigned char";
  re2c:define:YYCURSOR = p;
  re2c:yyfill:enable = 0;  // generated code doesn't ask for more input
*/

static inline void MatchOshToken(int lex_mode, unsigned char* line, int line_len,
                              int start_pos, int* id, int* end_pos) {
  assert(start_pos <= line_len);  /* caller should have checked */

  unsigned char* p = line + start_pos;  /* modified by re2c */
  //printf("p: %p q: %p\n", p, q);

  unsigned char* YYMARKER;  /* why do we need this? */
  switch (lex_mode)  {
""")

  # TODO: Should be ordered by most common?  Or will profile-directed feedback
  # help?

  # items() instead of the Python 2-only iteritems(), so this also runs on
  # Python 3; the emitted text is the same.
  for state, pat_list in lexer_def.items():
    # HACK: strip off '_e'
    prefix = state.__class__.__name__[:-2]
    print('  case %s__%s:' % (prefix, state.name))
    print('    for (;;) {')
    print('      /*!re2c')

    for is_regex, pat, token_id in pat_list:
      if is_regex:
        re2c_pat = TranslateRegex(pat)
      else:
        re2c_pat = TranslateConstant(pat)
      id_name = meta.IdName(token_id)
      print('      %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))

    # EARLY RETURN: Do NOT advance past the NUL terminator.
    print('      %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }' %
          r'"\x00"')

    print('      */')
    print('    }')
    print('    break;')
    print()

  # This is literal code without generation.  The string below is a bare
  # expression statement: it is never printed and has no effect at runtime.
  """
  case lex_mode__OUTER:
    for (;;) {
      /*!re2c
      literal_chunk = [a-zA-Z0-9_/.-]+;
      var_like    = [a-zA-Z_][a-zA-Z0-9_]* "=";  // might be NAME=val
      comment     = [ \t\r]* "#" [^\000\r\n]*;
      space       = [ \t\r]+;
      nul = "\000";

      literal_chunk { *id = id__Lit_Chars; break; }
      var_like      { *id = id__Lit_VarLike; break; }

      [ \t\r]* "\n" { *id = id__Op_Newline; break; }
      space         { *id = id__WS_Space; break; }

      nul           { *id = id__Eof_Real; break; }

      // anything else
      *             { *id = id__Lit_Other; break; }

      */
    }

    *end_pos = p - line;
    break;

  case lex_mode__COMMENT:
    *id = id__Lit_Other;
    *end_pos = 6;
    break;
  """

  # Close the switch (unknown modes hit assert(0)) and the function.
  print("""\
  default:
    assert(0);

  }
  *end_pos = p - line;  /* relative */
}
""")