def TranslateEcholexer(echo_def):
    """Print the re2c source of the C function MatchEchoToken().

    Args:
      echo_def: iterable of (is_regex, pat, token_id) tuples.  A pattern
        flagged as a regex is converted with TranslateRegex(), any other
        pattern with TranslateConstant().  The C token name is built from
        meta.IdName(token_id).

    The generated code is written to stdout with print(); nothing is
    returned.
    """
    # C function header, up to the opening of the re2c block.
    print(r"""
static inline void MatchEchoToken(unsigned char* line, int line_len,
                                  int start_pos, int* id, int* end_pos) {
  assert(start_pos <= line_len); /* caller should have checked */

  unsigned char* p = line + start_pos; /* modified by re2c */
  unsigned char* YYMARKER; /* why do we need this? */

  for (;;) {
    /*!re2c
""")

    # One re2c rule per pattern: store the token id and leave the loop.
    rule_fmt = ' %-30s { *id = id__%s; break; }'
    for is_regex, pat, token_id in echo_def:
        translate = TranslateRegex if is_regex else TranslateConstant
        print(rule_fmt % (translate(pat), meta.IdName(token_id)))

    # EARLY RETURN: Do NOT advance past the NUL terminator.
    nul_pat = r'"\x00"'
    print(' %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }'
          % nul_pat)

    # Close the re2c block, the loop and the C function.
    print("""
    */
  }
  *end_pos = p - line; /* relative */
}
""")
def TranslateOshLexer(lexer_def):
    """Print the re2c source of the C function MatchOshToken().

    Args:
      lexer_def: mapping from a lexer state to its list of
        (is_regex, pat, token_id) rules.  Each state needs a `name`
        attribute, and the last two characters of its class name (the
        '_e' suffix) are stripped to build the C `case` label
        `<prefix>__<name>`.  Regex patterns are converted with
        TranslateRegex(), other patterns with TranslateConstant(), and the
        C token name is built from meta.IdName(token_id).

    One `case` of a `switch (lex_mode)` is printed per state, followed by a
    `default: assert(0);` arm.  The generated code is written to stdout
    with print(); nothing is returned.
    """
    # https://stackoverflow.com/questions/12836171/difference-between-an-inline-function-and-static-inline-function
    # Has to be 'static inline' rather than 'inline', otherwise the
    # _bin/oil.ovm-dbg build fails (but the _bin/oil.ovm doesn't!).
    # Since we reference this function in exactly one translation unit --
    # fastlex.c, the difference is moot, and we just satisfy the compiler.

    # NOTE: the '//' comments in this template must each end their line,
    # otherwise they would comment out the code that follows them.
    print(r"""
/* Common stuff */

/*!re2c
  re2c:define:YYCTYPE = "unsigned char";
  re2c:define:YYCURSOR = p;
  re2c:yyfill:enable = 0; // generated code doesn't ask for more input
*/

static inline void MatchOshToken(int lex_mode, unsigned char* line, int line_len,
                                 int start_pos, int* id, int* end_pos) {
  assert(start_pos <= line_len); /* caller should have checked */

  unsigned char* p = line + start_pos; /* modified by re2c */
  //printf("p: %p q: %p\n", p, q);

  unsigned char* YYMARKER; /* why do we need this? */
  switch (lex_mode) {
""")

    # TODO: Should be ordered by most common? Or will profile-directed
    # feedback help?
    #
    # items() rather than iteritems(): the latter only exists on Python 2
    # dicts, while items() works on both Python 2 and Python 3.
    for state, pat_list in lexer_def.items():
        # HACK: strip off '_e'
        prefix = state.__class__.__name__[:-2]
        print(' case %s__%s:' % (prefix, state.name))
        print(' for (;;) {')
        print(' /*!re2c')

        # One re2c rule per pattern: store the token id and leave the loop.
        for is_regex, pat, token_id in pat_list:
            if is_regex:
                re2c_pat = TranslateRegex(pat)
            else:
                re2c_pat = TranslateConstant(pat)
            id_name = meta.IdName(token_id)
            print(' %-30s { *id = id__%s; break; }' % (re2c_pat, id_name))

        # EARLY RETURN: Do NOT advance past the NUL terminator.
        print(' %-30s { *id = id__Eol_Tok; *end_pos = start_pos; return; }'
              % r'"\x00"')

        print(' */')
        print(' }')
        print(' break;')
        print()

    # This is literal code without generation:
    """
  case lex_mode__OUTER:
    for (;;) {
      /*!re2c
      literal_chunk = [a-zA-Z0-9_/.-]+;
      var_like = [a-zA-Z_][a-zA-Z0-9_]* "="; // might be NAME=val
      comment = [ \t\r]* "#" [^\000\r\n]*;
      space = [ \t\r]+;
      nul = "\000";

      literal_chunk { *id = id__Lit_Chars; break; }
      var_like { *id = id__Lit_VarLike; break; }

      [ \t\r]* "\n" { *id = id__Op_Newline; break; }
      space { *id = id__WS_Space; break; }

      nul { *id = id__Eof_Real; break; }

      // anything else
      * { *id = id__Lit_Other; break; }

      */
    }

    *end_pos = p - line;
    break;

  case lex_mode__COMMENT:
    *id = id__Lit_Other;
    *end_pos = 6;
    break;
    """

    # Unknown lex_mode is a programming error; then close switch and function.
    print("""\
  default:
    assert(0);

  }
  *end_pos = p - line; /* relative */
}
""")