def def_fsm_char_pattern(cls, ch, regex, *args):
    """Register new FSM character pattern within tokenizer.

    :Parameters:
        ch : str
            the FSM character
        regex
            regular expression matching all characters from the class,
            it may be either string describing regular expression or
            a regular expression object compiled with ``re.compile()``.
            See documentation of `ezmlex.patterns.def_pattern()` for
            details. It is passed through ``ccat('^', regex, '$')``
            before registration (presumably anchoring it to a whole
            item -- confirm against `ezmlex.patterns.ccat()`).
        args
            extra positional arguments, forwarded unchanged to
            `ezmlex.patterns.def_pattern()`.

    :Note: The NUL character (``chr(0)``) is reserved; passing it as
      `ch` raises ``RuntimeError`` before anything is registered.

    :Note: FSM character patterns must not overlap. If one character
      class within tokenizer matches given item, no other pattern in
      the same tokenizer may match this item.

    :Note: If it doesn't exist, this method will create the dictionary
      ``_fsm_char_patterns`` within ``cls`` subclass (as a class
      attribute).

    :Note: This method may throw exceptions in case the `ch` was
      already registered. For exception specification, see
      documentation of `ezmlex.patterns.def_pattern()`.
    """
    # Reject the reserved character first, before importing or touching
    # ``cls``. ``%r`` is used so the message shows an escaped, readable
    # form instead of embedding a raw NUL byte.
    if ch == '\0':
        raise RuntimeError('FSM char %r is reserved' % ch)

    from ezmlex.patterns import def_pattern, ccat

    def_pattern(cls.fsm_char_patterns(), ch, ccat('^', regex, '$'), *args)
def def_token_type(cls, _id, pattern, *args):
    """Register a new token type within tokenizer.

    :Parameters:
        _id
            identifier under which the token type is registered; passed
            unchanged to `ezmlex.tokens.def_token()`.
        pattern
            pattern describing the token; it is passed through
            `ezmlex.patterns.ccat()` before registration.
        args
            extra positional arguments, forwarded unchanged to
            `ezmlex.tokens.def_token()`.

    :Note: The registry is obtained from ``cls.token_types()``. See
      documentation of `ezmlex.tokens.def_token()` for the exceptions
      it may raise.
    """
    from ezmlex.patterns import ccat
    from ezmlex.tokens import def_token

    # Fetch the registry before building the pattern, keeping the same
    # evaluation order as a single inline call would have.
    registry = cls.token_types()
    token_pattern = ccat(pattern)
    def_token(registry, _id, token_pattern, *args)