def __frame(FunctionName, IteratorName, CodeTxt, DoorIdReturn, DoorIdBeyond):
    """Wrap generated counter code 'CodeTxt' into a complete C function.

    The produced function counts line/column numbers over the lexeme between
    'LexemeBegin' and 'LexemeEnd'. The whole body is guarded by
    '__QUEX_OPTION_COUNTER'.

    FunctionName -- name of the emitted C function.
    IteratorName -- name of the C variable iterating over the lexeme.
    CodeTxt      -- list of code fragments implementing the counting.
    DoorIdReturn -- door where counting ends (label emitted at the end).
    DoorIdBeyond -- door to go to upon FAILURE.

    RETURNS: Emitted C code as a single string.
    """
    txt = [
        "#ifdef __QUEX_OPTION_COUNTER\n"
        + "static void\n"
        + "%s(QUEX_TYPE_ANALYZER* me, QUEX_TYPE_LEXATOM* LexemeBegin, QUEX_TYPE_LEXATOM* LexemeEnd)\n" % FunctionName
        + "{\n"
        + "# define self (*me)\n"
        + "/* 'QUEX_GOTO_STATE' requires 'QUEX_LABEL_STATE_ROUTER' */\n"
        + "# define QUEX_LABEL_STATE_ROUTER %s\n" % dial_db.get_label_by_door_id(DoorID.global_state_router())
    ]
    # Following function refers to the global 'variable_db'
    txt.append(Lng.VARIABLE_DEFINITIONS(variable_db))
    txt.append(
        " (void)me;\n"
        " __QUEX_IF_COUNT_SHIFT_VALUES();\n"
        " /* Allow LexemeBegin == LexemeEnd (e.g. END_OF_STREAM)\n"
        " * => Caller does not need to check\n"
        " * BUT, if so quit immediately after 'shift values'. */\n"
        " __quex_assert(LexemeBegin <= LexemeEnd);\n"
        " if(LexemeBegin == LexemeEnd) return;\n"
        " %s = LexemeBegin;\n" % IteratorName
    )
    # The counting core provided by the caller.
    txt.extend(CodeTxt)
    door_id_failure     = DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE)
    door_id_bad_lexatom = DoorID.incidence(E_IncidenceIDs.BAD_LEXATOM)
    txt.append(
        "%s /* TERMINAL: BAD_LEXATOM */\n;\n" % Lng.LABEL(door_id_bad_lexatom)
        + "%s /* TERMINAL: FAILURE */\n%s\n" % (Lng.LABEL(door_id_failure),
                                                Lng.GOTO(DoorIdBeyond))
    )
    txt.append(
        "%s:\n" % dial_db.get_label_by_door_id(DoorIdReturn)
        + " /* Assert: lexeme in codec's character boundaries. */\n"
        + " __quex_assert(%s == LexemeEnd);\n" % IteratorName
        + " return;\n"
        + "".join(generator.do_state_router())
        + "# undef self\n"
        + "# undef QUEX_LABEL_STATE_ROUTER\n"
        # If there is no MATCH_FAILURE, then DoorIdBeyond is still referenced as
        # 'gotoed', but MATCH_FAILURE is never implemented, later on, because
        # its DoorId is not referenced.
        + "# if ! defined(QUEX_OPTION_COMPUTED_GOTOS)\n"
        + " %s /* in QUEX_GOTO_STATE */\n" % Lng.GOTO(DoorID.global_state_router())
        + " %s /* to BAD_LEXATOM */\n" % Lng.GOTO(DoorID.incidence(E_IncidenceIDs.BAD_LEXATOM))
        + "# endif\n"
        + " /* Avoid compiler warning: Unused label for 'TERMINAL <BEYOND>' */\n"
        + " %s\n" % Lng.GOTO(DoorIdBeyond)
        + " %s\n" % Lng.GOTO(door_id_failure)
        + " (void)target_state_index;\n"
        + " (void)target_state_else_index;\n"
        + "}\n"
        + "#endif /* __QUEX_OPTION_COUNTER */\n"
    )
    return "".join(Lng.GET_PLAIN_STRINGS(txt))
def get_transition_function(iid_map, Codec):
    """Generate the C transition function for the character counter.

    iid_map -- iterable of (character_set, incidence_id) pairs.
    Codec   -- "UTF8" selects the utf8 state-split codec, anything else
               plain unicode.

    RETURNS: Generated C code as a single string. Each incidence terminal
             returns its incidence id as 'int'; drop-out returns -1.
    """
    if Codec == "UTF8":
        Setup.buffer_codec_prepare("utf8", Module=utf8_state_split)
    else:
        Setup.buffer_codec_prepare("unicode")

    cssm     = CharacterSetStateMachine(iid_map, MaintainLexemeF=False)
    analyzer = analyzer_generator.do(cssm.sm, engine.CHARACTER_COUNTER)

    tm_txt = do_analyzer(analyzer)
    tm_txt = Lng.GET_PLAIN_STRINGS(tm_txt)
    tm_txt.append("\n")

    # (fix) Removed unused local 'label' (result of
    # 'dial_db.get_label_by_door_id(DoorID.incidence(MATCH_FAILURE))'); the
    # value was never used, and the sibling implementation keeps the same
    # line commented out.
    for character_set, iid in iid_map:
        tm_txt.append("%s return (int)%s;\n" % (Lng.LABEL(DoorID.incidence(iid)), iid))
    tm_txt.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1)))

    return "".join(tm_txt)
def PPT_range_skipper(NestedF, MHI, i, data, ModeName, OptionsDb, CounterDb, IncidenceDb):
    """Generate a PPT for a range skipper. """
    # Destination after the closing character sequence matched:
    #   -- If End(Sequence) equals the indentation counter's newline,
    #      continue in the indentation handler.
    #   -- Otherwise, restart at the begin of the analyzer.
    if _match_indentation_counter_newline_pattern(OptionsDb.value("indentation"),
                                                  data["closer_sequence"]):
        door_id_after = DoorID.incidence(E_IncidenceIDs.INDENTATION_HANDLER)
    else:
        door_id_after = DoorID.continue_without_on_after_match()

    if NestedF:
        name, code_generator_func = "SKIP NESTED RANGE", skip_nested_range.do
    else:
        name, code_generator_func = "SKIP RANGE", skip_range.do

    # Data handed over to the code generator.
    my_data = deepcopy(data)
    my_data["mode_name"]          = ModeName
    my_data["on_skip_range_open"] = IncidenceDb[E_IncidenceIDs.SKIP_RANGE_OPEN]
    my_data["door_id_after"]      = door_id_after
    my_data["counter_db"]         = CounterDb

    # Terminal: priority derives from mode hierarchy index and position.
    skipper_pattern = deepcopy(my_data["opener_pattern"])
    skipper_pattern.set_incidence_id(dial_db.new_incidence_id())
    skipper_pattern.set_pattern_string("<skip_range>")

    return PPT(PatternPriority(MHI, i),
               skipper_pattern,
               CodeGenerated(code_generator_func, my_data, name))
def _add_newline(psml, SmNewlineOriginal):
    """Append a (newline state machine, newline terminal) pair to 'psml'.

    On a newline: the line number is incremented, the column count is set
    to 1, and indentation counting restarts.
    """
    assert SmNewlineOriginal is not None

    # The original machine is used in the main state machine under a
    # different incidence id -- cloning is essential.
    sm_newline = SmNewlineOriginal.clone()
    sm_newline.set_id(dial_db.new_incidence_id())

    op_list = [
        Op.LineCountAdd(1),
        Op.AssignConstant(E_R.Column, 1),
        Op.GotoDoorId(DoorID.incidence(E_IncidenceIDs.INDENTATION_HANDLER)),
    ]

    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(op_list)),
                        "<INDENTATION NEWLINE>")
    terminal.set_incidence_id(sm_newline.get_id())

    psml.append((sm_newline, terminal))
def _add_comment(psml, SmCommentOriginal, CounterDb):
    """Append a (comment state machine, comment terminal) pair to 'psml'.

    On matching the comment state machine, count the matched lexeme and
    return to the indentation handler. If nothing to add, return silently.
    """
    if SmCommentOriginal is None:
        return

    comment_skip_iid = dial_db.new_incidence_id()

    # Disconnect from machines being used elsewhere.
    SmComment = SmCommentOriginal.clone()
    SmComment.set_id(comment_skip_iid)

    if SmComment.last_character_set().contains_only(ord('\n')):
        # Comment ends with newline => next line begins:
        # line count += 1; column resets to 1.
        # (fix) 'Op.LineCountAdd' -- the bare name 'LineCountAdd' is not in
        # scope; the sibling newline handlers use the 'Op.' factory.
        code = Lng.COMMAND_LIST([
            Op.LineCountAdd(1),
            Op.AssignConstant(E_R.Column, 1),
        ])
    else:
        # General case: run the counter over the matched lexeme; the
        # reference pointer is saved and restored around the count.
        count_info = CountInfo.from_StateMachine(SmComment, CounterDb,
                                                 CodecTrafoInfo=Setup.buffer_codec)
        code = [
            Lng.COMMAND(Op.Assign(E_R.ReferenceP, E_R.LexemeStartP)),
            CounterDb.do_CountInfo(count_info),
            Lng.COMMAND(Op.Assign(E_R.LexemeStartP, E_R.ReferenceP)),
        ]

    code.append(Lng.GOTO(DoorID.incidence(E_IncidenceIDs.INDENTATION_HANDLER)))

    terminal = Terminal(CodeTerminal(code), "INDENTATION COMMENT")
    terminal.set_incidence_id(comment_skip_iid)

    psml.append((SmComment, terminal))
def _add_suppressed_newline(psml, SmSuppressedNewlineOriginal):
    """Append a (suppressed newline machine, terminal) pair to 'psml'.

    A suppressed newline is not like a newline: the next line counts as a
    continuation of the current line. The line number is incremented and
    indentation counting continues.

    NOTE(review): the original docstring states the column is NOT reset to
    1, yet the command list below does 'AssignConstant(Column, 1)' exactly
    like an ordinary newline -- confirm which is intended. Behavior kept
    unchanged here.
    """
    if SmSuppressedNewlineOriginal is None:
        return

    # Clone: the original machine is shared; the parser MUST ensure that a
    # newline suppressor implies a defined newline.
    sm_suppressed_newline = SmSuppressedNewlineOriginal.clone()
    sm_suppressed_newline.set_id(dial_db.new_incidence_id())

    op_list = [
        Op.LineCountAdd(1),
        Op.AssignConstant(E_R.Column, 1),
        Op.GotoDoorId(DoorID.incidence(E_IncidenceIDs.INDENTATION_HANDLER)),
    ]

    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(op_list)),
                        "<INDENTATION SUPPRESSED NEWLINE>")
    terminal.set_incidence_id(sm_suppressed_newline.get_id())

    psml.append((sm_suppressed_newline, terminal))
def TERMINAL_CODE(self, TerminalStateList, TheAnalyzer):
    """Return the list of code fragments implementing all terminal states.

    Terminals are emitted in increasing incidence-id order, each preceded
    by its door label and a debug trace line.
    """
    text = [ cpp._terminal_state_prolog ]
    terminal_door_id_list = []
    for terminal in sorted(TerminalStateList, key=lambda t: t.incidence_id()):
        door_id = DoorID.incidence(terminal.incidence_id())
        terminal_door_id_list.append(door_id)
        # Label + debug hook for this terminal.
        text.append("%s\n __quex_debug(\"* TERMINAL %s\\n\");\n"
                    % (self.LABEL(door_id), terminal.name()))
        code = terminal.code(TheAnalyzer)
        assert none_isinstance(code, list)
        text.extend(code)
        text.append("\n")
    return text
def _code_terminal_on_bad_indentation_character(code, ISetup, ModeName, incidence_db, BadIndentationIid):
    """Append the 'bad indentation character' terminal to 'code'.

    Emits nothing if no bad character set is configured. The user's
    INDENTATION_BAD handler runs with 'BadCharacter' temporarily #define-d
    to the offending character; afterwards control returns to the global
    re-entry.
    """
    if ISetup.bad_character_set.get() is None:
        return

    handler_txt = Lng.SOURCE_REFERENCED(incidence_db[E_IncidenceIDs.INDENTATION_BAD])

    code.extend([
        "%s\n" % Lng.LABEL(DoorID.incidence(BadIndentationIid)),
        "#define BadCharacter (me->buffer._input_p[-1])\n",
        "%s\n" % handler_txt,
        "#undef BadCharacter\n",
        "%s\n" % Lng.GOTO(DoorID.global_reentry()),
    ])
def get_transition_function(iid_map, Codec):
    """Generate the C transition function for the counter's state machine.

    iid_map -- iterable of (character_set, incidence_id) pairs.
    Codec   -- "UTF8" selects the utf8 codec transformation, anything else
               plain unicode.

    RETURNS: Generated C code as one string; each incidence terminal
             returns its incidence id, drop-out returns -1.
    """
    if Codec == "UTF8":
        Setup.buffer_codec_set(bc_factory.do("utf8"), 1)
    else:
        Setup.buffer_codec_set(bc_factory.do("unicode"), -1)

    sm        = StateMachine.from_IncidenceIdMap(iid_map)
    dummy, sm = Setup.buffer_codec.do_state_machine(sm, beautifier)
    analyzer  = analyzer_generator.do(sm, engine.CHARACTER_COUNTER)

    txt = Lng.GET_PLAIN_STRINGS(do_analyzer(analyzer))
    txt.append("\n")

    # One 'return incidence-id' terminal per entry; drop-out returns -1.
    for character_set, iid in iid_map:
        txt.append("%s return (int)%s;\n"
                   % (Lng.LABEL(DoorID.incidence(iid)), iid))
    txt.append("%s return (int)-1;\n" % Lng.LABEL(DoorID.drop_out(-1)))

    return "".join(txt)
def do_state_router(): routed_address_set = dial_db.routed_address_set() # If there is only one address subject to state routing, then the # state router needs to be implemented. #if len(routed_address_set) == 0: # return [] # Add the address of 'terminal_end_of_file()' if it is not there, already. # (It should not be there, if we are working on a fixed chunk, as in 'counting'. # When counting is webbed into analysis:: assert address_eof in routed_address_set) if False: address_eof = dial_db.get_address_by_door_id(DoorID.incidence(E_IncidenceIDs.END_OF_STREAM)) routed_address_set.add(address_eof) dial_db.mark_label_as_gotoed(dial_db.get_label_by_address(address_eof)) routed_state_info_list = state_router_generator.get_info(routed_address_set) return state_router_generator.do(routed_state_info_list)
def get_main_function(tm0, TranstionTxt, Codec):
    """Build the 'main' test function from the template.

    tm0          -- transition map: list of (interval, target) pairs.
    TranstionTxt -- generated transition code to splice into the template.
    Codec        -- "UTF8" => lexatom type 'uint8_t', else 'uint32_t'.

    RETURNS: Complete C source text with all '$$...$$' placeholders of
             'main_template' substituted.
    """
    def indent(Txt, N):
        # Prefix every line of 'Txt' with N spaces.
        return (" " * N) + (Txt.replace("\n", "\n" + (" " * N)))

    if Codec == "UTF8":
        qtc_str = "uint8_t"
    else:
        qtc_str = "uint32_t"

    # (fix) was 'get_read_preparation(codec)' -- 'codec' is undefined; the
    # parameter is 'Codec', so this raised NameError at runtime.
    input_preparation = get_read_preparation(Codec)

    # Expected entries: clamp negative interval begins to 0; terminate with
    # the begin of the last interval and a sentinel at 0x1FFFF, both -> -1.
    entry_list = [ (0 if interval.begin < 0 else interval.begin, target)
                   for interval, target in tm0 ]
    entry_list.append((tm0[-1][0].begin, -1))
    entry_list.append((0x1FFFF, -1))
    expected_array = [ "    { 0x%06X, %s },\n" % (begin, target)
                       for begin, target in entry_list ]

    txt = main_template.replace("$$ENTRY_LIST$$", "".join(expected_array))
    txt = txt.replace("$$QUEX_TYPE_LEXATOM$$", qtc_str)
    txt = txt.replace("$$TRANSITION$$", indent(TranstionTxt, 4))
    txt = txt.replace("$$PREPARE_INPUT$$", input_preparation)

    door_id = DoorID.incidence(E_IncidenceIDs.BAD_LEXATOM)
    txt = txt.replace("$$ON_BAD_LEXATOM$$", dial_db.get_label_by_door_id(door_id))
    txt = txt.replace("MATCH_FAILURE", "((int)-1)")

    return txt
def _get_loop_terminal_list(LoopMap, EventHandler, IidLoopAfterAppendixDropOut, DoorIdLoop, IidLoopExit):
    """RETURNS: List of terminals of the loop state:

    (i)   Counting terminals: count and return to loop entry.
    (ii)  Couple terminals: count and goto appendix state machine.
    (iii) Exit terminal: exit the loop.

    The '<LOOP>' terminal serves as an address for the appendix state
    machines: on failure they accept its incidence id and re-enter the
    loop from there.
    """
    door_id_loop_exit = DoorID.incidence(IidLoopExit)

    # (i)/(ii) Normal loop characters (the exit terminal comes last).
    result = [
        EventHandler.get_loop_terminal_code(entry, DoorIdLoop, door_id_loop_exit)
        for entry in LoopMap
        if entry.incidence_id != IidLoopExit
    ]

    # Re-enter the loop after an appendix machine dropped out.
    if IidLoopAfterAppendixDropOut is not None:
        reenter_code = CodeTerminal(EventHandler.on_loop_after_appendix_drop_out(DoorIdLoop))
        result.append(Terminal(reenter_code, "<LOOP>", IidLoopAfterAppendixDropOut))

    # (iii) Exit the loop.
    exit_code = CodeTerminal(EventHandler.on_loop_exit_text())
    result.append(Terminal(exit_code, "<LOOP EXIT>", IidLoopExit))

    return result
def RELOAD_PROCEDURE(self, ForwardF):
    """Return the reload procedure code for forward or backward lexing.

    Picks the matching template and substitutes the three incidence-door
    labels for bad lexatom, load failure and buffer overflow.
    """
    assert self.__code_generation_reload_label is None

    template = cpp_reload_forward_str if ForwardF else cpp_reload_backward_str

    # Both templates use the same placeholders; resolve each to the label
    # of its incidence door.
    txt = template
    for placeholder, incidence_id in (
            ("$$ON_BAD_LEXATOM$$",       E_IncidenceIDs.BAD_LEXATOM),
            ("$$ON_LOAD_FAILURE$$",      E_IncidenceIDs.LOAD_FAILURE),
            ("$$ON_NO_SPACE_FOR_LOAD$$", E_IncidenceIDs.OVERFLOW)):
        label = dial_db.get_label_by_door_id(DoorID.incidence(incidence_id))
        txt = txt.replace(placeholder, label)

    return txt
def _prepare_skip_character_set(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """MHI = Mode hierarchie index.

    Combine all 'skip' options of the mode into a single character-set
    skipper.

    RETURNS: ([terminal code block], [new PPT list]) -- both empty if no
             'skip' option is present.
    """
    SkipSetupList = OptionsDb.value_sequence("skip")
    if SkipSetupList is None or len(SkipSetupList) == 0:
        return [], []

    iterable = SkipSetupList.__iter__()
    # NOTE: '.next()' => Python 2 iterator protocol.
    pattern, total_set = iterable.next()
    pattern_str = pattern.pattern_string()
    source_reference = pattern.sr
    # Multiple skippers from different modes are combined into one pattern.
    # This means, that we cannot say exactly where a 'skip' was defined
    # if it intersects with another pattern.
    for ipattern, icharacter_set in iterable:
        total_set.unite_with(icharacter_set)
        pattern_str += "|" + ipattern.pattern_string()

    # The column/line number count actions for the characters in the
    # total_set may differ. Thus, derive a separate set of characters
    # for each same count action, i.e.
    #
    #       map: count action --> subset of total_set
    #
    # When the first character is matched, then its terminal 'TERMINAL_x*'
    # is entered, i.e the count action for the first character is performed
    # before the skipping starts. This will look like this:
    #
    #     TERMINAL_x0:
    #                 count action '0';
    #                 goto __SKIP;
    #     TERMINAL_x1:
    #                 count action '1';
    #                 goto __SKIP;
    #     ...
    # An optional codec transformation is done later. The state machines
    # are entered as pure Unicode state machines.
    # It is not necessary to store the count action along with the state
    # machine. This is done in "action_preparation.do()" for each terminal.
    data = {
        "counter_db":    CounterDb,
        "character_set": total_set,
    }
    # The terminal is not related to a pattern, because it is entered
    # from the sub_terminals. Each sub_terminal relates to a sub character
    # set.
    terminal_iid      = E_IncidenceIDs.SKIP
    goto_terminal_str = Lng.GOTO(DoorID.incidence(terminal_iid))
    code = CodeGeneratedBlock(terminal_iid, skip_character_set.do,
                              data, "character set skipper")
    # Counting actions are added to the terminal automatically by the
    # terminal_factory. The only thing that remains for each sub-terminal:
    # 'goto skipper'.
    ccfactory = CountOpFactory.from_ParserDataLineColumn(
        CounterDb, total_set, Lng.INPUT_P())
    new_ppt_list = [
        PPT_character_set_skipper(MHI, character_set, incidence_id, CounterDb,
                                  goto_terminal_str, source_reference)
        for character_set, incidence_id in ccfactory.get_incidence_id_map()
    ]
    return [code], new_ppt_list
def __frame(FunctionName, CodeTxt, IteratorName, DoorIdReturn, dial_db):
    """Wrap generated counter code 'CodeTxt' into a complete C function.

    FunctionName -- name of the emitted C function.
    CodeTxt      -- list of code fragments implementing the counting.
    IteratorName -- name of the C iterator variable; if empty/None the
                    state-router / failure epilogue is omitted.
    DoorIdReturn -- door where counting ends.
    dial_db      -- dial database resolving DoorID-s to labels/addresses.

    RETURNS: Emitted C code as a single string.
    """
    txt = [
        "static void\n"
        + "%s(QUEX_TYPE_ANALYZER* me, QUEX_TYPE_LEXATOM* LexemeBegin, QUEX_TYPE_LEXATOM* LexemeEnd)\n" % FunctionName
        + "{\n"
    ]
    if IteratorName:
        state_router_adr   = DoorID.global_state_router(dial_db).related_address
        state_router_label = Lng.LABEL_STR_BY_ADR(state_router_adr)
        txt.extend([
            "# define self (*me)\n",
            "/* 'QUEX_GOTO_STATE' requires 'QUEX_LABEL_STATE_ROUTER' */\n",
            "# define QUEX_LABEL_STATE_ROUTER %s\n" % state_router_label
        ])
        # Following function refers to the global 'variable_db'
        txt.append(Lng.VARIABLE_DEFINITIONS(variable_db))
        txt.extend([
            " (void)me;\n",
            Lng.COUNTER_SHIFT_VALUES(),
            "%s" % Lng.ML_COMMENT("Allow LexemeBegin == LexemeEnd (e.g. END_OF_STREAM)\n"
                                  "=> Caller does not need to check\n"
                                  "BUT, if so quit immediately after 'shift values'."),
            " __quex_assert(LexemeBegin <= LexemeEnd);\n",
            " %s" % Lng.IF("LexemeBegin", "==", "LexemeEnd"),
            " %s\n" % Lng.PURE_RETURN,
            " %s\n" % Lng.END_IF,
            " %s = LexemeBegin;\n" % IteratorName
        ])

    # The counting core provided by the caller.
    txt.extend(CodeTxt)

    # NOTE(review): the epilogue below references 'door_id_failure' and
    # 'IteratorName'; the whole epilogue is therefore placed inside the
    # 'if IteratorName:' branch. Source formatting was ambiguous -- confirm.
    if IteratorName:
        door_id_failure     = DoorID.incidence(E_IncidenceIDs.MATCH_FAILURE, dial_db)
        door_id_bad_lexatom = DoorID.incidence(E_IncidenceIDs.BAD_LEXATOM, dial_db)
        txt.append(
            "%s /* TERMINAL: BAD_LEXATOM */\n;\n" % Lng.LABEL(door_id_bad_lexatom)
            # BETTER: A lexeme that is 'counted' has already matched!
            #         => FAILURE is impossible!
            #   "%s /* TERMINAL: FAILURE */\n%s\n" % Lng.UNREACHABLE
            + "%s /* TERMINAL: FAILURE */\n%s\n" % (Lng.LABEL(door_id_failure),
                                                    Lng.GOTO(DoorIdReturn, dial_db))
        )
        txt.append(
            "%s\n" % Lng.LABEL(DoorIdReturn)
            + "%s\n" % Lng.COMMENT("Assert: lexeme in codec's character boundaries.")
            + " __quex_assert(%s == LexemeEnd);\n" % IteratorName
            + " return;\n"
            + "".join(generator.do_state_router(dial_db))
            + "%s\n" % Lng.UNDEFINE("self")
            + "%s\n" % Lng.UNDEFINE("QUEX_LABEL_STATE_ROUTER")
            # If there is no MATCH_FAILURE, then DoorIdBeyond is still
            # referenced as 'gotoed', but MATCH_FAILURE is never implemented,
            # later on, because its DoorId is not referenced.
            + "$$<not-computed-gotos>----------------------------------------------\n"
            + " %s /* in QUEX_GOTO_STATE */\n" % Lng.GOTO(DoorID.global_state_router(dial_db), dial_db)
            + " %s /* to BAD_LEXATOM */\n" % Lng.GOTO(DoorID.incidence(E_IncidenceIDs.BAD_LEXATOM, dial_db), dial_db)
            + "$$------------------------------------------------------------------\n"
            + " %s\n" % Lng.COMMENT("Avoid compiler warning: 'Unused labels'")
            + " %s\n" % Lng.GOTO(door_id_failure, dial_db)
            + " (void)target_state_index;\n"
            + " (void)target_state_else_index;\n"
        )
    txt.append("}\n")
    return "".join(Lng.GET_PLAIN_STRINGS(txt, dial_db))
def prepare_for_reload(self, TheAnalyzer, BeforeReloadCmdList=None, AfterReloadCmdList=None):
    """Prepares state for reload:

    (1) Create 'Door for RELOAD SUCCESS' (actions after reload).
    (2) Determine 'Door for RELOAD FAILURE'.
    (3) Create 'Door from X' in the reloader (actions before reload).
    (4) Adapt state X's transition map, so that:
        BUFFER LIMIT CODE --> reload procedure.

    On BUFFER LIMIT CODE the state jumps into the reloader through its own
    door; on success the reloader returns through the success door, on
    failure through the failure door (END_OF_STREAM for the forward init
    state, drop-out otherwise).
    """
    assert self.transition_map is not None
    assert BeforeReloadCmdList is None or isinstance(BeforeReloadCmdList, CommandList)
    assert AfterReloadCmdList is None or isinstance(AfterReloadCmdList, CommandList)

    if not TheAnalyzer.engine_type.subject_to_reload():
        # Engine type does not require reload => no reload.
        return
    elif self.transition_map.is_only_drop_out():
        # If the state drops out anyway, then there is no need to reload.
        # -- The transition map is not adapted.
        # -- The reloader is not instrumented to reload for that state.
        return

    assert self.index in TheAnalyzer.state_db
    reload_state = TheAnalyzer.reload_state
    assert reload_state.index in (E_StateIndices.RELOAD_FORWARD,
                                  E_StateIndices.RELOAD_BACKWARD)

    # (1) Door for RELOAD SUCCESS
    #     Forward lexing increments the input pointer, backward decrements;
    #     then the input is dereferenced again.
    after_cl = []
    if TheAnalyzer.engine_type.is_FORWARD():
        after_cl.append(InputPIncrement())
    else:
        after_cl.append(InputPDecrement())
    after_cl.append(InputPDereference())
    if AfterReloadCmdList is not None:
        after_cl.extend(AfterReloadCmdList)

    self.entry.enter_CommandList(self.index, reload_state.index,
                                 CommandList.from_iterable(after_cl))
    self.entry.categorize(self.index)  # Categorize => DoorID is available.
    on_success_door_id = self.entry.get_door_id(self.index, reload_state.index)

    # (2) Determine Door for RELOAD FAILURE
    if TheAnalyzer.is_init_state_forward(self.index):
        on_failure_door_id = DoorID.incidence(E_IncidenceIDs.END_OF_STREAM)
    else:
        on_failure_door_id = TheAnalyzer.drop_out_DoorID(self.index)

    # (3) Create 'Door from X' in Reloader
    assert on_failure_door_id != on_success_door_id
    reload_door_id = reload_state.add_state(self.index,
                                            on_success_door_id,
                                            on_failure_door_id,
                                            BeforeReloadCmdList)

    # (4) Adapt transition map: BUFFER LIMIT CODE --> reload_door_id
    self.transition_map.set_target(Setup.buffer_limit_code, reload_door_id)
    return
def do(Data, TheAnalyzer):
    """Generate an indentation counter.

    An indentation counter is entered upon detection of a newline (which is
    not followed by a newline suppressor). It is a single state iterating
    on itself as long as whitespace occurs; during iteration the column
    counter is adapted ('normal' fixed-delta adaption for spaces, 'grid'
    snapping for grid characters). On a newline the counter exits and
    restarts the lexical analysis; on any other character the indentation
    handler is invoked. Characters explicitly disallowed as indentation
    trigger the 'on_bad_indentation' handler.

    RETURNS: Generated code fragment list.
    """
    counter_db            = Data["counter_db"]
    isetup                = Data["indentation_setup"]
    incidence_db          = Data["incidence_db"]
    default_ih_f          = Data["default_indentation_handler_f"]
    mode_name             = Data["mode_name"]
    sm_suppressed_newline = Data["sm_suppressed_newline"]
    sm_newline            = isetup.sm_newline.get()
    sm_comment            = isetup.sm_comment.get()

    assert sm_suppressed_newline is None or sm_suppressed_newline.is_DFA_compliant()
    assert sm_newline is None or sm_newline.is_DFA_compliant()
    assert sm_comment is None or sm_comment.is_DFA_compliant()

    # -- 'on_indentation' == 'on_beyond':
    #    A handler is called as soon as an indentation has been detected.
    after_beyond = [
        Op.IndentationHandlerCall(default_ih_f, mode_name),
        Op.GotoDoorId(DoorID.continue_without_on_after_match())
    ]

    # -- 'on_bad_indentation' is invoked if a character appeared that has been
    #    explicitly disallowed to be used as indentation.
    bad_indentation_iid = dial_db.new_incidence_id()

    if Setup.buffer_based_analyzis_f:
        reload_state = None
    else:
        reload_state = TheAnalyzer.reload_state

    sm_terminal_list = _get_state_machine_vs_terminal_list(sm_suppressed_newline,
                                                           isetup.sm_newline.get(),
                                                           isetup.sm_comment.get(),
                                                           counter_db)

    # 'whitespace' --> normal counting
    # 'bad'        --> goto bad character indentation handler
    # else         --> non-whitespace detected => handle indentation
    ccfactory = CountOpFactory.from_ParserDataIndentation(isetup,
                                                          counter_db,
                                                          Lng.INPUT_P(),
                                                          DoorID.incidence(bad_indentation_iid))

    # (*) Generate Code
    code, \
    door_id_beyond = loop.do(ccfactory,
                             AfterBeyond                = after_beyond,
                             EngineType                 = TheAnalyzer.engine_type,
                             ReloadStateExtern          = reload_state,
                             LexemeMaintainedF          = True,
                             ParallelSmTerminalPairList = sm_terminal_list)

    _code_terminal_on_bad_indentation_character(code, isetup, mode_name,
                                                incidence_db, bad_indentation_iid)

    return code
def _prepare_skip_character_set(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """MHI = Mode hierarchie index.

    Combine all 'skip' options of the mode into a single character-set
    skipper.

    RETURNS: ([terminal code block], [new PPT list]) -- both empty if no
             'skip' option is present.
    """
    SkipSetupList = OptionsDb.value_sequence("skip")
    if SkipSetupList is None or len(SkipSetupList) == 0:
        return [], []

    iterable = SkipSetupList.__iter__()
    # NOTE: '.next()' => Python 2 iterator protocol.
    pattern, total_set = iterable.next()
    pattern_str = pattern.pattern_string()
    source_reference = pattern.sr
    # Multiple skippers from different modes are combined into one pattern.
    # This means, that we cannot say exactly where a 'skip' was defined
    # if it intersects with another pattern.
    for ipattern, icharacter_set in iterable:
        total_set.unite_with(icharacter_set)
        pattern_str += "|" + ipattern.pattern_string()

    # The column/line number count actions for the characters in the
    # total_set may differ. Thus, derive a separate set of characters
    # for each same count action, i.e.
    #
    #       map: count action --> subset of total_set
    #
    # When the first character is matched, then its terminal 'TERMINAL_x*'
    # is entered, i.e the count action for the first character is performed
    # before the skipping starts. This will look like this:
    #
    #     TERMINAL_x0:
    #                 count action '0';
    #                 goto __SKIP;
    #     TERMINAL_x1:
    #                 count action '1';
    #                 goto __SKIP;
    #     ...
    # An optional codec transformation is done later. The state machines
    # are entered as pure Unicode state machines.
    # It is not necessary to store the count action along with the state
    # machine. This is done in "action_preparation.do()" for each terminal.
    data = {
        "counter_db":    CounterDb,
        "character_set": total_set,
    }
    # The terminal is not related to a pattern, because it is entered
    # from the sub_terminals. Each sub_terminal relates to a sub character
    # set.
    terminal_iid      = E_IncidenceIDs.SKIP
    goto_terminal_str = Lng.GOTO(DoorID.incidence(terminal_iid))
    code = CodeGeneratedBlock(terminal_iid, skip_character_set.do,
                              data, "character set skipper")
    # Counting actions are added to the terminal automatically by the
    # terminal_factory. The only thing that remains for each sub-terminal:
    # 'goto skipper'.
    ccfactory = CountOpFactory.from_ParserDataLineColumn(CounterDb, total_set,
                                                         Lng.INPUT_P())
    new_ppt_list = [
        PPT_character_set_skipper(MHI, character_set, incidence_id, CounterDb,
                                  goto_terminal_str, source_reference)
        for character_set, incidence_id in ccfactory.get_incidence_id_map()
    ]
    return [code], new_ppt_list
def position_and_goto(self, EngineType, X):
    """Position the input pointer, then jump to the terminal of 'X'."""
    commands = [self.POSITIONING(X)]
    commands.append(self.GOTO(DoorID.incidence(X.acceptance_id)))
    return commands
def do(Data, TheAnalyzer):
    """Generate an indentation counter.

    An indentation counter is entered upon detection of a newline (which is
    not followed by a newline suppressor). It is a single state iterating
    on itself as long as whitespace occurs; during iteration the column
    counter is adapted ('normal' fixed-delta adaption for spaces, 'grid'
    snapping for grid characters). On a newline the counter exits and
    restarts the lexical analysis; on any other character the indentation
    handler is invoked. Characters explicitly disallowed as indentation
    trigger the 'on_bad_indentation' handler.

    RETURNS: Generated code fragment list.
    """
    counter_db            = Data["counter_db"]
    isetup                = Data["indentation_setup"]
    incidence_db          = Data["incidence_db"]
    default_ih_f          = Data["default_indentation_handler_f"]
    mode_name             = Data["mode_name"]
    sm_suppressed_newline = Data["sm_suppressed_newline"]
    sm_newline            = isetup.sm_newline.get()
    sm_comment            = isetup.sm_comment.get()

    assert sm_suppressed_newline is None or sm_suppressed_newline.is_DFA_compliant()
    assert sm_newline is None or sm_newline.is_DFA_compliant()
    assert sm_comment is None or sm_comment.is_DFA_compliant()

    # -- 'on_indentation' == 'on_beyond':
    #    A handler is called as soon as an indentation has been detected.
    after_beyond = [
        Op.IndentationHandlerCall(default_ih_f, mode_name),
        Op.GotoDoorId(DoorID.continue_without_on_after_match())
    ]

    # -- 'on_bad_indentation' is invoked if a character appeared that has been
    #    explicitly disallowed to be used as indentation.
    bad_indentation_iid = dial_db.new_incidence_id()

    if Setup.buffer_based_analyzis_f:
        reload_state = None
    else:
        reload_state = TheAnalyzer.reload_state

    sm_terminal_list = _get_state_machine_vs_terminal_list(
        sm_suppressed_newline, isetup.sm_newline.get(),
        isetup.sm_comment.get(), counter_db)

    # 'whitespace' --> normal counting
    # 'bad'        --> goto bad character indentation handler
    # else         --> non-whitespace detected => handle indentation
    ccfactory = CountOpFactory.from_ParserDataIndentation(
        isetup, counter_db, Lng.INPUT_P(),
        DoorID.incidence(bad_indentation_iid))

    # (*) Generate Code
    code, \
    door_id_beyond = loop.do(ccfactory,
                             AfterBeyond                = after_beyond,
                             EngineType                 = TheAnalyzer.engine_type,
                             ReloadStateExtern          = reload_state,
                             LexemeMaintainedF          = True,
                             ParallelSmTerminalPairList = sm_terminal_list)

    _code_terminal_on_bad_indentation_character(code, isetup, mode_name,
                                                incidence_db, bad_indentation_iid)

    return code
def prepare_for_reload(self, TheAnalyzer, BeforeReloadOpList=None, AfterReloadOpList=None):
    """Prepares state for reload:

    (1) Create 'Door for RELOAD SUCCESS' (actions after reload).
    (2) Determine 'Door for RELOAD FAILURE'.
    (3) Create 'Door from X' in the reloader (actions before reload).
    (4) Adapt state X's transition map, so that:
        BUFFER LIMIT CODE --> reload procedure.

    On BUFFER LIMIT CODE the state jumps into the reloader through its own
    door; on success the reloader returns through the success door, on
    failure through the failure door (END_OF_STREAM for the forward init
    state or when no drop-out door exists, drop-out otherwise).
    """
    assert self.transition_map is not None
    assert BeforeReloadOpList is None or isinstance(BeforeReloadOpList, OpList)
    assert AfterReloadOpList is None or isinstance(AfterReloadOpList, OpList)

    if not TheAnalyzer.engine_type.subject_to_reload():
        # Engine type does not require reload => no reload.
        return
    elif self.transition_map.is_only_drop_out():
        # If the state drops out anyway, then there is no need to reload.
        # -- The transition map is not adapted.
        # -- The reloader is not instrumented to reload for that state.
        return

    assert self.index in TheAnalyzer.state_db
    reload_state = TheAnalyzer.reload_state
    assert reload_state.index in (E_StateIndices.RELOAD_FORWARD,
                                  E_StateIndices.RELOAD_BACKWARD)

    # (1) Door for RELOAD SUCCESS
    #     NOTE: pointer increment/decrement is deliberately disabled here
    #     (commands kept as comments); only the dereference remains.
    after_cl = []
    if TheAnalyzer.engine_type.is_FORWARD():
        pass # after_cl.append(Op.Increment(E_R.InputP))
    else:
        pass # after_cl.append(Op.Decrement(E_R.InputP))
    after_cl.append(Op.InputPDereference())
    if AfterReloadOpList is not None:
        after_cl.extend(AfterReloadOpList)

    self.entry.enter_OpList(self.index, reload_state.index,
                            OpList.from_iterable(after_cl))
    self.entry.categorize(self.index)  # Categorize => DoorID is available.
    on_success_door_id = self.entry.get_door_id(self.index, reload_state.index)

    # (2) Determine Door for RELOAD FAILURE
    if TheAnalyzer.is_init_state_forward(self.index):
        on_failure_door_id = DoorID.incidence(E_IncidenceIDs.END_OF_STREAM)
    else:
        on_failure_door_id = TheAnalyzer.drop_out_DoorID(self.index)
        # No drop-out door => fall back to END_OF_STREAM.
        if on_failure_door_id is None:
            on_failure_door_id = DoorID.incidence(E_IncidenceIDs.END_OF_STREAM)

    # (3) Create 'Door from X' in Reloader
    assert on_failure_door_id != on_success_door_id
    reload_door_id = reload_state.add_state(self.index,
                                            on_success_door_id,
                                            on_failure_door_id,
                                            BeforeReloadOpList)

    # (4) Adapt transition map: BUFFER LIMIT CODE --> reload_door_id
    self.transition_map.set_target(Setup.buffer_limit_code, reload_door_id)
    return
def do(CcFactory, AfterBeyond, LexemeEndCheckF=False, EngineType=None, ReloadStateExtern=None, LexemeMaintainedF=False, ParallelSmTerminalPairList=None):
    """Generate a (pseudo-one-state) state machine with the properties:

               Buffer Limit Code --> Reload
               Loop Character    --> Loop Entry
               Else              --> Exit Loop

    NOTE: This function does NOT code the FAILURE terminal. The caller
    needs to do this if required.

    Code is generated that iterates over the input stream until

       -- a character occurs that is not in CharacterSet, or
       -- [optional] the 'LexemeEnd' is reached.

    Afterwards 'input_p' points to (the begin of) the first character
    which is not in CharacterSet (or the LexemeEnd):

          [i][i][i]..................[i][i][X][....
                                              |
                                           input_p

    During the loop, line/column count commands may be applied. A
    simplified pattern matching engine realizes the iteration:

           transition
           map
           .------.
           |  i0  |----------> Terminal0: OpList0
           +------+
           |  i1  |----------> Terminal1: OpList1
           +------+
           |  X2  |----------> Terminal Beyond: input_p--; goto TerminalExit;
           +------+
           |  i2  |----------> Terminal2: OpList2
           +------+

    :return: (source text, DoorID of the 'beyond' terminal).
    """
    assert EngineType is not None
    # NOT: assert (not EngineType.subject_to_reload()) or ReloadStateExtern is None
    # This would mean, that the user has to make these kinds of decisions. But,
    # we are easily able to ignore meaningless ReloadStateExtern objects.

    # (*) Construct State Machine and Terminals _______________________________
    #
    if ParallelSmTerminalPairList is None:
        parallel_sm_list = None
    else:
        parallel_sm_list = [sm for sm, terminal in ParallelSmTerminalPairList]

    cs_sm = CharacterSetStateMachine.from_CountOpFactory(
        CcFactory, LexemeMaintainedF, ParallelSmList=parallel_sm_list)

    analyzer = analyzer_generator.do(
        cs_sm.sm, EngineType, ReloadStateExtern,
        OnBeforeReload=OpList.from_iterable(cs_sm.on_before_reload),
        OnAfterReload=OpList.from_iterable(cs_sm.on_after_reload))

    # -- The terminals
    #
    door_id_loop = _prepare_entry_and_reentry(analyzer, cs_sm.on_begin, cs_sm.on_step)

    def get_LexemeEndCheck_appendix(ccfactory, CC_Type):
        # Appendix for a count terminal: either jump straight back into the
        # loop, or check for lexeme end first.
        #
        #        .---------------.        ,----------.   no
        #   ---->| Count Op      |-------< LexemeEnd? >------> DoorIdOk
        #        '---------------'        '----+-----'
        #                                      | yes
        #                          .---------------.
        #                          |  Lexeme End   |
        #                          |  Count Op     |----> DoorIdOnLexemeEnd
        #                          '---------------'
        #
        if not LexemeEndCheckF:
            return [Op.GotoDoorId(door_id_loop)]

        appendix = [Op.GotoDoorIdIfInputPNotEqualPointer(door_id_loop, E_R.LexemeEnd)]
        if ccfactory.requires_reference_p() and CC_Type == E_CharacterCountType.COLUMN:
            appendix.append(Op.ColumnCountReferencePDeltaAdd(
                E_R.InputP, ccfactory.column_count_per_chunk, False))
        return appendix + AfterBeyond

    terminal_list = CcFactory.get_terminal_list(cs_sm.on_end + AfterBeyond,
                                                cs_sm.incidence_id_beyond,
                                                get_LexemeEndCheck_appendix)
    if ParallelSmTerminalPairList is not None:
        terminal_list.extend(terminal for sm, terminal in ParallelSmTerminalPairList)

    # (*) Generate Code _______________________________________________________
    #
    txt = _get_source_code(CcFactory, analyzer, terminal_list)

    return txt, DoorID.incidence(cs_sm.incidence_id_beyond)
def do(TheCountMap, OnLoopExit, LexemeEndCheckF=False, EngineType=None, ReloadStateExtern=None, LexemeMaintainedF=False, ParallelSmTerminalPairList=None):
    """Generate a structure that 'loops' quickly over incoming characters.

                                                     Loop continues
       .---------( ++i )-----+--------<-------------. at AFTER position of
       |    .------.         |                      | the first lexatom 'ir'.
       '--->|      |         |                      |
            | pure |-->[ Terminals A ]              |
            |  L   |-->[ Terminals B ]              |
            |      |-->[ Terminals C ]              |
            +------+                                |
            |      |                                |  ( i = ir )
            | LaF  |-->[ Terminals A ]-->-.         |  drop-out
            |      |-->[ Terminals B ]-->. \        |
            |      |-->[ Terminals C ]-->( ir = i )--[ StateMachine ]-->[ Terminals X ]
            |      |                        \                       '-->[ Terminals Y ]
            +------+                         '--->
            | Else |----> Exit
            '------'

    The terminals may contain a 'lexeme end check' which ensures that
    lexeme borders are not exceeded. The loop ends when:

      (i)   a character appears that is not a loop character,
      (ii)  one of the appendix state machines exits, or
      (iii) [optional] the lexeme end is reached.

    Afterwards the input pointer points to (the begin of) the first
    lexatom behind what is treated:

          [i][i][i]..................[i][i][X][....
                                              |
                                           input_p

    During the loop, line/column count commands may be applied.

    :return: (source text, DoorID of the loop-exit terminal).
    """
    assert EngineType is not None

    parallel_terminal_list, \
    parallel_sm_list        = _extract_state_machines_and_terminals(ParallelSmTerminalPairList)

    # Fresh incidence ids: loop exit, and drop-out after an appendix sm.
    iid_loop_exit                    = dial_db.new_incidence_id()
    iid_loop_after_appendix_drop_out = dial_db.new_incidence_id()

    event_handler = LoopEventHandlers(TheCountMap.column_number_per_code_unit(),
                                      LexemeEndCheckF, LexemeMaintainedF,
                                      EngineType, ReloadStateExtern,
                                      UserOnLoopExit=OnLoopExit)

    # LoopMap: Associate characters with the reactions on their occurrence ____
    #
    loop_map, \
    appendix_sm_list = _get_loop_map(TheCountMap, parallel_sm_list, iid_loop_exit)

    # Loop represented by Analyzer-s and Terminal-s ___________________________
    #
    analyzer_list, \
    door_id_loop, \
    appendix_sm_exist_f = _get_analyzer_list(loop_map, event_handler,
                                             appendix_sm_list,
                                             iid_loop_after_appendix_drop_out)

    # Without any appendix state machine the drop-out id is meaningless.
    if not appendix_sm_exist_f:
        iid_loop_after_appendix_drop_out = None

    terminal_list = _get_terminal_list(loop_map, event_handler,
                                       parallel_terminal_list,
                                       door_id_loop,
                                       iid_loop_exit,
                                       iid_loop_after_appendix_drop_out)

    # Generate Code ___________________________________________________________
    #
    txt = _get_source_code(analyzer_list, terminal_list,
                           TheCountMap.column_number_per_code_unit(),
                           appendix_sm_exist_f)

    return txt, DoorID.incidence(iid_loop_exit)
def do(TheCountMap, OnLoopExit, LexemeEndCheckF=False, EngineType=None, ReloadStateExtern=None, LexemeMaintainedF=False, ParallelSmTerminalPairList=None):
    """Generate a structure that 'loops' quickly over incoming characters.

                                                     Loop continues
       .---------( ++i )-----+--------<-------------. at AFTER position of
       |    .------.         |                      | the first lexatom 'ir'.
       '--->|      |         |                      |
            | pure |-->[ Terminals A ]              |
            |  L   |-->[ Terminals B ]              |
            |      |-->[ Terminals C ]              |
            +------+                                |
            |      |                                |  ( i = ir )
            | LaF  |-->[ Terminals A ]-->-.         |  drop-out
            |      |-->[ Terminals B ]-->. \        |
            |      |-->[ Terminals C ]-->( ir = i )--[ StateMachine ]-->[ Terminals X ]
            |      |                        \                       '-->[ Terminals Y ]
            +------+                         '--->
            | Else |----> Exit
            '------'

    The terminals may contain a 'lexeme end check' which ensures that
    lexeme borders are not exceeded. The loop ends when:

      (i)   a character appears that is not a loop character,
      (ii)  one of the appendix state machines exits, or
      (iii) [optional] the lexeme end is reached.

    Afterwards the input pointer points to (the begin of) the first
    lexatom behind what is treated:

          [i][i][i]..................[i][i][X][....
                                              |
                                           input_p

    During the loop, line/column count commands may be applied.

    :return: (source text, DoorID of the loop-exit terminal).
    """
    assert EngineType is not None

    parallel_terminal_list, parallel_sm_list = \
        _extract_state_machines_and_terminals(ParallelSmTerminalPairList)

    # Allocate incidence ids before any code generation happens.
    iid_loop_exit                    = dial_db.new_incidence_id()
    iid_loop_after_appendix_drop_out = dial_db.new_incidence_id()

    event_handler = LoopEventHandlers(TheCountMap.column_number_per_code_unit(),
                                      LexemeEndCheckF,
                                      LexemeMaintainedF,
                                      EngineType,
                                      ReloadStateExtern,
                                      UserOnLoopExit=OnLoopExit)

    # LoopMap: Associate characters with the reactions on their occurrence ____
    #
    loop_map, appendix_sm_list = _get_loop_map(TheCountMap, parallel_sm_list,
                                               iid_loop_exit)

    # Loop represented by Analyzer-s and Terminal-s ___________________________
    #
    analyzer_list, door_id_loop, appendix_sm_exist_f = \
        _get_analyzer_list(loop_map, event_handler, appendix_sm_list,
                           iid_loop_after_appendix_drop_out)

    if not appendix_sm_exist_f:
        # No appendix state machine => no drop-out terminal for it.
        iid_loop_after_appendix_drop_out = None

    terminal_list = _get_terminal_list(loop_map, event_handler,
                                       parallel_terminal_list, door_id_loop,
                                       iid_loop_exit,
                                       iid_loop_after_appendix_drop_out)

    # Generate Code ___________________________________________________________
    #
    txt = _get_source_code(analyzer_list, terminal_list,
                           TheCountMap.column_number_per_code_unit(),
                           appendix_sm_exist_f)

    return txt, DoorID.incidence(iid_loop_exit)