def __templated_state_entries(txt, TheTemplate, SMD):
    """Emit one entry stub for every state that is hosted by the template.

    Each stub assigns the state's key to 'template_state_key' and then jumps
    to the entry of the template, e.g.

        STATE_4711: key = 0; goto TEMPLATE_STATE_111;
        STATE_3123: key = 1; goto TEMPLATE_STATE_111;
        STATE_8912: key = 2; goto TEMPLATE_STATE_111;

    The generated fragments are appended to 'txt'; nothing is returned.
    """
    involved_states = TheTemplate.template_combination().involved_state_list()
    for key, state_index in enumerate(involved_states):
        state = SMD.sm().states[state_index]

        if not TheTemplate.uniform_state_entries_f():
            # Non-uniform entries: each state carries its own input and
            # acceptance handling.
            txt.extend(input_block.do(state_index, False, SMD))
            txt.extend(acceptance_info.do(state, state_index, SMD,
                                          ForceSaveLastAcceptanceF=True))
        else:
            # Uniform entries: only the label and the debug output are
            # emitted per state.
            if state_index != SMD.sm().init_state_index:
                txt.append(" __quex_assert_no_passage();\n")
            txt.append(get_label("$entry", state_index) + ":\n")
            txt.append("\n " + LanguageDB["$debug-state"](state_index,
                                                          SMD.forward_lexing_f()))

        # Set the state key, then jump into the template.
        key_assignment = LanguageDB["$assignment"]("template_state_key", "%i" % key)
        template_entry = get_label("$entry", TheTemplate.core().state_index, U=True)
        txt.extend([
            " ",
            key_assignment.replace("\n", "\n "),
            " goto %s;" % template_entry,
            "\n\n",
        ])
def get_transition_to_reload(StateIdx, SMD, ReturnStateIndexStr=None):
    """Return the code that transits from a state to the buffer reload.

    StateIdx            -- index of the state that triggers the reload.
    SMD                 -- state machine decorator or None. Without a
                           decorator the direction is 'FORWARD'.
    ReturnStateIndexStr -- optional expression that names the state to
                           return to; if None, the entry address of
                           'StateIdx' is used.

    Returns a string:
      -- "goto __RELOAD_INIT_STATE;" for the init state in forward lexing,
      -- ""                          during backward input position detection,
      -- a 'QUEX_GOTO_RELOAD(...)'   statement in all other cases.
    """
    if SMD is not None and SMD.backward_lexing_f():
        direction = "BACKWARD"
    else:
        direction = "FORWARD"

    if SMD is not None \
       and StateIdx == SMD.sm().init_state_index \
       and SMD.forward_lexing_f():
        return "goto __RELOAD_INIT_STATE;"

    if SMD is not None and SMD.backward_input_position_detection_f():
        # No reload transition is generated in this mode.
        return ""

    if ReturnStateIndexStr is not None:
        state_reference = ReturnStateIndexStr
    else:
        state_reference = "QUEX_LABEL(%i)" % get_address("$entry", StateIdx, R=True)

    # Ensure that '__STATE_ROUTER' is marked as referenced
    get_label("$state-router", U=True)

    return "QUEX_GOTO_RELOAD(%s, %s, QUEX_LABEL(%i));" \
           % (get_label("$reload-%s" % direction, U=True),
              state_reference,
              get_address("$drop-out", StateIdx, U=True, R=True))
def get_transition_to_reload(StateIdx, SMD, ReturnStateIndexStr=None):
    """Return the code that transits from a state to the buffer reload.

    StateIdx            -- index of the state that triggers the reload.
    SMD                 -- state machine decorator or None. Without a
                           decorator the direction is 'FORWARD'.
    ReturnStateIndexStr -- optional expression that names the state to
                           return to; if None, the entry address of
                           'StateIdx' is used.

    Returns a string:
      -- "goto __RELOAD_INIT_STATE;" for the init state in forward lexing,
      -- ""                          during backward input position detection,
      -- a 'QUEX_GOTO_RELOAD(...)'   statement in all other cases.
    """
    if SMD is not None and SMD.backward_lexing_f():
        direction = "BACKWARD"
    else:
        direction = "FORWARD"

    if SMD is not None \
       and StateIdx == SMD.sm().init_state_index \
       and SMD.forward_lexing_f():
        return "goto __RELOAD_INIT_STATE;"

    if SMD is not None and SMD.backward_input_position_detection_f():
        # No reload transition is generated in this mode.
        return ""

    if ReturnStateIndexStr is not None:
        state_reference = ReturnStateIndexStr
    else:
        state_reference = "QUEX_LABEL(%i)" % get_address("$entry", StateIdx, R=True)

    # Ensure that '__STATE_ROUTER' is marked as referenced
    get_label("$state-router", U=True)

    return "QUEX_GOTO_RELOAD(%s, %s, QUEX_LABEL(%i));" \
           % (get_label("$reload-%s" % direction, U=True),
              state_reference,
              get_address("$drop-out", StateIdx, U=True, R=True))
def __end_state_router(PathWalker, SMD):
    """Return the code that leaves a path after its last transition.

    The code decrements the input position and then enters the 'end state',
    i.e. the first state after the path. If the path walker hosts more than
    one path, the end state is only known at run time and the transition
    goes through the state router.

    Only valid for path walkers with uniform state entries. 'SMD' is not
    used here. Returns a list of text fragments.
    """
    assert PathWalker.uniform_state_entries_f()

    paths = PathWalker.path_list()
    single_path_f = (len(paths) == 1)

    code = [
        " ",
        LanguageDB["$input/decrement"] + "\n",
        "\n ",
    ]

    # -- Transition to the first state after the path:
    if not single_path_f:
        # Several paths: the end state is stored in 'path_end_state' at run
        # time and entered via the state router.
        get_label("$state-router", U=True)  # Make sure the state router is referenced
        code.append("QUEX_GOTO_STATE(path_end_state);")
    else:
        # One path only: the end state is known when the code is generated.
        code.append(transition.get_transition_to_state(paths[0].end_state_index()))

    code.extend(["\n", " "])
    return code
def get_transition_to_terminal(Origin):
    """Return the code (list with one string) that jumps to a terminal.

    Origin -- the accepting origin, or None if there is no unconditional
              acceptance. For None the 'goto last acceptance' statement of
              the language database is returned and the terminal router is
              marked as referenced.
    """
    LanguageDB = Setup.language_db

    # No unconditional case of acceptance
    if Origin is None:
        get_label("$terminal-router", U=True)  # Mark __TERMINAL_ROUTER as used
        return [LanguageDB["$goto-last_acceptance"]]

    assert Origin.is_acceptance()

    # An origin without post context (id == -1) goes to the '$terminal'
    # label; any other origin goes to '$terminal-direct'.
    if Origin.post_context_id() == -1:
        label_type = "$terminal"
    else:
        label_type = "$terminal-direct"

    return ["goto %s;" % get_label(label_type, Origin.state_machine_id, U=True)]
def __backward_analyzer(self):
    """Generate the code for the backward running pre-context state machine.

    Returns a list of text fragments: an optional comment describing the
    state machine, the coded state machine, and the general backward
    terminal, which seeks back to the lexeme start.
    """
    LanguageDB = self.language_db

    assert self.pre_context_sm.get_orphaned_state_index_list() == []

    decorated_sm = StateMachineDecorator(self.pre_context_sm,
                                         self.state_machine_name,
                                         PostContextSM_ID_List=[],
                                         BackwardLexingF=True,
                                         BackwardInputPositionDetectionF=False)

    code = []
    if Setup.comment_state_machine_transitions_f:
        make_comment = LanguageDB["$ml-comment"]
        code.append(make_comment("BEGIN: PRE-CONTEXT STATE MACHINE\n"
                                 + self.pre_context_sm.get_string(NormalizeF=False)
                                 + "END: PRE-CONTEXT STATE MACHINE"))
        # For safety: New content may have to start in a newline, e.g. "#ifdef ..."
        code.append("\n")

    code.extend(state_machine_coder.do(decorated_sm))

    # Terminal: during backward lexing the analyzer went backwards, so the
    # input stream is set back to the real current position.
    code.extend([
        get_label("$terminal-general-bw") + ":\n",
        " QUEX_NAME(Buffer_seek_lexeme_start)(&me->buffer);\n",
    ])
    return code
def __backward_analyzer(self):
    """Generate the code for the backward running pre-context state machine.

    Returns a list of text fragments: an optional comment describing the
    state machine, the coded state machine, and the general backward
    terminal, which seeks back to the lexeme start.
    """
    LanguageDB = self.language_db

    assert self.pre_context_sm.get_orphaned_state_index_list() == []

    decorated_sm = StateMachineDecorator(self.pre_context_sm,
                                         self.state_machine_name,
                                         PostContextSM_ID_List=[],
                                         BackwardLexingF=True,
                                         BackwardInputPositionDetectionF=False)

    code = []
    if Setup.comment_state_machine_transitions_f:
        make_comment = LanguageDB["$ml-comment"]
        code.append(make_comment("BEGIN: PRE-CONTEXT STATE MACHINE\n"
                                 + self.pre_context_sm.get_string(NormalizeF=False)
                                 + "END: PRE-CONTEXT STATE MACHINE"))
        # For safety: New content may have to start in a newline, e.g. "#ifdef ..."
        code.append("\n")

    code.extend(state_machine_coder.do(decorated_sm))

    # Terminal: during backward lexing the analyzer went backwards, so the
    # input stream is set back to the real current position.
    code.extend([
        get_label("$terminal-general-bw") + ":\n",
        " QUEX_NAME(Buffer_seek_lexeme_start)(&me->buffer);\n",
    ])
    return code
def __template_state(txt, TheTemplate, SMD):
    """Generate the template state that 'hosts' the templated states.

    With uniform state entries the input and acceptance handling is done
    once at the entry of the template; otherwise only the entry label is
    emitted here and the transition block is told how to map the state key
    back to a state index. Fragments are appended to 'txt'.
    """
    template_index = TheTemplate.core().state_index
    trigger_map = TheTemplate.transitions().get_trigger_map()

    if not TheTemplate.uniform_state_entries_f():
        txt.append(" __quex_assert_no_passage();\n"
                   + get_label("$entry", template_index) + ":\n")
    else:
        txt.extend(input_block.do(template_index, False, SMD))
        txt.extend(acceptance_info.do(TheTemplate, template_index, SMD,
                                      ForceSaveLastAcceptanceF=True))

    if TheTemplate.uniform_state_entries_f():
        return_state_str = None
    else:
        # Templates that need to implement more than one state need to return
        # to dedicated state entries, if the state entries are not uniform.
        return_state_str = "template_%i_map_state_key_to_state_index[template_state_key]" \
                           % template_index

    txt.extend(transition_block.do(trigger_map, template_index, SMD,
                                   ReturnToState_Str=return_state_str))
    txt.extend(drop_out.do(TheTemplate, template_index, SMD))
def __template_state(txt, TheTemplate, SMD):
    """Generate the template state that 'hosts' the templated states.

    With uniform state entries the input and acceptance handling is done
    once at the entry of the template; otherwise only the entry label is
    emitted here and the transition block is told how to map the state key
    back to a state index. Fragments are appended to 'txt'.
    """
    template_index = TheTemplate.core().state_index
    trigger_map = TheTemplate.transitions().get_trigger_map()

    if not TheTemplate.uniform_state_entries_f():
        txt.append(" __quex_assert_no_passage();\n"
                   + get_label("$entry", template_index) + ":\n")
    else:
        txt.extend(input_block.do(template_index, False, SMD))
        txt.extend(acceptance_info.do(TheTemplate, template_index, SMD,
                                      ForceSaveLastAcceptanceF=True))

    if TheTemplate.uniform_state_entries_f():
        return_state_str = None
    else:
        # Templates that need to implement more than one state need to return
        # to dedicated state entries, if the state entries are not uniform.
        return_state_str = "template_%i_map_state_key_to_state_index[template_state_key]" \
                           % template_index

    txt.extend(transition_block.do(trigger_map, template_index, SMD,
                                   ReturnToState_Str=return_state_str))
    txt.extend(drop_out.do(TheTemplate, template_index, SMD))
def __dead_end_state_stub(DeadEndStateInfo, SMD):
    """Return the stub code (list of fragments) for a dead end state.

    Dead end states are states which are void of any transitions. They all
    drop out to some terminal (or drop out totally). Many transitions to
    such states can be replaced by direct transitions to the correspondent
    terminal. Some dead end states, though, need to be replaced by 'stubs'
    where some basic handling is necessary. Those stubs are produced here.

    DeadEndStateInfo -- tuple (pre_context_dependency_f, winner_origin_list,
                        state).
    SMD              -- state machine decorator; its mode selects the kind
                        of stub.

    An empty list means: no stub required.
    """
    LanguageDB = Setup.language_db

    pre_context_dependency_f, winner_origin_list, state = DeadEndStateInfo

    assert isinstance(state, State)
    assert state.is_acceptance()

    if SMD.forward_lexing_f():
        if not pre_context_dependency_f:
            assert len(winner_origin_list) == 1
            # Direct transition to terminal possible, no stub required.
            return []

        def _on_detection_code(Origin):
            return transition.get_transition_to_terminal(Origin)

        return acceptance_info.get_acceptance_detector(state.origins().get_list(),
                                                       _on_detection_code)

    elif SMD.backward_lexing_f():
        # When checking a pre-condition no dedicated terminal exists. However,
        # when we check for pre-conditions, a pre-condition flag needs to be set.
        return acceptance_info.backward_lexing(state.origins().get_list()) + \
               ["goto %s;" % get_label("$terminal-general-bw", U=True)]

    elif SMD.backward_input_position_detection_f():
        # When searching backwards for the end of the core pattern, and one
        # reaches a dead end state, then no position needs to be stored extra
        # since it was stored at the entry of the state.
        return [LanguageDB["$input/decrement"], "\n"] + \
               acceptance_info.backward_lexing_find_core_pattern(state.origins().get_list()) + \
               ["goto %s;" % get_label("$terminal-general-bw", U=True)]

    # The message formerly formatted the undefined name 'Mode', which turned
    # this assertion into a NameError.
    assert False, \
           "Unknown mode in terminal stub code generation (decorator: %r)." % (SMD,)
def __dead_end_state_stub(DeadEndStateInfo, SMD):
    """Return the stub code (list of fragments) for a dead end state.

    Dead end states are states which are void of any transitions. They all
    drop out to some terminal (or drop out totally). Many transitions to
    such states can be replaced by direct transitions to the correspondent
    terminal. Some dead end states, though, need to be replaced by 'stubs'
    where some basic handling is necessary. Those stubs are produced here.

    DeadEndStateInfo -- tuple (pre_context_dependency_f, winner_origin_list,
                        state).
    SMD              -- state machine decorator; its mode selects the kind
                        of stub.

    An empty list means: no stub required.
    """
    LanguageDB = Setup.language_db

    pre_context_dependency_f, winner_origin_list, state = DeadEndStateInfo

    assert isinstance(state, State)
    assert state.is_acceptance()

    if SMD.forward_lexing_f():
        if not pre_context_dependency_f:
            assert len(winner_origin_list) == 1
            # Direct transition to terminal possible, no stub required.
            return []

        def _on_detection_code(Origin):
            return transition.get_transition_to_terminal(Origin)

        return acceptance_info.get_acceptance_detector(state.origins().get_list(),
                                                       _on_detection_code)

    elif SMD.backward_lexing_f():
        # When checking a pre-condition no dedicated terminal exists. However,
        # when we check for pre-conditions, a pre-condition flag needs to be set.
        return acceptance_info.backward_lexing(state.origins().get_list()) + \
               ["goto %s;" % get_label("$terminal-general-bw", U=True)]

    elif SMD.backward_input_position_detection_f():
        # When searching backwards for the end of the core pattern, and one
        # reaches a dead end state, then no position needs to be stored extra
        # since it was stored at the entry of the state.
        return [LanguageDB["$input/decrement"], "\n"] + \
               acceptance_info.backward_lexing_find_core_pattern(state.origins().get_list()) + \
               ["goto %s;" % get_label("$terminal-general-bw", U=True)]

    # The message formerly formatted the undefined name 'Mode', which turned
    # this assertion into a NameError.
    assert False, \
           "Unknown mode in terminal stub code generation (decorator: %r)." % (SMD,)
def get_code(self):
    """Return the code for a transition to a template's target state.

    The target state is determined at run-time based on the
    'template_state_key'. Three cases are distinguished:

      -- not recursive: route via the template's target array,
      -- recursive, non-uniform state entries: route via the map from state
         key to state index,
      -- recursive, uniform state entries: direct 'goto' to the template.

    Returns a list containing one string.
    """
    LanguageDB = Setup.language_db

    if not self.recursive():
        routed_state = "template_%i_target_%i[template_state_key]" \
                       % (self.template_index, self.target_index)
    elif not self.uniform_state_entries_f():
        routed_state = "template_%i_map_state_key_to_state_index[template_state_key]" \
                       % self.template_index
    else:
        return ["goto %s;" % get_label_of_address(self.template_index, U=True)]

    get_label("$state-router", U=True)  # Ensure reference of state router
    return ["QUEX_GOTO_STATE(%s);\n" % routed_state]
def backward_detector_function_get(self, sm):
    """Generate the function that detects the input position backwards.

    sm -- state machine of the backward detector; it must not contain
          orphaned states.

    The function consists of: prolog, local variable definitions, an
    optional comment describing the state machine, the coded state machine,
    the terminal, the state router (if addresses are subject to routing)
    and the epilog. The result is passed through 'get_plain_strings()'.
    """
    assert sm.get_orphaned_state_index_list() == []

    dsm = StateMachineDecorator(sm,
                                "BACKWARD_DETECTOR_" + repr(sm.get_id()),
                                PostContextSM_ID_List=[],
                                BackwardLexingF=True,
                                BackwardInputPositionDetectionF=True)

    variable_db.init()
    init_address_handling(dsm.get_direct_transition_to_terminal_db())

    function_body = state_machine_coder.do(dsm)

    comment = []
    if Setup.comment_state_machine_transitions_f:
        comment_text = Setup.language_db["$ml-comment"](
                           "BEGIN: BACKWARD DETECTOR STATE MACHINE\n"
                           + sm.get_string(NormalizeF=False)
                           + "\nEND: BACKWARD DETECTOR STATE MACHINE")
        # The comment generator's result is used as a single text element
        # elsewhere, so '.append()' must not be called on it directly.
        # A list result is still accepted.
        if isinstance(comment_text, list):
            comment = comment_text + ["\n"]
        else:
            comment = [comment_text, "\n"]

    # -- input position detectors simply the next 'catch' and return
    terminal = [
        "\n",
        " __quex_assert_no_passage();\n",
        get_label("$terminal-general-bw") + ":\n",
        " " + self.language_db["$input/seek_position"]("end_of_core_pattern_position") + "\n",
        " " + self.language_db["$input/increment"] + "\n",
        " return;\n",
    ]

    routed_address_set = get_address_set_subject_to_routing()

    state_router_txt = ""
    if len(routed_address_set) != 0:
        routed_state_info_list = state_router.get_info(routed_address_set, dsm)
        state_router_txt = state_router.do(routed_state_info_list)
        variable_db.require("target_state_index", Condition_ComputedGoto=False)

    variable_db.require("input")
    variable_db.require("end_of_core_pattern_position")

    local_variable_definition = self.language_db["$local-variable-defs"](variable_db.get())

    # Put all things together
    txt = []
    txt.append(bwd_prolog.replace("$$ID$$", repr(sm.get_id()).replace("L", "")))
    txt.extend(local_variable_definition)
    txt.extend(comment)
    txt.extend(function_body)
    txt.extend(terminal)
    txt.append(state_router_txt)
    txt.append(bwd_epilog.replace("$$INIT_STATE_ID$$",
                                  get_label_of_address(sm.init_state_index)))

    return get_plain_strings(txt)
def get_transition_to_terminal(Origin):
    """Return the code (list with one string) that jumps to a terminal.

    Origin -- the accepting origin, or None if there is no unconditional
              acceptance. For None the 'goto last acceptance' statement of
              the language database is returned and the terminal router is
              marked as referenced.
    """
    LanguageDB = Setup.language_db

    # No unconditional case of acceptance
    if Origin is None:
        get_label("$terminal-router", U=True)  # Mark __TERMINAL_ROUTER as used
        return [LanguageDB["$goto-last_acceptance"]]

    assert Origin.is_acceptance()

    # An origin without post context (id == -1) goes to the '$terminal'
    # label; any other origin goes to '$terminal-direct'.
    if Origin.post_context_id() == -1:
        label_type = "$terminal"
    else:
        label_type = "$terminal-direct"

    return ["goto %s;" % get_label(label_type, Origin.state_machine_id, U=True)]
def __templated_state_entries(txt, TheTemplate, SMD):
    """Emit one entry stub for every state that is hosted by the template.

    Each stub assigns the state's key to 'template_state_key' and then jumps
    to the entry of the template, e.g.

        STATE_4711: key = 0; goto TEMPLATE_STATE_111;
        STATE_3123: key = 1; goto TEMPLATE_STATE_111;
        STATE_8912: key = 2; goto TEMPLATE_STATE_111;

    The generated fragments are appended to 'txt'; nothing is returned.
    """
    involved_states = TheTemplate.template_combination().involved_state_list()
    for key, state_index in enumerate(involved_states):
        state = SMD.sm().states[state_index]

        if not TheTemplate.uniform_state_entries_f():
            # Non-uniform entries: each state carries its own input and
            # acceptance handling.
            txt.extend(input_block.do(state_index, False, SMD))
            txt.extend(acceptance_info.do(state, state_index, SMD,
                                          ForceSaveLastAcceptanceF=True))
        else:
            # Uniform entries: only the label and the debug output are
            # emitted per state.
            if state_index != SMD.sm().init_state_index:
                txt.append(" __quex_assert_no_passage();\n")
            txt.append(get_label("$entry", state_index) + ":\n")
            txt.append("\n " + LanguageDB["$debug-state"](state_index,
                                                          SMD.forward_lexing_f()))

        # Set the state key, then jump into the template.
        key_assignment = LanguageDB["$assignment"]("template_state_key", "%i" % key)
        template_entry = get_label("$entry", TheTemplate.core().state_index, U=True)
        txt.extend([
            " ",
            key_assignment.replace("\n", "\n "),
            " goto %s;" % template_entry,
            "\n\n",
        ])
def get_code(self):
    """Return the code that adapts the indentation count on a trigger.

    Depending on 'self.type':

      "space" -- add a fixed number ('self.number') or the content of the
                 member 'self.variable_name' (if 'self.number' is -1).
      "grid"  -- advance to the next position on a grid of the given
                 width. For constant widths that are a power of two a bit
                 mask is used instead of the modulo operation.
      "bad"   -- jump to the handler for bad characters.

    Raises ValueError if a constant grid width is not positive.
    """
    if self.type == "space":
        # Spaces simply increment
        if self.number != -1:
            add_str = "%i" % self.number
        else:
            add_str = "me->" + self.variable_name
        return "me->counter._indentation += %s;" % add_str + \
               "goto %s;" % get_label("$entry", self.state_index, U=True)

    elif self.type == "grid":
        # Grids lie on a grid:
        if self.number != -1:
            if self.number <= 0:
                raise ValueError("Grid width must be positive, got %i." % self.number)
            # Exact integer test for a power of two; 'log(n)/log(2)' is not
            # exact in floating point and can miss powers of two.
            if (self.number & (self.number - 1)) == 0:
                # For k = 2^n, 'x - x % k' equals 'x & ~(k - 1)'.
                # Thus: x = x - x % k + k = (x & ~mask) + k
                mask = self.number - 1
                return "me->counter._indentation &= ~ ((QUEX_TYPE_INDENTATION)0x%X);\n" % mask + \
                       "me->counter._indentation += %i;\n" % self.number + \
                       "goto %s;" % get_label("$entry", self.state_index, U=True)
            add_str = "%i" % self.number
        else:
            add_str = "me->" + self.variable_name

        LanguageDB = Setup.language_db
        return "me->counter._indentation = (me->counter._indentation - (me->counter._indentation %% %s)) + %s;" \
               % (add_str, add_str) + \
               LanguageDB["$goto"]("$entry", self.state_index)

    elif self.type == "bad":
        assert self.state_index != -1
        return "goto INDENTATION_COUNTER_%i_BAD_CHARACTER;\n" % self.state_index

    else:
        assert False, "Unreachable code has been reached."
def do(StateIdx, InitStateF, SMD):
    """Generate the code fragment that provides the 'input' character for
    the subsequent transition map.

    In general this consists of
      (i)  incrementing/decrementing the input pointer,
      (ii) dereferencing the pointer to get a value.

    The init state in forward lexing is an exception: its input pointer is
    not increased here and its label is the one of the init state's
    transition block. The increment for entries from other states is
    generated separately as an epilog.

    Returns a list of text fragments.
    """
    LanguageDB = Setup.language_db

    code = []
    if not InitStateF:
        code.append(" __quex_assert_no_passage();\n")

    # -- Label and debug output
    if InitStateF and SMD.forward_lexing_f():
        code.extend([
            get_label("$init_state_fw_transition_block") + ":\n",
            " " + LanguageDB["$debug-init-state"],
        ])
    else:
        code.extend([
            get_label("$entry", StateIdx) + ":\n",
            " " + LanguageDB["$debug-state"](StateIdx, SMD.forward_lexing_f()),
        ])

    # -- Move the input pointer
    #    (The init state in forward lexing does not increase the input pointer)
    if not (SMD.forward_lexing_f() and InitStateF):
        if SMD.forward_lexing_f():
            move_cmd = LanguageDB["$input/increment"]
        else:
            move_cmd = LanguageDB["$input/decrement"]
        code.extend([" ", move_cmd, "\n"])

    # -- Get the character
    code.extend([" ", LanguageDB["$input/get"], "\n"])

    return code
def __state_router(PathWalker, SMD):
    """Return the code that jumps to a state based on the 'path_iterator'.

    The state is looked up in the path walker's state array at the offset
    'path_iterator - base'. Only valid for path walkers with non-uniform
    state entries. 'SMD' is not used here.

    NOTE: Paths cannot be recursive. Also, path transitions are linear, i.e.
          target states are either subsequent path states or the path is
          left.

    NOTE: In the case of non-uniform path state elements, the state router
          takes care of the detection of the end-state, thus it has not to
          be determined in the '*path_iterator == PTC' section.
    """
    assert not PathWalker.uniform_state_entries_f()

    walker_id = PathWalker.core().state_index

    # Make sure that the state router is implemented, add reference:
    get_label("$state-router", U=True)

    routing_statement = "QUEX_GOTO_STATE(path_walker_%i_state[path_iterator - path_walker_%i_base]);\n"
    return routing_statement % (walker_id, walker_id)
def get_code(self):
    """Return the code for a transition to a template's target state.

    The target state is determined at run-time based on the
    'template_state_key'. Three cases are distinguished:

      -- not recursive: route via the template's target array,
      -- recursive, non-uniform state entries: route via the map from state
         key to state index,
      -- recursive, uniform state entries: direct 'goto' to the template.

    Returns a list containing one string.
    """
    LanguageDB = Setup.language_db

    if not self.recursive():
        routed_state = "template_%i_target_%i[template_state_key]" \
                       % (self.template_index, self.target_index)
    elif not self.uniform_state_entries_f():
        routed_state = "template_%i_map_state_key_to_state_index[template_state_key]" \
                       % self.template_index
    else:
        return ["goto %s;" % get_label_of_address(self.template_index, U=True)]

    get_label("$state-router", U=True)  # Ensure reference of state router
    return ["QUEX_GOTO_STATE(%s);\n" % routed_state]
def get_epilog(StateIdx, InitStateF, SMD):
    """Return the entry fragment of the init state in forward lexing.

    The init state does not increment the input position, thus the
    increment is done in a separate fragment. This fragment acts as the
    entry to the init state and finally jumps to the init state's
    transition block.

    For any other state, and for backward lexing, [""] is returned.
    """
    LanguageDB = Setup.language_db

    forward_init_state_f = InitStateF and SMD.forward_lexing_f()
    if not forward_init_state_f:
        return [""]

    return [
        "\n",
        Address("$entry", StateIdx),
        "\n",
        " ", LanguageDB["$input/increment"], "\n",
        " goto %s;\n" % get_label("$init_state_fw_transition_block"),
    ]
def get_info(StateIndexList, DSM):
    """Return the routing information for the given state indices.

    StateIndexList -- collection of state indices. A non-negative index
                      refers to a state entry; a negative index '-i' refers
                      to the 'drop-out' of state 'i'.
    DSM            -- not used here.

    Returns a list of pairs (address, code), where 'code' is the 'goto'
    statement to the related label. An empty collection results in an empty
    list (a 'dummy' state router may still be required so that
    'goto __STATE_ROUTER;' does not reference a non-existing label).
    """
    if len(StateIndexList) == 0:
        return []

    result = []
    for index in StateIndexList:
        assert not isinstance(index, str)
        if index >= 0:
            # Transition to state entry
            code = "goto %s; " % get_label_of_address(index)
            result.append((index, code))
        else:
            # Transition to a template's 'drop-out'
            code = "goto " + get_label("$drop-out", -index) + "; "
            result.append((get_address("$drop-out", -index), code))
    return result
def get_info(StateIndexList, DSM):
    """Return the routing information for the given state indices.

    StateIndexList -- collection of state indices. A non-negative index
                      refers to a state entry; a negative index '-i' refers
                      to the 'drop-out' of state 'i'.
    DSM            -- not used here.

    Returns a list of pairs (address, code), where 'code' is the 'goto'
    statement to the related label. An empty collection results in an empty
    list (a 'dummy' state router may still be required so that
    'goto __STATE_ROUTER;' does not reference a non-existing label).
    """
    if len(StateIndexList) == 0:
        return []

    result = []
    for index in StateIndexList:
        assert not isinstance(index, str)
        if index >= 0:
            # Transition to state entry
            code = "goto %s; " % get_label_of_address(index)
            result.append((index, code))
        else:
            # Transition to a template's 'drop-out'
            code = "goto " + get_label("$drop-out", -index) + "; "
            result.append((get_address("$drop-out", -index), code))
    return result
def do(state, StateIdx, SMD=False):
    """Produce the code for a state and all of its transitions.

    state    -- the state to be coded; must be free of epsilon transitions.
    StateIdx -- index of the state.
    SMD      -- the state machine decorator. The default value only exists
                for the signature; a decorator is required.

    Returns a list of text fragments. Dead end states result either in an
    empty list (direct transition to the terminal) or in the entry label
    followed by a stub. Normal states consist of input block, acceptance
    info, transition block, drop-out and epilog.
    """
    assert isinstance(state, State)
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    assert len(state.transitions().get_epsilon_target_state_index_list()) == 0, \
           "Epsilon transition contained target states: state machine was not made a DFA!\n" + \
           "Epsilon target states = " + repr(state.transitions().get_epsilon_target_state_index_list())

    InitStateF = StateIdx == SMD.sm().init_state_index
    LanguageDB = Setup.language_db

    # (*) Dead End States
    #     i.e. states with no further transitions.
    dead_end_state_info = SMD.dead_end_state_db().get(StateIdx)
    if dead_end_state_info is not None:
        state_stub = __dead_end_state_stub(dead_end_state_info, SMD)
        # Some states do not need 'stubs' to terminal since they are straight
        # forward transitions to the terminal.
        if len(state_stub) == 0:
            return []
        return [
            get_label("$entry", StateIdx), ":\n",
            " ", LanguageDB["$debug-state"](StateIdx, SMD.forward_lexing_f()),
        ] + state_stub

    # (*) Normal States
    TriggerMap = state.transitions().get_trigger_map()
    # Only dead end states have empty trigger maps.
    # => Here, the trigger map cannot be empty.
    assert TriggerMap != []

    txt = []
    txt.extend(input_block.do(StateIdx, InitStateF, SMD))
    txt.extend(acceptance_info.do(state, StateIdx, SMD))
    txt.extend(transition_block.do(TriggerMap, StateIdx, SMD))
    txt.extend(drop_out.do(state, StateIdx, SMD))
    txt.extend(get_epilog(StateIdx, InitStateF, SMD))

    return txt
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate the code of a range skipper for the given end delimiter.

    EndSequence                  -- non-empty list of integers: the
                                    character codes of the closing delimiter.
    Mode                         -- passed on to the check whether the end
                                    delimiter is a subset of the indentation
                                    counter's newline, and to the
                                    'skip range open' handler lookup.
    IndentationCounterTerminalID -- terminal to continue with after skipping,
                                    required if the above check is positive.
    OnSkipRangeOpenStr           -- code for the 'skip range open' case; if
                                    empty it is derived from 'Mode'.

    Returns the pair (code string, database of required local variables).
    """
    assert isinstance(EndSequence, list)
    assert len(EndSequence) >= 1
    # Independent of whether 'map()' delivers a list or an iterator.
    assert all(isinstance(letter, int) for letter in EndSequence)

    local_variable_db = {}

    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, \
    delimiter_length_str, \
    delimiter_comment_str = get_character_sequence(EndSequence)

    delimiter_comment_str = LanguageDB["$comment"](" Delimiter: " + delimiter_comment_str)

    # Determine the check for the tail of the delimiter
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        fragments = []
        for i in range(1, len(EndSequence)):
            fragments.append(" " + LanguageDB["$input/get-offset"](i - 1) + "\n")
            fragments.append(" " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i))
            fragments.append(" goto %s;" % get_label("$entry", skipper_index, U=True))
            fragments.append(" " + LanguageDB["$endif"])
        delimiter_remainder_test_str = "".join(fragments)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's
        # terminal id must be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern
        # triggers in indentation counting => move on to the indentation
        # counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                IndentationCounterTerminalID,
                                                                U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str, [
        ["$$DELIMITER$$", delimiter_str],
        ["$$DELIMITER_LENGTH$$", delimiter_length_str],
        ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
        ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
        ["$$END_WHILE$$", LanguageDB["$loop-end"]],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", get_label("$entry", skipper_index)],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry
        # preparation.
        ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
        ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
    ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch
    code_str = blue_print(code_str, [
        ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
        ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
    ])

    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
def __path_walker(txt, PathWalker, SMD):
    """Generates the path walker, that walks along the character sequence.

    The fragments are appended to 'txt' in this order:
      (1) input block and (2) acceptance information -- or, for non-uniform
          state entries, only the entry label plus debug output,
      (3) the comparison with the path's current character followed by the
          transition map of the 'skeleton',
      (4) the drop out.
    """
    paths = PathWalker.path_list()
    Skeleton = paths[0].skeleton()  # NOTE(review): result is not used below
    walker_id = PathWalker.core().state_index

    if not PathWalker.uniform_state_entries_f():
        txt.extend([
            " __quex_assert_no_passage();\n",
            get_label("$entry", walker_id) + ":\n",
            " __quex_debug(\"path walker %i\");\n" % walker_id,
        ])
    else:
        # (1) Input Block (get the new character)
        txt.extend(input_block.do(walker_id, False, SMD))
        # (2) Acceptance information/Store Input positions
        txt.extend(acceptance_info.do(PathWalker, walker_id, SMD,
                                      ForceSaveLastAcceptanceF=True))

    # (3) Transition Map
    # (3.1) The comparison with the path's current character
    #       If the termination code is reached, the path's end state is entered.
    if not PathWalker.uniform_state_entries_f():
        on_match = [__state_router(PathWalker, SMD)]
        on_path_end = [" "] + on_match
    else:
        on_match = ["goto %s;\n" % get_label_of_address(walker_id, U=True)]
        on_path_end = __end_state_router(PathWalker, SMD)

    txt.append(" ")
    txt.append(LanguageDB["$if =="]("*path_iterator"))
    txt.append(" ")
    txt.append(LanguageDB["$increment"]("path_iterator"))
    txt.extend(["\n", " "])
    txt.extend(on_match)
    txt.append(" ")
    txt.append(LanguageDB["$elseif"]
               + LanguageDB["$=="]("*path_iterator", "QUEX_SETTING_PATH_TERMINATION_CODE")
               + LanguageDB["$then"])
    txt.extend(on_path_end)
    txt.append(" ")
    txt.append(LanguageDB["$endif"])
    txt.append("\n")

    # (3.2) Transition map of the 'skeleton'
    skeleton_trigger_map = PathWalker.transitions().get_trigger_map()
    if len(skeleton_trigger_map) == 0:
        # (This happens, for example, if there are only keywords and no
        #  'overlaying' identifier pattern.)
        # Even if the skeleton/trigger map is empty there must be something
        # that catches the 'buffer limit code'.
        # => Define an 'all drop out' trigger map. (Presumably the isolation
        #    of the 'buffer limit' interval happens in the transition block
        #    coder; it is not done here.)
        skeleton_trigger_map = [(Interval(-sys.maxint, sys.maxint), None)]

    if PathWalker.uniform_state_entries_f():
        return_state_str = None
    else:
        return_state_str = "path_walker_%i_state[path_iterator - path_walker_%i_base]" \
                           % (walker_id, walker_id)

    txt.extend(transition_block.do(skeleton_trigger_map, walker_id, SMD,
                                   ReturnToState_Str=return_state_str))

    # (4) The drop out (nothing matched)
    #     (Path iterator has not been increased yet)
    txt.extend(drop_out.do(PathWalker, walker_id, SMD))

    return
def __state_entries(txt, PathWalker, SMD):
    """Defines the entries of the path's states, so that the path iterator
    is set before the jump into the path walker. E.g.

        STATE_4711: path_iterator = path_23 + 0; goto PATHWALKER_23;
        STATE_3123: path_iterator = path_23 + 1; goto PATHWALKER_23;
        STATE_8912: path_iterator = path_23 + 2; goto PATHWALKER_23;

    txt        -- list that receives one 'Address("$entry", ...)' object per
                  implemented state entry.
    PathWalker -- the path walker that hosts the paths.
    SMD        -- state machine decorator.

    Returns True if the generated code assigns 'path_end_state' (uniform
    state entries and more than one path), False otherwise.
    """
    sm = SMD.sm()
    PathN = len(PathWalker.path_list())
    require_path_end_state_variable_f = False
    txt.append("\n")
    for path in PathWalker.path_list():
        # State index of the previous state on the current path; None marks
        # the first state of the path.
        prev_state_index = None
        # Last state of sequence is not in the path, it is the first state after.
        for i, info in enumerate(path.sequence()[:-1]):
            state_index = info[0]
            # No need for state router if:
            #   (i)  PathWalker is uniform, because then even after reload no
            #        dedicated state entry is required.
            #   (ii) The state is not entered from any other state except the
            #        predecessor on the path.
            # But:
            #   The first state always needs an entry.
            if prev_state_index != None:
                candidate = sm.get_only_entry_to_state(state_index)
                if PathWalker.uniform_state_entries_f() and prev_state_index == candidate:
                    # Entry omitted; the state still counts as predecessor
                    # for the next element of the path.
                    prev_state_index = state_index
                    continue

            state = SMD.sm().states[state_index]

            entry_txt = []
            if PathWalker.uniform_state_entries_f():
                # If all state entries are uniform, the entry handling happens
                # uniformly at the entrance of the path walker, not each state.
                # Here, only label and debug output are generated.
                label_str = get_label("$entry", state_index) + ":\n"
                if state_index != SMD.sm().init_state_index:
                    label_str = " __quex_assert_no_passage();\n" + label_str
                entry_txt.append(label_str)
                entry_txt.append(" ")
                entry_txt.append(LanguageDB["$debug-state"](state_index, SMD.forward_lexing_f()))
            else:
                # Non-uniform: each state has its own input and acceptance handling.
                entry_txt.extend(input_block.do(state_index, False, SMD))
                entry_txt.extend(acceptance_info.do(state, state_index, SMD,
                                                    ForceSaveLastAcceptanceF=True))

            if PathWalker.uniform_state_entries_f() and PathN != 1:
                # With several paths the end state is stored in a variable.
                require_path_end_state_variable_f = True
                end_state_index = path.sequence()[-1][0]
                entry_txt.append(" path_end_state = QUEX_LABEL(%i);\n" \
                                 % get_address("$entry", end_state_index, U=True, R=True))

            # Set the path iterator to the state's position on the path and
            # enter the path walker.
            entry_txt.append(" ")
            entry_txt.append(LanguageDB["$assignment"](
                             "path_iterator ",
                             "path_%i + %i" % (path.index(), i)))
            entry_txt.append("goto %s;\n\n" % get_label_of_address(PathWalker.core().state_index, U=True))

            txt.append(Address("$entry", state_index, Code=entry_txt))
            prev_state_index = state_index

    return require_path_end_state_variable_f
def get_transition_to_drop_out(CurrentStateIdx):
    """Returns the 'goto' statement that jumps to the drop-out of a state.

    CurrentStateIdx -- index of the state whose '$drop-out' label is targeted.

    RETURNS: string of the form "goto <label>;".
    """
    # The label is requested with 'U=True', exactly as before.
    # NOTE(review): presumably this marks the label as referenced -- confirm
    # against 'get_label()'.
    return "goto %s;" % get_label("$drop-out", CurrentStateIdx, U=True)
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generates the code of a skipper that skips until 'EndSequence' appears.

    EndSequence   -- non-empty list of integers; the closing delimiter.
    Mode          -- passed to the check whether the delimiter is a subset of
                     the indentation counter's newline, and to
                     'get_on_skip_range_open()'.
    IndentationCounterTerminalID
                  -- must not be None, if that check holds; the skipper then
                     continues at the terminal with this id.
    OnSkipRangeOpenStr
                  -- if not empty, it is used instead of the result of
                     'get_on_skip_range_open()'.

    RETURNS: [0] code string, built from 'template_str'
             [1] db of the local variables that the code requires
    """
    assert isinstance(EndSequence, list)
    assert len(EndSequence) >= 1
    # Element-wise check. Comparing the result of 'map()' with a list only
    # works where 'map()' returns a list.
    assert all(isinstance(letter, int) for letter in EndSequence)

    local_variable_db = {}

    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, delimiter_length_str, delimiter_comment_str \
        = get_character_sequence(EndSequence)

    delimiter_comment_str = LanguageDB["$comment"](" Delimiter: " + delimiter_comment_str)

    # Determine the check for the tail of the delimiter: one test for each
    # letter behind the first one.
    delimiter_remainder_test_str = ""
    if len(EndSequence) != 1:
        fragment_list = []
        for i in range(1, len(EndSequence)):
            fragment_list.append(" " + LanguageDB["$input/get-offset"](i - 1) + "\n")
            fragment_list.append(" " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i))
            fragment_list.append(" goto %s;" % get_label("$entry", skipper_index, U=True))
            fragment_list.append(" " + LanguageDB["$endif"])
        delimiter_remainder_test_str = "".join(fragment_list)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID != None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                 IndentationCounterTerminalID,
                                                                 U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str, [
        ["$$DELIMITER$$", delimiter_str],
        ["$$DELIMITER_LENGTH$$", delimiter_length_str],
        ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
        ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
        ["$$END_WHILE$$", LanguageDB["$loop-end"]],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", get_label("$entry", skipper_index)],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
        ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
    ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: the skipper index is replaced last, because some of
    # the replacements above insert '$$SKIPPER_INDEX$$' themselves.
    code_str = blue_print(code_str, [
        ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
        ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
    ])

    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
def get_skipper(OpenerSequence, CloserSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generates the code of a skipper for the range between an opener and a closer.

    OpenerSequence, CloserSequence
                  -- non-empty lists of integers; they must differ.
    Mode          -- passed to the check whether the closer is a subset of the
                     indentation counter's newline, and to
                     'get_on_skip_range_open()'.
    IndentationCounterTerminalID
                  -- must not be None, if that check holds; the skipper then
                     continues at the terminal with this id.
    OnSkipRangeOpenStr
                  -- if not empty, it is used instead of the result of
                     'get_on_skip_range_open()'.

    RETURNS: [0] code string, built from 'template_str'
             [1] db of the local variables 'counter' and 'reference_p'
    """
    # Element-wise checks. Comparing the result of 'map()' with a list only
    # works where 'map()' returns a list.
    assert isinstance(OpenerSequence, list)
    assert len(OpenerSequence) >= 1
    assert all(isinstance(letter, int) for letter in OpenerSequence)
    assert isinstance(CloserSequence, list)
    assert len(CloserSequence) >= 1
    assert all(isinstance(letter, int) for letter in CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID != None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                 IndentationCounterTerminalID,
                                                                 U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    local_variable_db = {
        "counter": Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    # Column counting relies on 'reference_p': it is set at the beginning, the
    # distance to it is added before a reload, and it is set again afterwards.
    reference_p_def = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    before_reload = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                    " - reference_p));\n"
    after_reload = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # The closer ends with newline: count one line, set the column to 1.
        end_procedure = " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += " __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        end_procedure = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        " - reference_p));\n"

    code_str = blue_print(template_str, [
        ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
        #
        ["$$OPENER$$", opener_str],
        ["$$OPENER_LENGTH$$", opener_length_str],
        ["$$OPENER_COMMENT$$", opener_comment_str],
        ["$$CLOSER$$", closer_str],
        ["$$CLOSER_LENGTH$$", closer_length_str],
        ["$$CLOSER_COMMENT$$", closer_comment_str],
        #
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", get_label("$entry", skipper_index)],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
        ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
        #
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
        ["$$LC_COUNT_IN_LOOP$$", line_column_counter_in_loop],
        ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
        ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
    ])

    return code_str, local_variable_db
def parse_mode_option(fh, new_mode):
    """Reads one mode option from 'fh' and enters it into 'new_mode'.

    -- 'skip', 'skip_range', 'skip_nested_range': a pattern match is added to
       the mode; its action is a 'GeneratedCode' object, i.e. the skipper code
       is generated later.
    -- 'indentation': the newline pattern (and the suppressed newline pattern,
       if a suppressor is defined) is added as a match; then the setup is
       stored as an option like any other value.
    -- any other option: the value is read with 'read_option_value()', checked
       against the option's domain, and stored with 'new_mode.add_option()'.

    RETURNS: False, if 'read_option_start()' found no option;
             True,  else.
    """
    def fit_state_machine(SM):
        # Provides a DFA compliant, hopcroft optimized state machine. The
        # result is not necessarily the object that was passed in; callers
        # must continue with the return value.
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier, so it must not be
            # combined with the '%' operator.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)
        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")

            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function = {
            "skip_range": skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        # Continue with the fitted state machine, as the other branches do.
        # Otherwise, the result of an NFA-to-DFA conversion would be lost.
        opener_sm = fit_state_machine(opener_sm)

        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() != None:
            suppressed_newline_pattern = \
                "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)

            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                character_counter.get_newline_n(suppressed_newline_sm),
                character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment, suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline])*
        #
        # This way empty lines are eating away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str, action, sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in lexer_mode.mode_option_info_db

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
def backward_detector_function_get(self, sm):
    """Generates the function that implements the backward detector for 'sm'.

    sm -- state machine; it must not contain orphaned states.

    The text consists of: prolog ('bwd_prolog'), local variable definitions,
    optional state machine comment, the coded state machine, the terminal,
    the state router (if any address is subject to routing), and the epilog
    ('bwd_epilog').

    RETURNS: result of 'get_plain_strings()' for the assembled text.
    """
    assert sm.get_orphaned_state_index_list() == []

    decorated_sm = StateMachineDecorator(sm,
                                         "BACKWARD_DETECTOR_" + repr(sm.get_id()),
                                         PostContextSM_ID_List=[],
                                         BackwardLexingF=True,
                                         BackwardInputPositionDetectionF=True)

    variable_db.init()
    init_address_handling(decorated_sm.get_direct_transition_to_terminal_db())

    body_txt = state_machine_coder.do(decorated_sm)

    comment_txt = []
    if Setup.comment_state_machine_transitions_f:
        comment_txt = Setup.language_db["$ml-comment"](
            "BEGIN: BACKWARD DETECTOR STATE MACHINE\n" + \
            sm.get_string(NormalizeF=False) + \
            "\nEND: BACKWARD DETECTOR STATE MACHINE")
        comment_txt.append("\n")

    # -- input position detectors simply the next 'catch' and return:
    #    seek the end of the core pattern, increment the input, return.
    terminal_txt = [
        "\n",
        " __quex_assert_no_passage();\n",
        get_label("$terminal-general-bw") + ":\n",
        " " + self.language_db["$input/seek_position"]("end_of_core_pattern_position") + "\n",
        " " + self.language_db["$input/increment"] + "\n",
        " return;\n",
    ]

    # The state router is only generated, if there are addresses to be routed.
    routed_address_set = get_address_set_subject_to_routing()
    state_router_txt = ""
    if len(routed_address_set) != 0:
        routed_state_info_list = state_router.get_info(routed_address_set, decorated_sm)
        state_router_txt = state_router.do(routed_state_info_list)
        variable_db.require("target_state_index", Condition_ComputedGoto=False)

    for variable_name in ("input", "end_of_core_pattern_position"):
        variable_db.require(variable_name)

    local_variable_definition = self.language_db["$local-variable-defs"](variable_db.get())

    # Put all things together
    txt = [bwd_prolog.replace("$$ID$$", repr(sm.get_id()).replace("L", ""))]
    for section in (local_variable_definition, comment_txt, body_txt, terminal_txt):
        txt.extend(section)
    txt.append(state_router_txt)
    txt.append(bwd_epilog.replace("$$INIT_STATE_ID$$", get_label_of_address(sm.init_state_index)))

    return get_plain_strings(txt)
def get_skipper(TriggerSet):
    """Generates the code that passes by any character of 'TriggerSet'.

    TriggerSet -- non-empty 'NumberSet' of the characters to be skipped.

    RETURNS: [0] list: prolog, code of the transition block, epilog
             [1] db that contains the local variable 'reference_p'
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # As long as characters of the trigger set appear, the skipper goes back
    # to the loop start. (no worry about 'drop-out-ranges' etc.)
    transition_map = TransitionMap()
    transition_map.add_transition(TriggerSet, skipper_index)

    # On buffer limit code, the skipper must transit to a dedicated reloader
    goto_reload_str = "goto %s;" % get_label("$reload", skipper_index)
    iteration_code = transition_block.do(transition_map.get_trigger_map(),
                                         skipper_index,
                                         DSM=None,
                                         GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog, prolog_replacements)

    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog, epilog_replacements)

    code = [prolog] + list(iteration_code) + [epilog]

    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    return code, local_variable_db
def get_transition_to_state(TargetInfo):
    """Returns the 'goto' statement that jumps to the entry of a state.

    TargetInfo -- passed to 'get_label()' to identify the '$entry' label.

    RETURNS: string of the form "goto <label>;".
    """
    # The label is requested with 'U=True', exactly as before.
    # NOTE(review): presumably this marks the label as referenced -- confirm
    # against 'get_label()'.
    return "goto %s;" % get_label("$entry", TargetInfo, U=True)
def do(Data):
    """Generates the code of the indentation counter.

    The generated code is very similar to the 'skipper' code. It is to be
    executed as soon as a 'real' newline arrived. Then it skips whitespace
    until the next non-whitespace (also newline may trigger a 'stop').
    Dependent on the setup the indentation is determined.

    Data -- dictionary; 'Data["indentation_setup"]' is the 'IndentationSetup'.

    RETURNS: [0] list: prolog, code of the transition block, "\\n", epilog
             [1] db that contains the local variable 'reference_p'
    """
    IndentationSetup = Data["indentation_setup"]
    assert IndentationSetup.__class__.__name__ == "IndentationSetup"

    LanguageDB = Setup.language_db

    # The mode is only known, if the setup has been announced to one.
    mode_name = IndentationSetup.containing_mode_name()
    Mode = lexer_mode.mode_db[mode_name] if mode_name != "" else None

    counter_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    trigger_map = []
    end_procedure_list = []

    if IndentationSetup.has_only_single_spaces():
        # If the indentation consists only of spaces, then it is 'uniform'.
        # Count indentation/column at end of run;
        # simply: current position - reference_p
        character_set = IndentationSetup.space_db.values()[0]
        trigger_map.extend(
            [interval, counter_index]
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True))

        # Reference Pointer: Define Variable, Initialize, determine how to subtract.
        end_procedure_list.append(
            " me->counter._indentation = (size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer) - reference_p);\n")
    else:
        # Count the indentation/column during the 'run':
        # first the space counters, then the grid counters.
        for count, character_set in IndentationSetup.space_db.items():
            trigger_map.extend(
                [interval, IndentationCounter("space", count, counter_index)]
                for interval in character_set.get().get_intervals(PromiseToTreatWellF=True))

        for count, character_set in IndentationSetup.grid_db.items():
            trigger_map.extend(
                [interval, IndentationCounter("grid", count, counter_index)]
                for interval in character_set.get().get_intervals(PromiseToTreatWellF=True))

        # Reference Pointer: Not required.
        #                    No subtraction 'current_position - reference_p'.
        #                    (however, we pass 'reference_p' to indentation handler)

    # Bad character detection
    if IndentationSetup.bad_character_set.get().is_empty() == False:
        trigger_map.extend(
            [interval, IndentationCounter("bad", None, counter_index)]
            for interval in IndentationSetup.bad_character_set.get().get_intervals(PromiseToTreatWellF=True))

    # Since we do not use a 'TransitionMap', there are some things we need
    # to do by hand.
    arrange_trigger_map(trigger_map)

    local_variable_db = {
        "reference_p": Variable("reference_p",
                                "QUEX_TYPE_CHARACTER_POSITION",
                                None,
                                "(QUEX_TYPE_CHARACTER_POSITION)0x0"),
    }
    init_reference_p = "".join([
        " reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer);\n",
        " me->counter._indentation = (QUEX_TYPE_INDENTATION)0;\n",
    ])

    goto_reload_str = "goto %s;" % get_label("$reload", counter_index)
    iteration_code = transition_block.do(trigger_map,
                                         counter_index,
                                         DSM=None,
                                         GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip whitespace at line begin; count indentation.")

    # NOTE: Line and column number counting is off
    #       -- No newline can occur
    #       -- column number = indentation at the end of the process
    end_procedure_list.append(" __QUEX_IF_COUNT_COLUMNS_ADD(me->counter._indentation);\n")
    if Mode == None or Mode.default_indentation_handler_sufficient():
        end_procedure_list.append(
            " QUEX_NAME(on_indentation)(me, me->counter._indentation, reference_p);\n")
    else:
        # Definition of '%s_on_indentation' in mode_classes.py.
        end_procedure_list.append(
            " QUEX_NAME(%s_on_indentation)(me, me->counter._indentation, reference_p);\n"
            % Mode.name)
    end_procedure = "".join(end_procedure_list)

    # The finishing touch
    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$INIT_REFERENCE_POINTER$$", init_reference_p],
        ["$$COUNTER_INDEX$$", repr(counter_index)],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog_txt, prolog_replacements)

    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$COUNTER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", counter_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", counter_index)],
        ["$$COUNTER_INDEX$$", repr(counter_index)],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$END_PROCEDURE$$", end_procedure],
        ["$$BAD_CHARACTER_HANDLING$$", get_bad_character_handler(Mode, IndentationSetup, counter_index)],
    ]
    epilog = blue_print(epilog_txt, epilog_replacements)

    # txt.append(Address("$drop-out", counter_index))
    txt = [prolog] + list(iteration_code) + ["\n", epilog]

    return txt, local_variable_db
def get_skipper(TriggerSet):
    """Generates the code that passes by any character of 'TriggerSet'.

    TriggerSet -- non-empty 'NumberSet' of the characters to be skipped.

    RETURNS: [0] list: prolog, code of the transition block, epilog
             [1] db that contains the local variable 'reference_p'
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # As long as characters of the trigger set appear, the skipper goes back
    # to the loop start. (no worry about 'drop-out-ranges' etc.)
    transition_map = TransitionMap()
    transition_map.add_transition(TriggerSet, skipper_index)

    # On buffer limit code, the skipper must transit to a dedicated reloader
    goto_reload_str = "goto %s;" % get_label("$reload", skipper_index)
    iteration_code = transition_block.do(transition_map.get_trigger_map(),
                                         skipper_index,
                                         DSM=None,
                                         GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog, prolog_replacements)

    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog, epilog_replacements)

    code = [prolog] + list(iteration_code) + [epilog]

    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    return code, local_variable_db
def parse_mode_option(fh, new_mode):
    """Reads one mode option from 'fh' and enters it into 'new_mode'.

    -- 'skip', 'skip_range', 'skip_nested_range': a pattern match is added to
       the mode; its action is a 'GeneratedCode' object, i.e. the skipper code
       is generated later.
    -- 'indentation': the newline pattern (and the suppressed newline pattern,
       if a suppressor is defined) is added as a match; then the setup is
       stored as an option like any other value.
    -- any other option: the value is read with 'read_option_value()', checked
       against the option's domain, and stored with 'new_mode.add_option()'.

    RETURNS: False, if 'read_option_start()' found no option;
             True,  else.
    """
    def fit_state_machine(SM):
        # Provides a DFA compliant, hopcroft optimized state machine. The
        # result is not necessarily the object that was passed in; callers
        # must continue with the return value.
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return result

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, lexer_mode.mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier, so it must not be
            # combined with the '%' operator.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        pattern_sm = fit_state_machine(pattern_sm)
        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        pattern_sm.side_info = SideInfo()

        new_mode.add_match(pattern_str, action, pattern_sm)
        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = parse_string_constant(fh, "Opener pattern for 'skip_nested_range'")

            opener_sm = StateMachine()
            idx = opener_sm.init_state_index
            for letter in opener_sequence:
                idx = opener_sm.add_transition(idx, letter)
            opener_sm.states[idx].set_acceptance(True)
        else:
            opener_str, opener_sm = regular_expression.parse(fh)
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = parse_string_constant(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function = {
            "skip_range": skip_range.do,
            "skip_nested_range": skip_nested_range.do,
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        # Continue with the fitted state machine, as the other branches do.
        # Otherwise, the result of an NFA-to-DFA conversion would be lost.
        opener_sm = fit_state_machine(opener_sm)

        # For skippers line and column counting detection is not really a topic
        # It is done in the skipper itself.
        opener_sm.side_info = SideInfo()

        new_mode.add_match(opener_str, action, opener_sm)
        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern = ""
        if value.newline_suppressor_state_machine.get() != None:
            suppressed_newline_pattern = \
                "(" + value.newline_suppressor_state_machine.pattern_str + ")" \
                + "(" + value.newline_state_machine.pattern_str + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code_fragment = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            suppressed_newline_sm = fit_state_machine(suppressed_newline_sm)

            # Analyze pattern for constant number of newlines, characters, etc.
            suppressed_newline_sm.side_info = SideInfo(
                character_counter.get_newline_n(suppressed_newline_sm),
                character_counter.get_character_n(suppressed_newline_sm))

            new_mode.add_match(suppressed_newline_pattern, code_fragment, suppressed_newline_sm,
                               Comment="indentation newline suppressor")

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline])*
        #
        # This way empty lines are eating away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = hopcroft.do(nfa_to_dfa.do(x4), CreateNewStateMachineF=False)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        sm = fit_state_machine(sm)
        sm.side_info = SideInfo(character_counter.get_newline_n(sm),
                                character_counter.get_character_n(sm))
        new_mode.add_match(value.newline_state_machine.pattern_str, action, sm,
                           Comment="indentation newline")

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in lexer_mode.mode_option_info_db

    # Is the option of the appropriate value?
    option_info = lexer_mode.mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
def _assert_int_sequence(Sequence):
    """Assert that 'Sequence' is a non-empty list consisting only of ints."""
    assert isinstance(Sequence, list)
    assert len(Sequence) >= 1
    # A list comprehension (instead of 'map') keeps the comparison valid on
    # interpreters where 'map' returns an iterator rather than a list.
    assert [type(element) for element in Sequence] == [int] * len(Sequence)


def get_skipper(OpenerSequence, CloserSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Fills 'template_str' with the code of a skipper that is delimited by
    an opener and a closer sequence.

    OpenerSequence, CloserSequence -- non-empty lists of integers; the two
                                      sequences must differ.
    Mode                           -- passed on to the check whether the closer
                                      is a subset of the indentation counter's
                                      newline, and to 'get_on_skip_range_open()'.
    IndentationCounterTerminalID   -- must not be None if that check holds; the
                                      skipper then jumps to the terminal with
                                      this id instead of jumping to the start.
    OnSkipRangeOpenStr             -- if not the empty string, it is used as the
                                      'on skip range open' code instead of the
                                      result of 'get_on_skip_range_open()'.

    RETURNS: tuple (code string, database of required local variables).
    """
    _assert_int_sequence(OpenerSequence)
    _assert_int_sequence(CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct",
                                                                IndentationCounterTerminalID,
                                                                U=True)

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    # Local variables which the generated skipper code relies on.
    local_variable_db = {
        "counter": Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    # Code fragments for line and column counting.
    reference_p_def = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    before_reload = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                    " - reference_p));\n"
    after_reload = " __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # The closer ends with newline: one more line, column is set to 1.
        end_procedure = " __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += " __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        # Otherwise the column count is advanced by the distance to 'reference_p'.
        end_procedure = " __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        " - reference_p));\n"

    code_str = blue_print(template_str, [
        ["$$SKIPPER_INDEX$$", __nice(skipper_index)],
        #
        ["$$OPENER$$", opener_str],
        ["$$OPENER_LENGTH$$", opener_length_str],
        ["$$OPENER_COMMENT$$", opener_comment_str],
        ["$$CLOSER$$", closer_str],
        ["$$CLOSER_LENGTH$$", closer_length_str],
        ["$$CLOSER_COMMENT$$", closer_comment_str],
        #
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", get_label("$entry", skipper_index)],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
        ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
        #
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
        ["$$LC_COUNT_IN_LOOP$$", line_column_counter_in_loop],
        ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
        ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
    ])

    return code_str, local_variable_db