def create_range_skipper_code(Language, TestStr, CloserSequence, QuexBufferSize=1024,
                              CommentTestStrF=False, ShowPositionF=False):
    """Generate range skipper code for 'CloserSequence' and embed it into a
    customized analyzer function.

    The skipper code is produced by 'range_skipper.do()'; the result of
    'create_customized_analyzer_function()' is passed through unchanged.

    REQUIRES: 'QuexBufferSize' is at least 'len(CloserSequence) + 2'.

    RETURNS: Whatever 'create_customized_analyzer_function()' returns.
    """
    assert QuexBufferSize >= len(CloserSequence) + 2

    end_str                    = __prepare(Language)
    door_id_on_skip_range_open = dial_db.new_door_id()

    # Setup of the skipper; entries are computed in the order they are entered.
    closer_pattern = Pattern(StateMachine.from_sequence(CloserSequence),
                             PatternString="<skip range closer>")
    on_open_code   = CodeFragment([end_str])
    door_id_after  = DoorID.continue_without_on_after_match()

    data = {
        "closer_sequence":    CloserSequence,
        "closer_pattern":     closer_pattern,
        "mode_name":          "MrUnitTest",
        "on_skip_range_open": on_open_code,
        "door_id_after":      door_id_after,
    }

    skipper_code = range_skipper.do(data, Analyzer)
    __require_variables()

    # Hand a snapshot of the variable database to the analyzer function.
    local_variable_db = deepcopy(variable_db.get())

    return create_customized_analyzer_function(
        Language, TestStr, skipper_code,
        QuexBufferSize, CommentTestStrF, ShowPositionF, end_str,
        MarkerCharList        = [],
        LocalVariableDB       = local_variable_db,
        DoorIdOnSkipRangeOpen = door_id_on_skip_range_open)
def _get_state_machine_and_terminal(Sequence, Name, OpList):
    """Build a state machine that detects 'Sequence' together with the
    terminal that belongs to it.

    The state machine receives a new incidence id. The terminal is named
    'Name', carries that incidence id, implements the commands of 'OpList',
    and is flagged to require a 'goto loop entry'.

    RETURNS: (state machine, terminal)
    """
    detector = StateMachine.from_sequence(Sequence)
    detector.set_id(dial_db.new_incidence_id())

    terminal_code = CodeTerminal(Lng.COMMAND_LIST(OpList))
    result        = Terminal(terminal_code, Name, detector.get_id())
    # --> Goto Loop Entry
    result.set_requires_goto_loop_entry_f()

    return detector, result
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb):
    """Provide the state machine that walks along the 'closer' of a skipped
    range, paired with the terminal entered when the closer has matched.

    The terminal consists of a single 'goto' to the door delivered by
    'DoorID.continue_without_on_after_match()'.

    'CounterDb' is not used inside this function.

    RETURNS: list of (state machine, terminal) with exactly one element.
    """
    closer_sm = StateMachine.from_sequence(CloserSequence)
    closer_sm.set_id(dial_db.new_incidence_id())

    # When the closer matched: leave the skipper.
    exit_code     = [Lng.GOTO(DoorID.continue_without_on_after_match())]
    exit_terminal = Terminal(CodeTerminal(exit_code),
                             "<SKIP RANGE TERMINATED>",
                             closer_sm.get_id())

    return [(closer_sm, exit_terminal)]
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb):
    """Pair the state machine detecting 'CloserSequence' with its terminal.

    The state machine is built from the sequence and receives a new incidence
    id. The terminal, named "<SKIP RANGE TERMINATED>", carries the same id and
    jumps to 'DoorID.continue_without_on_after_match()'.

    'CounterDb' is accepted but not referenced here.

    RETURNS: [(state machine, terminal)] -- a list with one single element.
    """
    sm_on_closer = StateMachine.from_sequence(CloserSequence)
    sm_on_closer.set_id(dial_db.new_incidence_id())

    goto_after  = Lng.GOTO(DoorID.continue_without_on_after_match())
    on_closer   = Terminal(CodeTerminal([goto_after]),
                           "<SKIP RANGE TERMINATED>",
                           sm_on_closer.get_id())

    pair_list = []
    pair_list.append((sm_on_closer, on_closer))
    return pair_list
def __parse_option(fh, new_mode):
    """Parse one mode option from 'fh' and enter it into 'new_mode'.

    The option identifier is read by 'read_option_start()' and verified
    against the keys of 'mode_option_info_db'. Depending on the identifier:

      "skip"               -- a character set is parsed; a match on that set
                              with a generated 'skip_character_set' action is
                              added to the mode.
      "skip_range",
      "skip_nested_range"  -- opener and closer are parsed; a match on the
                              opener with a generated skipper action is added
                              to the mode.
      "indentation"        -- the indentation setup is parsed; matches for
                              the suppressed newline (if a suppressor exists)
                              and for the newline are added, then the setup is
                              stored as the option's value.
      anything else        -- the value is read by 'read_option_value()',
                              checked against the option's domain, and stored.

    RETURNS: False -- if 'read_option_start()' returned None.
             True  -- else.
    """
    def get_pattern_object(SM):
        # Ensure a DFA, minimize it in place, and wrap it into a 'Pattern'.
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier. Applying '%' to
            # it raises a TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code,
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline])*
        #
        # This way empty lines are eating away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)

    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
def do(A, B):
    """Determine whether 'A' is a superset of 'B'.

    'A' and 'B' are either both state machines, or both objects that provide
    a core state machine '.sm' plus pre-context information.

    RETURNS: True  - if A == SUPERSET of B
             False - if not
    """
    # (*) Plain state machines ________________________________________________
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    assert not isinstance(B, StateMachine)

    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from begin to end of post condition).
    #       Post-conditions only tell something about the place where the
    #       analyzer returns after the match.
    if not Checker(A.sm, B.sm).do():
        return False

    # NOW: 'core(A)' matches a super set of 'core(B)'.
    #
    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context():
        # A is not restricted. B may be (who cares).
        # => A can match more than B.
        return True
    elif not B.has_pre_context():
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot.
        return False

    # NOW: Both, A and B, are restricted by a pre-context.
    if B.pre_context_trivial_begin_of_line_f:
        # B requires only 'begin-of-line'. The condition is fulfilled exactly
        # if A requires only 'begin-of-line', too.
        return True if A.pre_context_trivial_begin_of_line_f else False

    # NOW: B is a 'real' pre-context not only a 'begin-of-line'
    if A.pre_context_trivial_begin_of_line_f:
        # A contains only 'begin-of-line'. Note, however, that
        # -- newline definition may include '\r\n' so inversion is
        #    required.
        # -- at this point in time we are dealing with transformed
        #    machines. So this has also to be transformed.
        newline_inverse = StateMachine.from_sequence("\n").get_inverse()
        A_pre_sm        = transformation.try_this(newline_inverse, fh=-1)
    else:
        A_pre_sm = A.inverse_pre_context_sm

    # NOTE(review): The check below asks whether B's inverse pre-context is a
    #               superset of A's. Confirm that this direction is intended.
    return Checker(B.inverse_pre_context_sm, A_pre_sm).do()
def __parse_option(fh, new_mode):
    """Parse one mode option from 'fh' and enter it into 'new_mode'.

    The option identifier is read by 'read_option_start()' and verified
    against the keys of 'mode_option_info_db'. Depending on the identifier:

      "skip"               -- a character set is parsed; a match on that set
                              with a generated 'skip_character_set' action is
                              added to the mode.
      "skip_range",
      "skip_nested_range"  -- opener and closer are parsed; a match on the
                              opener with a generated skipper action is added
                              to the mode.
      "indentation"        -- the indentation setup is parsed; matches for
                              the suppressed newline (if a suppressor exists)
                              and for the newline are added, then the setup is
                              stored as the option's value.
      anything else        -- the value is read by 'read_option_value()',
                              checked against the option's domain, and stored.

    RETURNS: False -- if 'read_option_start()' returned None.
             True  -- else.
    """
    def get_pattern_object(SM):
        # Ensure a DFA, minimize it in place, and wrap it into a 'Pattern'.
        if not SM.is_DFA_compliant():
            result = nfa_to_dfa.do(SM)
        else:
            result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None:
        return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier. Applying '%' to
            # it raises a TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(
                fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm),
                           Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True),
                                    FileName, LineN)

            new_mode.add_match(
                suppressed_newline_pattern_str, code,
                get_pattern_object(suppressed_newline_sm),
                Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline])*
        #
        # This way empty lines are eating away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)

    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True