Example #1
def create_range_skipper_code(Language, TestStr, CloserSequence, QuexBufferSize=1024, 
                              CommentTestStrF=False, ShowPositionF=False):
    assert QuexBufferSize >= len(CloserSequence) + 2

    end_str = __prepare(Language)

    door_id_on_skip_range_open = dial_db.new_door_id()

    data = { 
        "closer_sequence":    CloserSequence, 
        "closer_pattern":     Pattern(StateMachine.from_sequence(CloserSequence), 
                                      PatternString="<skip range closer>"),
        "mode_name":          "MrUnitTest",
        "on_skip_range_open": CodeFragment([end_str]),
        "door_id_after":      DoorID.continue_without_on_after_match(),
    }

    skipper_code = range_skipper.do(data, Analyzer)
    __require_variables()

    return create_customized_analyzer_function(Language, TestStr, skipper_code,
                                               QuexBufferSize, CommentTestStrF, ShowPositionF, end_str,
                                               MarkerCharList  = [], 
                                               LocalVariableDB = deepcopy(variable_db.get()),
                                               DoorIdOnSkipRangeOpen=door_id_on_skip_range_open) 
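
A minimal usage sketch for the test helper above. The concrete argument values (language "Cpp", the test string, and the closer passed as a list of code points) are assumptions for illustration, not taken from quex's unit tests.

closer = [ord(c) for c in "*/"]          # skip everything up to and including '*/'
generated = create_range_skipper_code("Cpp", "abc */ def", closer,
                                       QuexBufferSize=len(closer) + 2)
print(generated)                         # generated analyzer source for the unit test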
Example #2
def _get_state_machine_and_terminal(Sequence, Name, OpList):
    """Create state machine that detects the 'Sequence', names the terminal
    with 'Name', and implements the 'CmdList' in the terminal.

    RETURNS: (state machine, terminal)
    """
    sm = StateMachine.from_sequence(Sequence)
    sm.set_id(dial_db.new_incidence_id())
    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(OpList)), Name, sm.get_id())
    terminal.set_requires_goto_loop_entry_f()  # --> Goto Loop Entry

    return sm, terminal
Example #3
def _get_state_machine_and_terminal(Sequence, Name, OpList):
    """Create state machine that detects the 'Sequence', names the terminal
    with 'Name', and implements the 'CmdList' in the terminal.

    RETURNS: (state machine, terminal)
    """
    sm = StateMachine.from_sequence(Sequence)
    sm.set_id(dial_db.new_incidence_id())
    terminal = Terminal(CodeTerminal(Lng.COMMAND_LIST(OpList)), Name,
                        sm.get_id())
    terminal.set_requires_goto_loop_entry_f()  # --> Goto Loop Entry

    return sm, terminal
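
Both variants above rely on StateMachine.from_sequence to build a linear acceptor for a fixed sequence. The following self-contained sketch (plain Python, no quex imports; all names are illustrative) shows the idea: one transition per element of the sequence, and only the final state accepts.

def from_sequence_sketch(sequence):
    """Build a trivial linear acceptor as a dict:
    state index -> {symbol: next state}; the last state is the accepting one."""
    transitions = {i: {symbol: i + 1} for i, symbol in enumerate(sequence)}
    return transitions, len(sequence)

def matches(transitions, accepting, candidate):
    state = 0
    for symbol in candidate:
        state = transitions.get(state, {}).get(symbol)
        if state is None:
            return False
    return state == accepting

tm, acc = from_sequence_sketch([ord(c) for c in "*/"])
assert matches(tm, acc, [ord("*"), ord("/")])        # exactly the sequence
assert not matches(tm, acc, [ord("*"), ord("*")])    # anything else fails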
Example #4
File: range.py Project: xxyzzzq/quex
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb): 
    """Additionally to all characters, the loop shall walk along the 'closer'.
    If the closer matches, the range skipping exits. Characters need to be 
    counted properly.

    RETURNS: list(state machine, terminal)

    The list contains only one single element.
    """
    sm = StateMachine.from_sequence(CloserSequence)
    sm.set_id(dial_db.new_incidence_id())

    code = [ Lng.GOTO(DoorID.continue_without_on_after_match()) ]
    terminal = Terminal(CodeTerminal(code), "<SKIP RANGE TERMINATED>", sm.get_id())
    return [ (sm, terminal) ]
Example #5
File: range.py Project: xxyzzzq/quex
def _get_state_machine_vs_terminal_list(CloserSequence, CounterDb):
    """Additionally to all characters, the loop shall walk along the 'closer'.
    If the closer matches, the range skipping exits. Characters need to be 
    counted properly.

    RETURNS: list(state machine, terminal)

    The list contains only one single element.
    """
    sm = StateMachine.from_sequence(CloserSequence)
    sm.set_id(dial_db.new_incidence_id())

    code = [Lng.GOTO(DoorID.continue_without_on_after_match())]
    terminal = Terminal(CodeTerminal(code), "<SKIP RANGE TERMINATED>",
                        sm.get_id())
    return [(sm, terminal)]
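
Both range.py variants wire the closer's state machine into a character-counting skip loop. As a rough, quex-free illustration of what such a range skipper does at run time (buffer handling and counting are simplified assumptions of this sketch, not quex's actual loop):

def skip_range_sketch(text, start, closer="*/"):
    """Advance through 'text' from 'start' until 'closer' has been consumed.
    Returns (index just past the closer, newlines seen); running off the end
    corresponds to quex's 'on_skip_range_open' event."""
    newline_n = 0
    i = start
    while i < len(text):
        if text.startswith(closer, i):
            return i + len(closer), newline_n
        if text[i] == "\n":
            newline_n += 1
        i += 1
    raise ValueError("range opened but closer %r not found" % closer)

# Skip a C-style comment body: everything up to and including '*/'.
end, newline_n = skip_range_sketch("/* a\n b */ rest", 2)
assert "/* a\n b */ rest"[end:] == " rest" and newline_n == 1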
Example #6
File: mode.py Project: coderjames/pascal
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the 
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do, 
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm), 
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = { 
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True
        
    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"
                                           
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])
                 
            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code, 
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(), 
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action, 
                           get_pattern_object(sm), 
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Example #7
def do(A, B):
    """RETURNS: True  - if A == SUPERSET of B
                False - if not
    """
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    assert not isinstance(B, StateMachine)
    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from begin to end of post condition).
    #       Post-conditions only tell something about the place where the 
    #       analyzer returns after the match.
    superset_f = Checker(A.sm, B.sm).do()

    if not superset_f: return False

    # NOW: For the core state machines it holds: 
    #
    #                      'core(A)' matches a super set of 'core(B)'.
    #

    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context(): 
        # core(A) is a superset of core(B). 
        # A is not restricted. B may be (who cares).
        # => A can match more than B.
        return True

    # NOW: Acceptance of A is restricted by a pre-context.
    #
    if not B.has_pre_context(): 
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot. 
        return False

    # NOW: A is restricted by pre-context. 
    #      B is restricted by pre-context. 
    #
    #      For A to be a superset of B, A must be no more restricted than B.
    #
    #                 pre(B) is a superset of pre(A) 
    # 
    #
    if B.pre_context_trivial_begin_of_line_f:
        if not A.pre_context_trivial_begin_of_line_f:
            # pre(A) can never be a subset of pre(B)
            return False
        else:
            # pre(A) = pre(B) which fulfills the condition
            return True

    # NOW: B is a 'real' pre-context not only a 'begin-of-line'
    #
    # Decision about "pre(A) is subset of pre(B)" done by Checker
    if not A.pre_context_trivial_begin_of_line_f:
        A_pre_sm = A.inverse_pre_context_sm
    else:
        # A contains only 'begin-of-line'. Note, however, that 
        # -- newline definition may include '\r\n' so inversion is 
        #    required. 
        # -- at this point in time we are dealing with transformed 
        #    machines. So this has also to be transformed.
        A_pre_sm = StateMachine.from_sequence("\n").get_inverse()
        A_pre_sm = transformation.try_this(A_pre_sm, fh=-1)

    return Checker(B.inverse_pre_context_sm, A_pre_sm).do()
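
The pre-context handling in do(A, B) boils down to a small decision table: core(A) must cover core(B), and A's pre-context must be no more restrictive than B's. Below is a compact, quex-free restatement of that flow, with the Checker calls replaced by pre-computed booleans (the function and parameter names are assumptions of this sketch).

def pattern_superset_sketch(core_a_covers_b, a_has_pre, b_has_pre,
                            pre_b_covers_pre_a):
    """Mirror of the decision flow in do(A, B), with the state-machine
    checks replaced by booleans computed beforehand."""
    if not core_a_covers_b:        # core(A) must match a superset of core(B)
        return False
    if not a_has_pre:              # A unrestricted -> it can only match more
        return True
    if not b_has_pre:              # A restricted, B not -> B can match more
        return False
    return pre_b_covers_pre_a      # both restricted: require pre(B) >= pre(A)

The trivial 'begin-of-line' cases in the original are just shortcuts for computing that last predicate.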
Example #8
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else: result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier,
                      fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str,
                           action,
                           get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(
                fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier,
                      fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range": (skip_range.do, E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range":
            (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        new_mode.add_match(opener_str,
                           action,
                           get_pattern_object(opener_sm),
                           Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True),
                                    FileName, LineN)

            new_mode.add_match(
                suppressed_newline_pattern_str,
                code,
                get_pattern_object(suppressed_newline_sm),
                Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(),
                           action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Example #9
File: superset.py Project: liancheng/rose
def do(A, B):
    """RETURNS: True  - if A == SUPERSET of B
                False - if not
    """
    if isinstance(A, StateMachine):
        assert isinstance(B, StateMachine)
        return Checker(A, B).do()

    assert not isinstance(B, StateMachine)
    # (*) Core Pattern ________________________________________________________
    #
    #     (including the mounted post context, if there is one).
    #
    # NOTE: Post-conditions do not change anything, since they match only when
    #       the whole lexeme has matched (from begin to end of post condition).
    #       Post-conditions only tell something about the place where the
    #       analyzer returns after the match.
    superset_f = Checker(A.sm, B.sm).do()

    if not superset_f: return False

    # NOW: For the core state machines it holds:
    #
    #                      'core(A)' matches a super set of 'core(B)'.
    #

    # (*) Pre-Condition _______________________________________________________
    #
    if not A.has_pre_context():
        # core(A) is a superset of core(B).
        # A is not restricted. B may be (who cares).
        # => A can match more than B.
        return True

    # NOW: Acceptance of A is restricted by a pre-context.
    #
    if not B.has_pre_context():
        # A is restricted by pre-context, B is not.
        # => B can match things that A cannot.
        return False

    # NOW: A is restricted by pre-context.
    #      B is restricted by pre-context.
    #
    #      For A to be a superset of B, A must be no more restricted than B.
    #
    #                 pre(B) is a superset of pre(A)
    #
    #
    if B.pre_context_trivial_begin_of_line_f:
        if not A.pre_context_trivial_begin_of_line_f:
            # pre(A) can never be a subset of pre(B)
            return False
        else:
            # pre(A) = pre(B) which fulfills the condition
            return True

    # NOW: B is a 'real' pre-context not only a 'begin-of-line'
    #
    # Decision about "pre(A) is subset of pre(B)" done by Checker
    if not A.pre_context_trivial_begin_of_line_f:
        A_pre_sm = A.inverse_pre_context_sm
    else:
        # A contains only 'begin-of-line'. Note, however, that
        # -- newline definition may include '\r\n' so inversion is
        #    required.
        # -- at this point in time we are dealing with transformed
        #    machines. So this has also to be transformed.
        A_pre_sm = StateMachine.from_sequence("\n").get_inverse()
        A_pre_sm = transformation.try_this(A_pre_sm, fh=-1)

    return Checker(B.inverse_pre_context_sm, A_pre_sm).do()