예제 #1
0
파일: engine.py 프로젝트: liancheng/rose
def snap_term(stream, PatternDict):
    """Snap a 'term' from 'stream' according to the grammar

           term:  primary
                  primary term

    A leading 'primary' is mandatory; any number of further primaries
    may follow and are collected by recursion.

    RETURNS (always passed through '__debug_exit()'):

       None                   -- if 'snap_primary()' delivers nothing.
       result of the primary  -- if no further term follows. The stream is
                                 then reset to the position right behind
                                 the primary.
       beautified sequence    -- of the primary and the following term,
                                 otherwise.
    """
    __debug_entry("term", stream)

    # -- mandatory 'primary'
    head = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", head)
    if head is None:
        return __debug_exit(None, stream)

    # Remember where the primary ended, so that a failed attempt to snap
    # a follow-up term can be undone.
    behind_primary = stream.tell()

    # -- optional 'term'
    tail = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", tail)

    if tail is not None:
        # 'tail' is mounted onto 'head' without being cloned.
        combined = sequentialize.do([head, tail],
                                    MountToFirstStateMachineF=True,
                                    CloneRemainingStateMachinesF=False)
        return __debug_exit(beautifier.do(combined), stream)

    # No follow-up term: the primary alone is the term.
    stream.seek(behind_primary)
    return __debug_exit(head, stream)
예제 #2
0
파일: engine.py 프로젝트: coderjames/pascal
def snap_term(stream, PatternDict):
    """Snap a 'term' from 'stream' according to the grammar

           term:  primary
                  primary term

    A leading 'primary' is mandatory; any number of further primaries
    may follow and are collected by recursion.

    RETURNS (always passed through '__debug_exit()'):

       None                   -- if 'snap_primary()' delivers nothing.
       result of the primary  -- if no further term follows. The stream is
                                 then reset to the position right behind
                                 the primary.
       beautified sequence    -- of the primary and the following term,
                                 otherwise.
    """
    __debug_entry("term", stream)

    # -- mandatory 'primary'
    first = snap_primary(stream, PatternDict)
    __debug_print("##primary(in term):", first)
    if first is None:
        return __debug_exit(None, stream)

    # Position to fall back to, if no further term can be snapped.
    fallback_position = stream.tell()

    # -- optional 'term'
    remainder = snap_term(stream, PatternDict)
    __debug_print("##term(in term):", remainder)

    if remainder is not None:
        # 'remainder' is mounted onto 'first' without being cloned.
        sequence = sequentialize.do([first, remainder],
                                    MountToFirstStateMachineF=True,
                                    CloneRemainingStateMachinesF=False)
        return __debug_exit(beautifier.do(sequence), stream)

    # No follow-up term: the primary alone is the term.
    stream.seek(fallback_position)
    return __debug_exit(first, stream)
예제 #3
0
def _prepare_indentation_counter(ModeName, OptionsDb, CounterDb, IncidenceDb, MHI):
    """Set up the pattern/priority/terminal entries (PPTs) that implement
    indentation counting for a mode.

    'newline'            --> matched as soon as an UNSUPPRESSED newline
                             occurs; it is the entry into the indentation
                             counter.
    'suppressed newline' --> newline suppressor followed by newline; the
                             indentation counter is NOT triggered.

    The suppressed newline pattern is the longer one and therefore wins
    over the plain newline pattern. It allows to write lines that overstep
    the newline (compare the backslash in Python).

    ModeName    = name of the mode; stored under "mode_name" in the data
                  handed to the newline handler.
    OptionsDb   = queried for the "indentation" option.
    CounterDb,
    IncidenceDb = passed on to the newline handler.
    MHI         = mode hierarchy index defining the priority of the mode.

    RETURNS: A pair of lists. The first list is always empty here. The
             second one is empty if the mode has no "indentation" option;
             otherwise it contains

             [0] the newline PPT, and
             [1] optionally, the PPT of the suppressed newline.
    """
    ISetup = OptionsDb.value("indentation")
    if ISetup is None:
        # Mode does not do indentation counting.
        return [], []

    check_indentation_setup(ISetup)

    # 'suppressor newline' as one state machine, if a suppressor is defined.
    sm_suppressed_newline = None
    if ISetup.sm_newline_suppressor.get() is not None:
        suppressor_then_newline = [ISetup.sm_newline_suppressor.get(),
                                   ISetup.sm_newline.get()]
        sm_suppressed_newline = beautifier.do(
            sequentialize.do(suppressor_then_newline))

    handler_data = {
        "counter_db":                    CounterDb,
        "indentation_setup":             ISetup,
        "incidence_db":                  IncidenceDb,
        "default_indentation_handler_f": IncidenceDb.default_indentation_handler_f(),
        "mode_name":                     ModeName,
        "sm_suppressed_newline":         sm_suppressed_newline,
    }

    # 'newline' triggers --> indentation counter
    newline_ppt = PPT_indentation_handler_newline(MHI, handler_data,
                                                  ISetup, CounterDb)
    result = [newline_ppt]

    # 'newline-suppressor' followed by 'newline' is ignored (skipped)
    if sm_suppressed_newline is not None:
        suppressed_ppt = PPT_indentation_handler_suppressed_newline(
            MHI, sm_suppressed_newline)
        result.append(suppressed_ppt)

    return [], result
예제 #4
0
def do(SM_A, SM_B):
    r"""Implement the operation

            \NotIn{P Q} = \NotBegin{P \Any*(Q+)}

    That is, 'SM_B' repeated at least once is prefixed by an arbitrary
    sequence of characters, and the result is handed together with 'SM_A'
    to 'complement_begin.do()'.

    SM_A = state machine 'P' of the formula above.
    SM_B = state machine 'Q' of the formula above.

    RETURNS: The result of 'complement_begin.do(SM_A, \Any*(SM_B+))'.

    NOTE: The docstring is a raw string. The backslash notation above
          would otherwise be read as (malformed) escape sequences.
    """
    # \Any*
    all_star      = repeat.do(special.get_any(), min_repetition_n=0)
    # Q+
    sm_b_repeated = repeat.do(SM_B, min_repetition_n=1)

    # \Any*(Q+)
    tmp = sequentialize.do([all_star, sm_b_repeated],
                           MountToFirstStateMachineF=True,
                           CloneRemainingStateMachinesF=True)

    tmp = beautifier.do(tmp)

    # There might be many paths which have no hope to reach acceptance
    tmp.clean_up()

    return complement_begin.do(SM_A, tmp)
예제 #5
0
def do(SM_A, SM_B):
    r"""Implement the operation

            \NotIn{P Q} = \NotBegin{P \Any*(Q+)}

    That is, 'SM_B' repeated at least once is prefixed by an arbitrary
    sequence of characters, and the result is handed together with 'SM_A'
    to 'complement_begin.do()'.

    SM_A = state machine 'P' of the formula above.
    SM_B = state machine 'Q' of the formula above.

    RETURNS: The result of 'complement_begin.do(SM_A, \Any*(SM_B+))'.

    NOTE: The docstring is a raw string. The backslash notation above
          would otherwise be read as (malformed) escape sequences.
    """
    # \Any*
    all_star = repeat.do(special.get_any(), min_repetition_n=0)
    # Q+
    sm_b_repeated = repeat.do(SM_B, min_repetition_n=1)

    # \Any*(Q+)
    tmp = sequentialize.do([all_star, sm_b_repeated],
                           MountToFirstStateMachineF=True,
                           CloneRemainingStateMachinesF=True)

    tmp = beautifier.do(tmp)

    # There might be many paths which have no hope to reach acceptance
    tmp.clean_up()

    return complement_begin.do(SM_A, tmp)
예제 #6
0
파일: mode.py 프로젝트: coderjames/pascal
def __parse_option(fh, new_mode):
    """Parse a single mode option from 'fh' and enter it into 'new_mode'.

    What happens depends on the option's identifier:

      "skip"              -- A character set is parsed. A state machine with
                             a single transition on that set is added to
                             'new_mode' as a match; its action is generated
                             later by 'skip_character_set.do'.
      "skip_range",
      "skip_nested_range" -- Opener and closer are parsed. The opener's
                             state machine is added to 'new_mode' as a
                             match; its action is generated later by
                             'skip_range.do' or 'skip_nested_range.do'.
      "indentation"       -- The indentation setup is parsed. Matches for
                             the suppressed newline (if a suppressor is
                             defined) and for the newline are added. Then
                             the setup is stored as the option's value.
      any other           -- The value is read by 'read_option_value()' and
                             stored as the option's value.

    The skipper options return right after adding their match; they are
    not stored by means of 'new_mode.add_option()'.

    RETURNS: False, if 'read_option_start()' reports no option (None).
             True,  else.
    """
    def get_pattern_object(SM):
        # Make sure the state machine is a DFA, minimize it (in place) and
        # wrap it into a 'Pattern' object.
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier. Applying '%' to
            # it would raise a TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ("skip_range", "skip_nested_range"):
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer (only the character sequence is used, not the string)
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = {
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name,
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code,
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    # ('in' instead of 'has_key()': the latter does not exist in Python 3).
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
예제 #7
0
def __parse_option(fh, new_mode):
    """Parse a single mode option from 'fh' and enter it into 'new_mode'.

    What happens depends on the option's identifier:

      "skip"              -- A character set is parsed. A state machine with
                             a single transition on that set is added to
                             'new_mode' as a match; its action is generated
                             later by 'skip_character_set.do'.
      "skip_range",
      "skip_nested_range" -- Opener and closer are parsed. The opener's
                             state machine is added to 'new_mode' as a
                             match; its action is generated later by
                             'skip_range.do' or 'skip_nested_range.do'.
      "indentation"       -- The indentation setup is parsed. Matches for
                             the suppressed newline (if a suppressor is
                             defined) and for the newline are added. Then
                             the setup is stored as the option's value.
      any other           -- The value is read by 'read_option_value()' and
                             stored as the option's value.

    The skipper options return right after adding their match; they are
    not stored by means of 'new_mode.add_option()'.

    RETURNS: False, if 'read_option_start()' reports no option (None).
             True,  else.
    """
    def get_pattern_object(SM):
        # Make sure the state machine is a DFA, minimize it (in place) and
        # wrap it into a 'Pattern' object.
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else: result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier,
                      fh)

        if trigger_set.is_empty():
            # The message contains no conversion specifier. Applying '%' to
            # it would raise a TypeError instead of reporting the error.
            error_msg("Empty trigger set for skipper.", fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The correspondent CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str,
                           action,
                           get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ("skip_range", "skip_nested_range"):
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only effects the trigger. Not so the nested range skipper-see below.

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(
                fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer (only the character sequence is used, not the string)
        closer_str, closer_sequence = __parse_string(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier,
                      fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range": (skip_range.do, E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range":
            (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        new_mode.add_match(opener_str,
                           action,
                           get_pattern_object(opener_sm),
                           Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True),
                                    FileName, LineN)

            new_mode.add_match(
                suppressed_newline_pattern_str,
                code,
                get_pattern_object(suppressed_newline_sm),
                Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(),
                           action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    # ('in' instead of 'has_key()': the latter does not exist in Python 3).
    assert identifier in mode_option_info_db

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
예제 #8
0
def mount(the_state_machine, PostConditionSM):
    """Mount a post condition onto a state machine by means of a mechanism
       that can cope with a pseudo-ambiguous post condition. 'Normal' post
       conditions can be treated the same way, at the price of slightly
       less efficiency.

                core-        post-
           -----0000000000000111111111--------------

       (1)      |-------------------->
                                     acceptance

       (2)                   <-------|
                             reset input position

       Step (1) is done by 'normal' forward lexing over core pattern and
       post condition. Step (2) is done by a backward detector, which is
       basically the inverse state machine of the post condition.

       the_state_machine = core pattern; the post condition is mounted
                           onto this very object.
       PostConditionSM   = state machine of the post condition.

       RETURNS: A clone of 'PostConditionSM' which is NOT yet inverted.
                The inversion has to happen just before code generation.

       NOTE: After this function 'the_state_machine' is not necessarily
             deterministic. Run nfa_to_dfa on it.

       NOTE: This function is very similar to the one that mounts a
             pre-condition. The major difference is that the post
             condition is actually webbed into the state machine for
             forward lexing.
    """
    operands = (the_state_machine, PostConditionSM)
    for sm in operands:
        assert sm.__class__.__name__ == "StateMachine"
    # -- state machines with no states are senseless here.
    for sm in operands:
        assert not sm.is_empty()

    # -- trivial pre-conditions should be added last, for simplicity
    # (*) concatenate the two state machines:
    #   -- deletes acceptance states of the core pattern
    #   -- leaves acceptance states of the post condition
    # The return value is not needed; the result is mounted on the first
    # state machine.
    sequentialize.do([the_state_machine, PostConditionSM],
                     MountToFirstStateMachineF=True)

    # (*) The backward input position detector CANNOT be inverted here.
    #     The inversion may depend on the input codec(!). So, it is
    #     done just before code generation.
    not_yet_inverted_detector = PostConditionSM.clone()
    ## DOES NOT WORK: acceptance_pruning.do(backward_detector_sm)

    # NOTE: No origins need to be marked in the backward detector. It is
    #       not concerned with acceptance states; its only task is to
    #       reset the input stream.
    # NOTE: The state machine does not need to refer directly to the
    #       backward detector. The origins of the acceptance state do so.
    acceptance_states = the_state_machine.get_acceptance_state_list()
    assert len(acceptance_states) != 0, (
        "error: mounting pseudo-ambiguous post condition:\n"
        "error: no acceptance state in sequentialized state machine.")

    # NFA to DFA and Hopcroft optimization cannot be done here, since they
    # would create a new state machine. This function is supposed to
    # 'mount' something on an existing state machine, i.e. to change the
    # object referenced by the first argument 'the_state_machine'.
    return not_yet_inverted_detector
예제 #9
0
def mount(the_state_machine, PostConditionSM):
    """Mount a post condition onto a state machine by means of a mechanism
       that can cope with a pseudo-ambiguous post condition. 'Normal' post
       conditions can be treated the same way, at the price of slightly
       less efficiency.

                core-        post-
           -----0000000000000111111111--------------

       (1)      |-------------------->
                                     acceptance

       (2)                   <-------|
                             reset input position

       Step (1) is done by 'normal' forward lexing over core pattern and
       post condition. Step (2) is done by a backward detector, which is
       basically the inverse state machine of the post condition.

       the_state_machine = core pattern; the post condition is mounted
                           onto this very object.
       PostConditionSM   = state machine of the post condition.

       RETURNS: The backward detector, i.e. the state machine derived from
                'PostConditionSM' that walks back from the acceptance of
                the post condition to the end of the core pattern.

       NOTE: After this function 'the_state_machine' is not necessarily
             deterministic. Run nfa_to_dfa on it.

       NOTE: This function is very similar to the one that mounts a
             pre-condition. The major difference is that the post
             condition is actually webbed into the state machine for
             forward lexing.
    """
    operands = (the_state_machine, PostConditionSM)
    for sm in operands:
        assert sm.__class__.__name__ == "StateMachine"
    # -- state machines with no states are senseless here.
    for sm in operands:
        assert not sm.is_empty()

    # -- trivial pre-conditions should be added last, for simplicity
    # (*) concatenate the two state machines:
    #   -- deletes acceptance states of the core pattern
    #   -- leaves acceptance states of the post condition
    # The return value is not needed; the result is mounted on the first
    # state machine.
    sequentialize.do([the_state_machine, PostConditionSM],
                     MountToFirstStateMachineF=True)

    # (*) Get the state machine that goes backwards from the acceptance
    #     state of the post condition to its start. The start of the post
    #     condition is at the same time the end of the core pattern.
    detector = __get_inverse_state_machine_that_finds_end_of_core_expression(
        PostConditionSM)
    ## DOES NOT WORK: acceptance_pruning.do(backward_detector_sm)

    # NOTE: No origins need to be marked in the backward detector. It is
    #       not concerned with acceptance states; its only task is to
    #       reset the input stream.
    # NOTE: The state machine does not need to refer directly to the
    #       backward detector. The origins of the acceptance state do so.
    acceptance_states = the_state_machine.get_acceptance_state_list()
    assert len(acceptance_states) != 0, (
        "error: mounting pseudo-ambiguous post condition:\n"
        "error: no acceptance state in sequentialized state machine.")

    # NFA to DFA and Hopcroft optimization cannot be done here, since they
    # would create a new state machine. This function is supposed to
    # 'mount' something on an existing state machine, i.e. to change the
    # object referenced by the first argument 'the_state_machine'.
    return detector