Example #1
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    # SPECIALITIES: -- sm0 and sm1 have an intersection between their second 
    #                  transition.
    #               -- sm1 transits further upon acceptance.
    #               -- sm2 has only one transition.
    ci_list = [
        CountInfo(dial_db.new_incidence_id(), NumberSet.from_range(L0, L1), 
                  CountAction(E_CharacterCountType.COLUMN, 0)),
    ]

    # Generate State Machine that does not have any intersection with 
    # the loop transitions.
    sm0 = StateMachine()
    si = sm0.add_transition(sm0.init_state_index, FSM0)
    si = sm0.add_transition(si, NS_A, AcceptanceF=True)
    sm0.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    sm1 = StateMachine()
    si0 = sm1.add_transition(sm1.init_state_index, FSM1)
    si  = sm1.add_transition(si0, NS_A, AcceptanceF=True)
    iid1 = dial_db.new_incidence_id()
    sm1.states[si].mark_acceptance_id(iid1)
    si  = sm1.add_transition(si, NS_B, si0)
    sm1.states[si].mark_acceptance_id(iid1)

    sm2 = StateMachine()
    si = sm2.add_transition(sm2.init_state_index, FSM2, AcceptanceF=True)
    sm2.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    return ci_list, [sm0, sm1, sm2]
Example #2
    def seal(self):
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            default_space = ord(' ')
            default_tab = ord('\t')
            bad = self.bad_character_set
            if bad.get().contains(default_space) == False:
                self.specify_space("[ ]", NumberSet(default_space), 1, self.fh)
            if bad.get().contains(default_tab) == False:
                self.specify_grid("[\\t]", NumberSet(default_tab), 4, self.fh)

            if len(self.space_db) == 0 and len(self.grid_db) == 0:
                error_msg(
                    "No space or grid defined for indentation counting. Default\n"
                    "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                    bad.file_name, bad.line_n)

        if self.newline_state_machine.get() is None:
            sm = StateMachine()
            end_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\n')),
                                        AcceptanceF=True)
            mid_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\r')),
                                        AcceptanceF=False)
            sm.add_transition(mid_idx,
                              NumberSet(ord('\n')),
                              end_idx,
                              AcceptanceF=False)
            self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
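
A note on the default newline machine built above: the three add_transition() calls create a machine that accepts either '\n' or the two-character sequence '\r' '\n'. Below is a minimal, self-contained sketch of that same shape using a plain dictionary instead of quex's StateMachine API (the dict representation and all names are illustrative only):

# Hand-rolled DFA with the same shape as the default newline machine above:
#   init --'\n'--> end (accepting),  init --'\r'--> mid --'\n'--> end (accepting)
TRANSITIONS = {
    ("init", "\n"): "end",
    ("init", "\r"): "mid",
    ("mid",  "\n"): "end",
}
ACCEPTING = {"end"}

def matches_newline(text):
    """Return True if 'text' is exactly '\\n' or '\\r\\n'."""
    state = "init"
    for ch in text:
        state = TRANSITIONS.get((state, ch))
        if state is None:
            return False
    return state in ACCEPTING

assert matches_newline("\n")
assert matches_newline("\r\n")
assert not matches_newline("\r")
assert not matches_newline("\r\n\n")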
Example #3
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name, 
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf8_seq = unicode_to_utf8(i)

                # Apply sequence to state machine
                s_idx = result.init_state_index
                for byte in utf8_seq:
                    s_idx = result.states[s_idx].target_map.get_resulting_target_state_index(byte)

                # All acceptance flags must belong to the original state machine
                for cmd in result.states[s_idx].single_entry:
                    if cmd.__class__ != SeAccept: continue
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id
        print " (OK=%i)" % self.id
Example #4
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name,
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf8_seq = unicode_to_utf8(i)

                # Apply sequence to state machine
                s_idx = result.init_state_index
                for byte in utf8_seq:
                    s_idx = result.states[
                        s_idx].target_map.get_resulting_target_state_index(
                            byte)

                # All acceptance flags must belong to the original state machine
                for cmd in result.states[s_idx].single_entry:
                    if cmd.__class__ != SeAccept: continue
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id
        print " (OK=%i)" % self.id
Example #5
def get_setup(L0, L1, FSM0, FSM1, FSM2):
    # SPECIALITIES: -- sm0 and sm1 have an intersection between their second
    #                  transition.
    #               -- sm1 transits further upon acceptance.
    #               -- sm2 has only one transition.
    ci_list = [
        CountInfo(dial_db.new_incidence_id(), NumberSet.from_range(L0, L1),
                  CountAction(E_CharacterCountType.COLUMN, 0)),
    ]

    # Generate State Machine that does not have any intersection with
    # the loop transitions.
    sm0 = StateMachine()
    si = sm0.add_transition(sm0.init_state_index, FSM0)
    si = sm0.add_transition(si, NS_A, AcceptanceF=True)
    sm0.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    sm1 = StateMachine()
    si0 = sm1.add_transition(sm1.init_state_index, FSM1)
    si = sm1.add_transition(si0, NS_A, AcceptanceF=True)
    iid1 = dial_db.new_incidence_id()
    sm1.states[si].mark_acceptance_id(iid1)
    si = sm1.add_transition(si, NS_B, si0)
    sm1.states[si].mark_acceptance_id(iid1)

    sm2 = StateMachine()
    si = sm2.add_transition(sm2.init_state_index, FSM2, AcceptanceF=True)
    sm2.states[si].mark_acceptance_id(dial_db.new_incidence_id())

    return ci_list, [sm0, sm1, sm2]
Example #6
File: test-set.py  Project: xxyzzzq/quex
def test(TestString):
    print "expression    = \"" + TestString + "\""
    sm = StateMachine()
    try:
        trigger_set = character_set.do(StringIO(TestString + "]"))
        sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)
        print "state machine\n", sm 
    except RegularExpressionException, x:
        print repr(x)
Example #7
File: engine.py  Project: coderjames/pascal
def snap_non_control_character(stream, PatternDict):
    __debug_entry("non-control characters", stream)

    # (*) read first character
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error_msg("Character could not be interpreted as UTF8 code or End of File reached prematurely.", 
                  stream)
    result = StateMachine()
    result.add_transition(result.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(result, stream)
Example #8
File: special.py  Project: dkopecek/amplify
def get_any():
    """RETURNS:

       A state machine that 'eats' any character, but only one. 

           (0)--- \Any --->(( 0 ))
    """
    result = StateMachine()
    result.add_transition(result.init_state_index, NumberSet(Interval(-sys.maxint, sys.maxint)), AcceptanceF=True)

    return result
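
get_any() produces a single transition that accepts exactly one character of any value. A standalone sketch of the same behaviour, with an illustrative code-point interval in place of quex's NumberSet/Interval objects:

import sys

# The single trigger set spans every possible code point, mirroring
# NumberSet(Interval(-sys.maxint, sys.maxint)) above (bounds illustrative).
TRIGGER_LO, TRIGGER_HI = 0, sys.maxsize

def match_any_one(text):
    """Accept exactly one character whose code point lies in the trigger
    interval -- i.e. any single character, but only one."""
    return len(text) == 1 and TRIGGER_LO <= ord(text) < TRIGGER_HI

assert match_any_one("x") and match_any_one("\n")
assert not match_any_one("") and not match_any_one("ab")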
Example #9
File: engine.py  Project: yifsun/amplify
def snap_non_control_character(stream, PatternDict):
    __debug_entry("non-control characters", stream)

    # (*) read first character
    char_code = utf8.__read_one_utf8_code_from_stream(stream)
    if char_code is None:
        error_msg("Character could not be interpreted as UTF8 code or End of File reached prematurely.", 
                  stream)
    result = StateMachine()
    result.add_transition(result.init_state_index, char_code, AcceptanceF=True)
    return __debug_exit(result, stream)
Example #10
File: engine.py  Project: coderjames/pascal
def create_ALL_BUT_NEWLINE_state_machine():
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n")).inverse()) 

    if Setup.get_character_value_limit() != sys.maxint:
        trigger_set.intersect_with(Interval(0, Setup.get_character_value_limit()))

    result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True) 
    return result
Example #11
File: special.py  Project: yifsun/amplify
def get_any():
    """RETURNS:

       A state machine that 'eats' any character, but only one. 

           (0)--- \Any --->(( 0 ))
    """
    result = StateMachine()
    result.add_transition(result.init_state_index,
                          NumberSet(Interval(-sys.maxint, sys.maxint)),
                          AcceptanceF=True)

    return result
Example #12
File: engine.py  Project: yifsun/amplify
def create_ALL_BUT_NEWLINE_state_machine(stream):
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n"))).get_complement(Setup.buffer_codec.source_set)
    if trigger_set.is_empty():
        error_msg("The set of admissible characters contains only newline.\n"
                  "The '.' for 'all but newline' is an empty set.",
                  SourceRef.from_FileHandle(stream))

    result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True) 
    return result
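
Both variants of create_ALL_BUT_NEWLINE_state_machine() implement '.': every admissible character except '\n', obtained either by inverting the newline interval or by taking the complement against the buffer codec's source set. A self-contained sketch of the resulting trigger set, with an assumed code-point limit in place of quex's setup values:

NEWLINE = ord("\n")
CHARACTER_VALUE_LIMIT = 0x110000   # assumed limit: the full Unicode range

def in_all_but_newline(ch):
    """True if 'ch' belongs to the '.'-trigger set: any admissible
    code point except newline."""
    cp = ord(ch)
    return cp != NEWLINE and 0 <= cp < CHARACTER_VALUE_LIMIT

assert in_all_but_newline("a")
assert in_all_but_newline("\r")
assert not in_all_but_newline("\n")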
Example #13
def do(stream, PatternDict):
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None: 
        raise RegularExpressionException("Regular Expression: character_set_expression called for something\n" + \
                                         "that does not start with '[:', '[' or '\\P'")
    if trigger_set.is_empty():
        raise RegularExpressionException("Regular Expression: Character set expression results in empty set.")

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #14
File: engine.py  Project: liancheng/rose
def create_ALL_BUT_NEWLINE_state_machine():
    global Setup
    result = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by the code
    #       generator.
    trigger_set = NumberSet(Interval(ord("\n")).inverse())

    if Setup.get_character_value_limit() != sys.maxint:
        trigger_set.intersect_with(
            Interval(0, Setup.get_character_value_limit()))

    result.add_transition(result.init_state_index,
                          trigger_set,
                          AcceptanceF=True)
    return result
Example #15
def do(sh):
    """Converts a uni-code string into a state machine that parses 
       its letters sequentially. Each state in the sequence correponds
       to the sucessful triggering of a letter. Only the last state, though,
       is an acceptance state. Any bailing out before is 'not accepted'. 
       Example:

       "hey" is translated into the state machine:

           (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
            |            |            |
           FAIL         FAIL         FAIL
    
      Note: The state indices are globally unique. But, they are not necessarily
            0, 1, 2, ... 
    """
    assert     sh.__class__.__name__ == "StringIO" \
            or sh.__class__.__name__ == "file"

    # resulting state machine
    result = StateMachine()
    state_idx = result.init_state_index

    # Only \" is a special character '"', any other backslashed character
    # remains as the sequence 'backslash' + character
    for char_code in get_character_code_sequence(sh):
        state_idx = result.add_transition(state_idx, char_code)

    # when the last state has triggered, it is supposed to end up in 'acceptance'
    result.states[state_idx].set_acceptance()
    return result
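
The docstring's "hey" example can be reproduced with a tiny chain builder: one state per successfully matched character, and only the final state accepting. This sketch uses consecutive indices 0, 1, 2, ... for readability, unlike the globally unique indices quex assigns:

def string_to_chain(word):
    """Build a linear DFA: state i --word[i]--> state i+1; only the
    final state accepts."""
    transitions = dict(((i, ch), i + 1) for i, ch in enumerate(word))
    return transitions, {len(word)}

def run(transitions, accepting, text):
    state = 0
    for ch in text:
        state = transitions.get((state, ch))
        if state is None:            # bailing out early: 'not accepted'
            return False
    return state in accepting

transitions, accepting = string_to_chain("hey")
assert run(transitions, accepting, "hey")
assert not run(transitions, accepting, "he")     # stops before the acceptance state
assert not run(transitions, accepting, "hex")    # FAIL on 'x'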
Example #16
File: helper.py  Project: xxyzzzq/quex
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):

    sm     = StateMachine()
    acc_db = {}
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id    = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count      = 0
    for y in range(Drain.begin, Drain.end):
        # Translate the drain character back into its source character
        x  = CharacterBackwardTrafo(y)
        # Transit on the drain character
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)

        count += 1

    print "<terminated: %i transitions ok>" % count
Example #17
def do(sh):
    """Converts a uni-code string into a state machine that parses 
       its letters sequentially. Each state in the sequence correponds
       to the sucessful triggering of a letter. Only the last state, though,
       is an acceptance state. Any bailing out before is 'not accepted'. 
       Example:

       "hey" is translated into the state machine:

           (0)-- 'h' -->(1)-- 'e' -->(2)-- 'y' --> ACCEPTANCE
            |            |            |
           FAIL         FAIL         FAIL
    
      Note: The state indices are globally unique. But, they are not necessarily
            0, 1, 2, ... 
    """
    assert     sh.__class__.__name__ == "StringIO" \
            or sh.__class__.__name__ == "file"

    # resulting state machine
    result    = StateMachine()
    state_idx = result.init_state_index

    # Only \" is a special character '"', any other backslashed character
    # remains as the sequence 'backslash' + character
    for char_code in get_character_code_sequence(sh):
        state_idx = result.add_transition(state_idx, char_code)

    # when the last state has triggered, it is supposed to end up in 'acceptance'
    result.states[state_idx].set_acceptance()
    return result
Example #18
def test_on_UCS_range(Trafo, Source, Drain, CharacterBackwardTrafo):

    sm = StateMachine()
    acc_db = {}
    for x in range(Source.begin, Source.end):
        ti = sm.add_transition(sm.init_state_index, x, AcceptanceF=True)
        acc_id = len(acc_db)
        sm.states[ti].mark_acceptance_id(acc_id)
        acc_db[x] = acc_id

    if Setup.bad_lexatom_detection_f:
        acc_db[None] = E_IncidenceIDs.BAD_LEXATOM
    else:
        acc_db[None] = None

    state_n_before, result = transform(Trafo, sm)
    # assert state_n_before == len(result.states)

    init_state = result.get_init_state()
    count = 0
    for y in range(Drain.begin, Drain.end):
        # Translate the drain character back into its source character
        x = CharacterBackwardTrafo(y)
        # Transit on the drain character
        ti = init_state.target_map.get_resulting_target_state_index(y)
        # Compare resulting state with the expected state's acceptance
        assert_only_acceptance_id(sm.states, ti, acc_db, x, y)

        count += 1

    print "<terminated: %i transitions ok>" % count
Example #19
def do(stream, PatternDict):
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        raise RegularExpressionException("Regular Expression: character_set_expression called for something\n" + \
                                         "that does not start with '[:', '[' or '\\P'")
    if trigger_set.is_empty():
        raise RegularExpressionException(
            "Regular Expression: Character set expression results in empty set."
        )

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #20
File: engine.py  Project: nyulacska/gpr
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression:
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum"
    #                 "alpha"
    #                 "blank"
    #                 "cntrl"
    #                 "digit"
    #                 "graph"
    #                 "lower"
    #                 "print"
    #                 "punct"
    #                 "space"
    #                 "upper"
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "inverse"      '(' set_term ')'
    #                 set_expression
    #
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None:
        error.log("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error.warning(
            "Regular Expression: Character set expression results in empty set.",
            stream)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
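
The set_term grammar in the comment combines base sets with union, intersection, difference and inverse. Their semantics can be pictured with plain Python sets over a small assumed universe; quex itself operates on NumberSet intervals, so this is only an illustration:

# Tiny illustrative universe and base sets (not the real POSIX tables).
UNIVERSE = set(range(0x20, 0x80))
digit = set(map(ord, "0123456789"))
lower = set(range(ord("a"), ord("z") + 1))
upper = set(range(ord("A"), ord("Z") + 1))
alpha = lower | upper

union_set   = alpha | digit                  # union(alpha, digit)
inter_set   = alpha & set(map(ord, "abc1"))  # intersection(alpha, [abc1])
diff_set    = alpha - lower                  # difference(alpha, lower)
inverse_set = UNIVERSE - digit               # inverse(digit)

assert ord("7") in union_set and ord("k") in union_set
assert inter_set == set(map(ord, "abc"))
assert diff_set == upper
assert ord("5") not in inverse_set and ord("z") in inverse_set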
Example #21
File: engine.py  Project: dkopecek/amplify
def snap_character_set_expression(stream, PatternDict):
    # GRAMMAR:
    #
    # set_expression: 
    #                 [: set_term :]
    #                 traditional character set
    #                 \P '{' property string '}'
    #                 '{' identifier '}'
    #
    # set_term:
    #                 "alnum" 
    #                 "alpha" 
    #                 "blank" 
    #                 "cntrl" 
    #                 "digit" 
    #                 "graph" 
    #                 "lower" 
    #                 "print" 
    #                 "punct" 
    #                 "space" 
    #                 "upper" 
    #                 "xdigit"
    #                 "union"        '(' set_term [ ',' set_term ]+ ')'
    #                 "intersection" '(' set_term [ ',' set_term ]+ ')'
    #                 "difference"   '(' set_term [ ',' set_term ]+ ')'
    #                 "inverse"      '(' set_term ')'
    #                 set_expression
    # 
    trigger_set = snap_set_expression(stream, PatternDict)

    if trigger_set is None: 
        error_msg("Regular Expression: snap_character_set_expression called for something\n" + \
                  "that does not start with '[:', '[' or '\\P'", stream)
    elif trigger_set.is_empty():
        error_msg("Regular Expression: Character set expression results in empty set.", stream, DontExitF=True)

    # Create state machine that triggers with the trigger set to SUCCESS
    # NOTE: The default for the ELSE transition is FAIL.
    sm = StateMachine()
    sm.add_transition(sm.init_state_index, trigger_set, AcceptanceF=True)

    return __debug_exit(sm, stream)
Example #22
    def seal(self):
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            default_space = ord(' ')
            default_tab   = ord('\t')
            bad = self.bad_character_set
            if bad.get().contains(default_space) == False:
                self.specify_space("[ ]", NumberSet(default_space), 1, self.fh)
            if bad.get().contains(default_tab) == False:
                self.specify_grid("[\\t]", NumberSet(default_tab), 4, self.fh)

            if len(self.space_db) == 0 and len(self.grid_db) == 0:
                error_msg("No space or grid defined for indentation counting. Default\n"
                          "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                          bad.file_name, bad.line_n)


        if self.newline_state_machine.get() is None:
            sm   = StateMachine()
            end_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\n')), AcceptanceF=True)
            mid_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\r')), AcceptanceF=False)
            sm.add_transition(mid_idx, NumberSet(ord('\n')), end_idx, AcceptanceF=False)
            self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
Example #23
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM, TransformFunc):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "## [%i] Name = %s" % (self.id, self.name),
        interval_list = self.charset.get_intervals(PromiseToTreatWellF=True)
        interval_count = len(interval_list)
        for interval in interval_list:
            for i in range(interval.begin, interval.end):
                lexatom_seq = TransformFunc(i)

                # Apply sequence to state machine
                state = SM.apply_sequence(lexatom_seq)
                if state is None:
                    error(self.sm, SM, lexatom_seq)

                # All acceptance flags must belong to the original state machine
                acceptance_id_list = [
                    cmd.acceptance_id()
                    for cmd in state.single_entry.get_iterable(SeAccept)
                ]
                if acceptance_id_list and self.id not in acceptance_id_list:
                    print eval("u'\U%08X'" % i)
                    print "#Seq:  ", ["%02X" % x for x in lexatom_seq]
                    print "#acceptance-ids:", acceptance_id_list
                    error(self.sm, SM, lexatom_seq)

        print " (OK=%i)" % interval_count
Example #24
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index,
                               self.charset,
                               AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "Name = " + self.name,
        for interval in self.charset.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                utf16_seq = unicode_to_utf16(i)

                # Apply sequence to state machine
                s_idx = result.init_state_index
                for word in utf16_seq:
                    s_idx = result.states[
                        s_idx].target_map.get_resulting_target_state_index(
                            word)

                assert s_idx is not None, \
                       "No acceptance for %X in [%X,%X] --> %s" % \
                       (i, interval.begin, interval.end - 1, repr(map(lambda x: "%04X." % x, utf16_seq)))

                # All acceptance flags must belong to the original state machine
                for cmd in result.states[s_idx].single_entry.get_iterable(
                        SeAccept):
                    # HERE: As soon as something is wrong --> fire an exception
                    assert cmd.acceptance_id() == self.id

        print " (OK=%i)" % self.id
Example #25
File: helper.py  Project: xxyzzzq/quex
class X:
    def __init__(self, Name):
        sh = StringIO("[:\\P{Script=%s}:]" % Name)
        self.name = Name
        self.charset = regex.snap_set_expression(sh, {})
        self.sm = StateMachine()
        self.sm.add_transition(self.sm.init_state_index, self.charset, AcceptanceF=True)
        self.id = self.sm.get_id()

    def check(self, SM, TransformFunc):
        """This function throws an exception as soon as one single value
           is not matched according to the expectation.
        """
        print "## [%i] Name = %s" % (self.id, self.name), 
        interval_list  = self.charset.get_intervals(PromiseToTreatWellF=True)
        interval_count = len(interval_list)
        for interval in interval_list:
            for i in range(interval.begin, interval.end):
                lexatom_seq = TransformFunc(i)

                # Apply sequence to state machine
                state = SM.apply_sequence(lexatom_seq)
                if state is None:
                    error(self.sm, SM, lexatom_seq)

                # All acceptance flags must belong to the original state machine
                acceptance_id_list = [
                    cmd.acceptance_id()
                    for cmd in state.single_entry.get_iterable(SeAccept)
                ]
                if acceptance_id_list and self.id not in acceptance_id_list: 
                    print eval("u'\U%08X'" % i) 
                    print "#Seq:  ", ["%02X" % x for x in lexatom_seq]
                    print "#acceptance-ids:", acceptance_id_list
                    error(self.sm, SM, lexatom_seq)

        print " (OK=%i)" % interval_count
Example #26
def StateMachine_Newline():
    """Creates a state machine matching newline according to what has been 
    specified in the setup (Setup.dos_carriage_return_newline_f). 

    That is, if DOS newline is enabled then the state machine represents '\r\n',
    and if it is Unix only, then it represents '\n'. If both are required, they
    are implemented in parallel.

    RETURNS: StateMachine
    """
    UnixF = True
    DosF = Setup.dos_carriage_return_newline_f

    NL = ord('\n')  # (pure) newline, i.e. line feed
    CR = ord('\r')  # carriage return

    sm = StateMachine()
    if UnixF:
        sm.add_transition(sm.init_state_index, NL, AcceptanceF=True)
    if DosF:
        idx = sm.add_transition(sm.init_state_index, CR, AcceptanceF=False)
        sm.add_transition(idx, NL, AcceptanceF=True)

    return beautifier.do(sm)
Example #27
def StateMachine_Newline():
    """Creates a state machine matching newline according to what has been 
    specified in the setup (Setup.dos_carriage_return_newline_f). 

    That is, if DOS newline is enabled then the state machine represents '\r\n',
    and if it is Unix only, then it represents '\n'. If both are required, they
    are implemented in parallel.

    RETURNS: StateMachine
    """
    UnixF = True
    DosF  = Setup.dos_carriage_return_newline_f

    NL = ord('\n')  # (pure) newline, i.e. line feed
    CR = ord('\r')  # carriage return

    sm = StateMachine()
    if UnixF:
        sm.add_transition(sm.init_state_index, NL, AcceptanceF=True)
    if DosF:
        idx = sm.add_transition(sm.init_state_index, CR, AcceptanceF=False)
        sm.add_transition(idx, NL, AcceptanceF=True)

    return beautifier.do(sm)
Example #28
    ci_list, sm_list = get_setup(0x10, 0x60, 
                                 NumberSet.from_range(0x10, 0x40), 
                                 NumberSet.from_range(0x20, 0x50), 
                                 NumberSet.from_range(0x30, 0x60))

    # Test for each 'sm' in 'sm_list' is superfluous. 
    # It is done in 'AppendixNoI'.
    test(ci_list, sm_list)

elif "Split" in sys.argv:
    # A first transition of a state machine is separated into two, because
    # it is covered by more than one different count action.
    NS1 = NumberSet.from_range(0x10, 0x20)
    NS2 = NumberSet.from_range(0x20, 0x30)
    NS3 = NumberSet.from_range(0x30, 0x40)
    NS4 = NumberSet.from_range(0x40, 0x50)
    ci_list = [
        CountInfo(dial_db.new_incidence_id(), NS1, CountAction(E_CharacterCountType.COLUMN, 1)),
        CountInfo(dial_db.new_incidence_id(), NS2, CountAction(E_CharacterCountType.COLUMN, 2)),
        CountInfo(dial_db.new_incidence_id(), NS3, CountAction(E_CharacterCountType.COLUMN, 3)),
        CountInfo(dial_db.new_incidence_id(), NS4, CountAction(E_CharacterCountType.COLUMN, 4))
    ]

    sm  = StateMachine()
    si  = sm.init_state_index
    iid = dial_db.new_incidence_id()
    ti0 = sm.add_transition(si, NumberSet.from_range(0x1A, 0x4B))
    ac0 = sm.add_transition(ti0, NS_A, AcceptanceF=True)

    test(ci_list, [sm])
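
In the 'Split' scenario the single transition on [0x1A, 0x4B) is covered by several count actions, so the loop analysis has to split it. The sketch below only computes which count-action ranges overlap the trigger interval (plain interval arithmetic; the actual splitting logic in quex may differ):

def overlapping_count_actions(trigger, count_ranges):
    """Cut the half-open trigger interval [lo, hi) against each
    count-action interval and keep the non-empty pieces."""
    lo, hi = trigger
    pieces = []
    for action_id, (a, b) in enumerate(count_ranges, start=1):
        cut_lo, cut_hi = max(lo, a), min(hi, b)
        if cut_lo < cut_hi:
            pieces.append((action_id, (cut_lo, cut_hi)))
    return pieces

pieces = overlapping_count_actions(
    (0x1A, 0x4B),
    [(0x10, 0x20), (0x20, 0x30), (0x30, 0x40), (0x40, 0x50)])
# Four COLUMN count actions overlap the trigger set of the first transition.
assert pieces == [(1, (0x1A, 0x20)), (2, (0x20, 0x30)),
                  (3, (0x30, 0x40)), (4, (0x40, 0x4B))]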
Example #29
File: mode.py  Project: coderjames/pascal
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the 
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do, 
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm), 
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = { 
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True
        
    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"
                                           
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])
                 
            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code, 
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick: 
        #
        #      Let               newline         
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(), 
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action, 
                           get_pattern_object(sm), 
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
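
The indentation trick in the comments, redefining newline as newline ([space]* newline)*, makes blank lines disappear before the indentation counter runs. Its effect can be checked with an ordinary Python regular expression standing in for the generated machine (the pattern and the space set [ \t] are illustrative):

import re

# newline ([space]* newline)*   with space assumed to be ' ' or '\t'
newline_re = re.compile(r"\n(?:[ \t]*\n)*")

m = newline_re.match("\n\n   \n\t\nint x;")
# All leading blank lines are consumed by one 'newline' match, so the
# indentation count would only be triggered in front of 'int x;'.
assert m is not None and m.end() == len("\n\n   \n\t\n")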
Example #30
File: engine.py  Project: liancheng/rose
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "              = character string
                 [ non_rect_bracket_close ]           = set of characters
                 { identifier }                       = pattern replacement
                 ( expression )
                 non_control_character+               = lonely characters
                 primary repetition_cmd
    """
    __debug_entry("primary", stream)
    x = stream.read(1)
    lookahead = stream.read(1)
    if x != "" and lookahead != "": stream.seek(-1, 1)
    if x == "": return __debug_exit(None, stream)

    # -- 'primary' primary
    if x == "\"": result = snap_character_string.do(stream)
    elif x == "[":
        stream.seek(-1, 1)
        result = character_set_expression.do(stream, PatternDict)
    elif x == "{":
        result = snap_replacement(stream, PatternDict)
    elif x == ".":
        result = create_ALL_BUT_NEWLINE_state_machine()
    elif x == "(":
        result = snap_bracketed_expression(stream, PatternDict)

    elif x.isspace():
        # a free-standing space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException(
            "lonely operator '%s' without expression proceeding." % x)

    elif x == "\\":
        if lookahead == "C":
            stream.read(1)
            result = snap_case_folded_pattern(stream, PatternDict)
        elif lookahead == "R":
            result = get_expression_in_brackets(stream, PatternDict,
                                                "reverse operator",
                                                "R").get_inverse()
        elif lookahead == "A":
            result = get_expression_in_brackets(stream, PatternDict,
                                                "anti-pattern operator", "A")
            result.transform_to_anti_pattern()
        else:
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set is None:
                stream.seek(
                    1, 1)  # snap_property_set() leaves the stream right before '\\'
                char_code = snap_backslashed_character.do(stream)
                if char_code is None:
                    raise RegularExpressionException(
                        "Backslash followed by unrecognized character code.")
                trigger_set = char_code
            result = StateMachine()
            result.add_transition(result.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

    elif x not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command?
    result_repeated = __snap_repetition_range(result, stream)
    if result_repeated is not None: result = result_repeated
    return __debug_exit(beautifier.do(result), stream)
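
snap_primary() dispatches on a one-character lookahead: a quote starts a character string, '[' a character set, '{' a replacement, '.' the all-but-newline machine, '(' a bracketed expression, whitespace ends the expression, lonely repetition operators are an error, and '\' introduces escapes. A toy dispatcher sketching only that classification (return labels are illustrative; the real function builds state machines):

def classify_primary(text):
    """Classify the next primary from a one-character lookahead,
    mirroring the dispatch branches above (labels are illustrative)."""
    if text == "":
        return None
    x = text[0]
    if x == '"':
        return "character string"
    if x == "[":
        return "character set expression"
    if x == "{":
        return "pattern replacement"
    if x == ".":
        return "all but newline"
    if x == "(":
        return "bracketed expression"
    if x.isspace():
        return None        # a free-standing space ends the regular expression
    if x in "*+?":
        raise ValueError("lonely operator '%s' without a preceding expression" % x)
    if x == "\\":
        return "escape, property set or command"
    return "plain character"

assert classify_primary('"abc"') == "character string"
assert classify_primary("[a-z]+") == "character set expression"
assert classify_primary(".*") == "all but newline"
assert classify_primary("xyz") == "plain character"
assert classify_primary(" rest") is None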
Example #31
def do(SM_List):
    """Intersection: 

       Only match on patterns which are matched by all state machines
       in 'SM_List'.

       (C) 2013 Frank-Rene Schaefer
       ________________________________________________________________________

       A lexeme which matches all patterns must reach an acceptance in each 
       given state machine. That is, 
       
          for each state machine, there is a path from the init
          state to an acceptance state that is triggered by
          the characters of the lexeme.

       We cannot go forward, since we cannot omit a path upon non-fit.

       Now, consider the super-state consisting of all acceptance states
       of all state machines. There must be a way backward from the
       super-acceptance-state to the init states. As soon as a
       path is interrupted, it can be thrown away. This can be achieved
       by reversed state machines which are combined into a single one.
       
       Reverse all state machines; The epsilon closure of the init state
       corresponds to the super acceptance state. The transitions in the
       super-state machine correspond to the way backwards in the state
       machine. For each feasible state in the super-state machine create
       a new state. 

       The acceptance states of the reversed state machines correspond to
       the init states of the original state machines. If the super state
       contains an acceptance state of the original state machine, it can
       become an acceptance state of the intersection, because we have now
       found a path. The resulting state machine must be reversed at the end.

    """
    for sm in SM_List:
        if special.is_none(sm):         # If one state machine is '\None'
            return special.get_none()   # then, the intersection is '\None'

    reverse_sm_list          = [ reverse.do(sm)                            for sm in SM_List ]
    state_id_set_list        = [ set(sm.states.iterkeys())                 for sm in reverse_sm_list ]
    acceptance_state_id_list = [ set(sm.get_acceptance_state_index_list()) for sm in reverse_sm_list ]

    def has_one_from_each(StateIDSet_List, StateIDSet):
        """StateIDSet_List[i] is the set of state indices from state 
        machine 'i' in 'reverse_sm_list'. 

        RETURNS: True -- If the StateIDSet has at least one state 
                         from every state machine.
                 False -- If there is at least one state machine 
                          that has no state in 'StateIDSet'.
        """
        for state_id_set in StateIDSet_List:
            if state_id_set.isdisjoint(StateIDSet): 
                return False
        return True

    def get_merged_state(AcceptanceStateIndexList, EpsilonClosure):
        """Create the new target state in the state machine
           Accept only if all accept.
        """
        acceptance_f = has_one_from_each(AcceptanceStateIndexList, 
                                         EpsilonClosure)
        return State(AcceptanceF=acceptance_f)

    # Plain merge of all states of all state machines with an 
    # epsilon transition from the init state to all init states
    # of the reverse_sm
    sm = StateMachine()
    for rsm in reverse_sm_list:
        sm.states.update(rsm.states)
        sm.add_epsilon_transition(sm.init_state_index, rsm.init_state_index) 

    initial_state_epsilon_closure = sm.get_epsilon_closure(sm.init_state_index) 

    InitState = get_merged_state(acceptance_state_id_list, 
                                 initial_state_epsilon_closure)

    result    = StateMachine(InitStateIndex=index.get(), InitState=InitState)

    # (*) prepare the initial worklist
    worklist = [ ( result.init_state_index, initial_state_epsilon_closure) ]

    epsilon_closure_db = sm.get_epsilon_closure_db()

    while len(worklist) != 0:
        # 'start_state_index' is the index of an **existing** state in the state machine.
        # It was either created above, in StateMachine's constructor, or as a target
        # state index.
        start_state_index, start_state_combination = worklist.pop()
 
        # (*) compute the elementary trigger sets together with the 
        #     epsilon closure of target state combinations that they trigger to.
        #     In other words: find the ranges of characters where the state triggers to
        #     a unique state combination. E.g:
        #                Range        Target State Combination 
        #                [0:23]   --> [ State1, State2, State10 ]
        #                [24:60]  --> [ State1 ]
        #                [61:123] --> [ State2, State10 ]
        #
        elementary_trigger_set_infos = sm.get_elementary_trigger_sets(start_state_combination,
                                                                      epsilon_closure_db)
        ## DEBUG_print(start_state_combination, elementary_trigger_set_infos)

        # (*) loop over all elementary trigger sets
        for epsilon_closure_of_target_state_combination, trigger_set in elementary_trigger_set_infos.iteritems():
            #  -- if there is no trigger to the given target state combination, then drop it
            if trigger_set.is_empty(): 
                continue
            elif not has_one_from_each(state_id_set_list, epsilon_closure_of_target_state_combination):
                continue

            # -- add a new target state representing the state combination
            #    (if this did not happen yet)
            target_state_index = \
                 map_state_combination_to_index(epsilon_closure_of_target_state_combination)

            # -- if target state combination was not considered yet, then create 
            #    a new state in the state machine
            if not result.states.has_key(target_state_index):
                result.states[target_state_index] = get_merged_state(acceptance_state_id_list, 
                                                                     epsilon_closure_of_target_state_combination)

                worklist.append((target_state_index, epsilon_closure_of_target_state_combination))  

            # -- add the transition 'start state to target state'
            result.add_transition(start_state_index, trigger_set, target_state_index)

    if not result.has_acceptance_states():
        return StateMachine()
    else:
        return beautifier.do(reverse.do(result))
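
The module above intersects state machines by reversing them and walking backwards from the combined acceptance states. For intuition only, here is a classic product construction over two tiny dictionary DFAs; it reaches the same goal (accept only lexemes accepted by every machine) but is deliberately not the reversal-based algorithm described in the docstring:

from itertools import product

def dfa_accepts(dfa, text):
    """dfa = (transitions, init, accepting); True if 'text' is accepted."""
    transitions, state, accepting = dfa
    for ch in text:
        state = transitions.get((state, ch))
        if state is None:
            return False
    return state in accepting

def intersect(dfa_a, dfa_b, alphabet):
    """Product construction: a pair state accepts only if both parts accept."""
    (ta, ia, fa), (tb, ib, fb) = dfa_a, dfa_b
    states_a = {ia} | {s for s, _ in ta} | set(ta.values())
    states_b = {ib} | {s for s, _ in tb} | set(tb.values())
    transitions = {}
    for (sa, sb), ch in product(product(states_a, states_b), alphabet):
        na, nb = ta.get((sa, ch)), tb.get((sb, ch))
        if na is not None and nb is not None:
            transitions[((sa, sb), ch)] = (na, nb)
    accepting = set((sa, sb) for sa in fa for sb in fb)
    return transitions, (ia, ib), accepting

# A: one or more 'a';  B: exactly two characters from {'a', 'b'}.
A = ({(0, "a"): 1, (1, "a"): 1}, 0, {1})
B = ({(0, "a"): 1, (0, "b"): 1, (1, "a"): 2, (1, "b"): 2}, 0, {2})
AB = intersect(A, B, "ab")
assert dfa_accepts(AB, "aa")         # matched by both machines
assert not dfa_accepts(AB, "a")      # only A accepts
assert not dfa_accepts(AB, "ab")     # only B accepts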
Example #32
File: engine.py  Project: coderjames/pascal
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "              = character string
                 [ non_rect_bracket_close ]           = set of characters
                 { identifier }                       = pattern replacement
                 ( expression )
                 non_control_character+               = lonely characters
                 primary repetition_cmd
    """
    __debug_entry("primary", stream)    
    x = stream.read(1); lookahead = stream.read(1); 
    if x != "" and lookahead != "": stream.seek(-1, 1)
    if x == "": return __debug_exit(None, stream)

    # -- 'primary' primary
    if   x == "\"": result = snap_character_string.do(stream)
    elif x == "[":  
        stream.seek(-1, 1); 
        result = character_set_expression.do(stream, PatternDict)
    elif x == "{":  result = snap_replacement(stream, PatternDict)
    elif x == ".":  result = create_ALL_BUT_NEWLINE_state_machine()
    elif x == "(":  result = snap_bracketed_expression(stream, PatternDict)

    elif x.isspace():
        # a free-standing space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException("lonely operator '%s' without a preceding expression." % x)

    elif x == "\\":
        if lookahead == "C":
            stream.read(1)
            result = snap_case_folded_pattern(stream, PatternDict)
        elif lookahead == "R":
            result = get_expression_in_brackets(stream, PatternDict, "reverse operator", "R").get_inverse()
        elif lookahead == "A":
            result =  get_expression_in_brackets(stream, PatternDict, "anti-pattern operator", "A")
            result.transform_to_anti_pattern()
        else:
            stream.seek(-1, 1)
            trigger_set = character_set_expression.snap_property_set(stream)
            if trigger_set is None:
                stream.seek(1, 1)  # snap_property_set() leaves the stream right before '\\'
                char_code = snap_backslashed_character.do(stream)
                if char_code is None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")
                trigger_set = char_code
            result = StateMachine()
            result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True)

    elif x not in CONTROL_CHARACTERS:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command? 
    result_repeated = __snap_repetition_range(result, stream) 
    if result_repeated is not None: result = result_repeated
    return __debug_exit(beautifier.do(result), stream)
Example #33
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else: result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A useful application is most probably
        # the whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very effective way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier,
                      fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: As soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # Enter the skipper as if the opener pattern was a normal pattern and the 'skipper' is the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str,
                           action,
                           get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full-fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(
                fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier,
                      fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range": (skip_range.do, E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range":
            (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        new_mode.add_match(opener_str,
                           action,
                           get_pattern_object(opener_sm),
                           Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True),
                                    FileName, LineN)

            new_mode.add_match(
                suppressed_newline_pattern_str,
                code,
                get_pattern_object(suppressed_newline_sm),
                Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(),
                           action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Example #34
File: engine.py  Project: yifsun/amplify
def snap_primary(stream, PatternDict):
    """primary:  " non_double_quote *  "              = character string
                 [ non_rect_bracket_close ]           = set of characters
                 { identifier }                       = pattern replacement
                 ( expression )
                 non_control_character+               = lonely characters
                 primary repetition_cmd
    """
    global SPECIAL_TERMINATOR 

    __debug_entry("primary", stream)    
    x = stream.read(1)
    if   x == "": return __debug_exit(None, stream)

    # -- 'primary' primary
    if   x == "\"": result = snap_character_string.do(stream)
    elif x == "[":  
        stream.seek(-1, 1); 
        result = snap_character_set_expression(stream, PatternDict)
    elif x == "{":  result = snap_replacement(stream, PatternDict)
    elif x == ".":  result = create_ALL_BUT_NEWLINE_state_machine(stream)
    elif x == "(":  result = snap_bracketed_expression(stream, PatternDict)

    elif x.isspace():
        # a free-standing space ends the regular expression
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    elif x in ["*", "+", "?"]:
        raise RegularExpressionException("lonely operator '%s' without a preceding expression." % x)

    elif x == "\\":
        result = snap_command(stream, PatternDict)
        if result is None:
            stream.seek(-1, 1)
            trigger_set = snap_property_set(stream)
            if trigger_set is None:
                # snap the '\'
                stream.read(1)
                char_code = snap_backslashed_character.do(stream)
                if char_code is None:
                    raise RegularExpressionException("Backslash followed by unrecognized character code.")
                trigger_set = char_code
            result = StateMachine()
            result.add_transition(result.init_state_index, trigger_set, AcceptanceF=True)

    elif x not in CONTROL_CHARACTERS and x != SPECIAL_TERMINATOR:
        # NOTE: The '\' is not inside the control characters---for a reason.
        #       It is used to define for example character codes using '\x' etc.
        stream.seek(-1, 1)
        result = snap_non_control_character(stream, PatternDict)

    else:
        # NOTE: This includes the '$' sign which means 'end of line'
        #       because the '$' sign is in CONTROL_CHARACTERS, but is not checked
        #       against. Thus, it is good to leave here on '$' because the
        #       '$' sign is handled on the very top level.
        # this is not a valid primary
        stream.seek(-1, 1)
        return __debug_exit(None, stream)

    # -- optional repetition command? 
    result_repeated = __snap_repetition_range(result, stream) 
    if result_repeated is not None: result = result_repeated

    # There's something going wrong with pseudo-ambiguous post context
    # if we do not clean up here. TODO: Investigate why.
    # See tests in generator/TEST directory.
    return __debug_exit(beautifier.do(result), stream)
Example #35
    # Test for each 'sm' in 'sm_list' is superfluous.
    # It is done in 'AppendixNoI'.
    test(ci_list, sm_list)

elif "Split" in sys.argv:
    # A first transition of a state machine is separated into two, because
    # it is covered by more than one different count action.
    NS1 = NumberSet.from_range(0x10, 0x20)
    NS2 = NumberSet.from_range(0x20, 0x30)
    NS3 = NumberSet.from_range(0x30, 0x40)
    NS4 = NumberSet.from_range(0x40, 0x50)
    ci_list = [
        CountInfo(dial_db.new_incidence_id(), NS1,
                  CountAction(E_CharacterCountType.COLUMN, 1)),
        CountInfo(dial_db.new_incidence_id(), NS2,
                  CountAction(E_CharacterCountType.COLUMN, 2)),
        CountInfo(dial_db.new_incidence_id(), NS3,
                  CountAction(E_CharacterCountType.COLUMN, 3)),
        CountInfo(dial_db.new_incidence_id(), NS4,
                  CountAction(E_CharacterCountType.COLUMN, 4))
    ]

    sm = StateMachine()
    si = sm.init_state_index
    iid = dial_db.new_incidence_id()
    ti0 = sm.add_transition(si, NumberSet.from_range(0x1A, 0x4B))
    ac0 = sm.add_transition(ti0, NS_A, AcceptanceF=True)

    test(ci_list, [sm])