Exemple #1
0
def create_ALL_BUT_NEWLINE_state_machine():
    """Build the state machine for 'any character but newline'.

    One transition is added at the init state of a fresh state machine. Its
    trigger set is the inverse of the newline interval. If the setup reports
    a character value limit other than 'sys.maxint', the trigger set is
    additionally intersected with 'Interval(0, limit)'.

    NOTE: Buffer control characters are supposed to be filtered out by the
          code generator.

    RETURNS: The newly created state machine.
    """
    machine = StateMachine()

    newline_interval = Interval(ord("\n"))
    all_but_newline = NumberSet(newline_interval.inverse())

    # 'sys.maxint' signals that there is no limit to be applied.
    value_limit = Setup.get_character_value_limit()
    if value_limit != sys.maxint:
        admissible_range = Interval(0, value_limit)
        all_but_newline.intersect_with(admissible_range)

    machine.add_transition(machine.init_state_index, all_but_newline,
                           AcceptanceF=True)
    return machine
Exemple #2
0
def create_ALL_BUT_NEWLINE_state_machine():
    """Return a state machine that triggers on everything except newline.

    The machine consists of what 'add_transition()' creates for a single
    transition starting at the init state. The transition's trigger set is
    the inverse of the newline interval, cut down to 'Interval(0, limit)'
    whenever the setup's character value limit differs from 'sys.maxint'.

    NOTE: Buffer control characters are supposed to be filtered out by the
          code generator.
    """
    sm = StateMachine()

    # Everything but '\n' ...
    triggers = NumberSet(Interval(ord("\n")).inverse())

    # ... restricted to the values that fit the configured character range.
    limit = Setup.get_character_value_limit()
    if limit != sys.maxint:
        triggers.intersect_with(Interval(0, limit))

    sm.add_transition(sm.init_state_index, triggers, AcceptanceF=True)
    return sm
Exemple #3
0
def prune_range(TriggerMap):
    """Clip 'TriggerMap' in place to the 'useful range' of character values.

    The useful range starts at zero and ends at the character value limit
    reported by 'Setup'. Thus:

      -- Entries that end at or below zero are dropped. The first entry
         that reaches above zero gets 'begin = 0' instead of a (possibly
         negative) value.
      -- Entries that begin above the upper limit are dropped. The last
         remaining entry gets 'end = UpperLimit'.

    Both borders are always pruned. (Formerly the function returned right
    after the lower border was handled, so the upper border was never
    pruned for any trigger map reaching above zero.)

    TriggerMap -- list of (interval, target) pairs ordered from 'low' to
                  'high'. The intervals must provide writable 'begin' and
                  'end' members.

    RETURNS: None. The list and its border intervals are modified in place.

    RAISES:  AssertionError, if no entry intersects with the useful range.
             This includes the case of an empty trigger map.
    """
    LowerLimit = 0
    UpperLimit = Setup.get_character_value_limit()

    # (1) Lower border: iterate from 'low' to 'high'
    for first_i, (interval, _target) in enumerate(TriggerMap):
        if interval.end > LowerLimit: break
    else:
        # The whole trigger map happens below 0. This is trash, no doubt!
        raise AssertionError("prune_range: no interval reaches above the lower limit.")

    # Found an interval that intersects with 'LowerLimit' line
    interval.begin = LowerLimit
    del TriggerMap[:first_i]

    # (2) Upper border: iterate from 'high' to 'low'
    #     (over what is left after the lower border has been pruned).
    # NOTE(review): if 'UpperLimit' is an exclusive bound, an interval with
    #               'begin == UpperLimit' becomes empty here -- confirm
    #               whether '>=' would be the right condition to skip.
    for last_i in reversed(range(len(TriggerMap))):
        interval, _target = TriggerMap[last_i]
        if interval.begin <= UpperLimit: break
    else:
        raise AssertionError("prune_range: no interval begins at or below the upper limit.")

    # Found an interval that intersects with 'UpperLimit' line
    interval.end = UpperLimit
    del TriggerMap[last_i + 1:]
Exemple #4
0
def prune_range(TriggerMap):
    """Clip 'TriggerMap' in place to the 'useful range' of character values.

    The useful range starts at zero and ends at the character value limit
    reported by 'Setup'. Thus:

      -- Entries that end at or below zero are dropped. The first entry
         that reaches above zero gets 'begin = 0' instead of a (possibly
         negative) value.
      -- Entries that begin above the upper limit are dropped. The last
         remaining entry gets 'end = UpperLimit'.

    Both borders are always pruned. (Formerly the function returned right
    after the lower border was handled, so the upper border was never
    pruned for any trigger map reaching above zero.)

    TriggerMap -- list of (interval, target) pairs ordered from 'low' to
                  'high'. The intervals must provide writable 'begin' and
                  'end' members.

    RETURNS: None. The list and its border intervals are modified in place.

    RAISES:  AssertionError, if no entry intersects with the useful range.
             This includes the case of an empty trigger map.
    """
    LowerLimit = 0
    UpperLimit = Setup.get_character_value_limit()

    # (1) Lower border: iterate from 'low' to 'high'
    for first_i, (interval, _target) in enumerate(TriggerMap):
        if interval.end > LowerLimit: break
    else:
        # The whole trigger map happens below 0. This is trash, no doubt!
        raise AssertionError("prune_range: no interval reaches above the lower limit.")

    # Found an interval that intersects with 'LowerLimit' line
    interval.begin = LowerLimit
    del TriggerMap[:first_i]

    # (2) Upper border: iterate from 'high' to 'low'
    #     (over what is left after the lower border has been pruned).
    # NOTE(review): if 'UpperLimit' is an exclusive bound, an interval with
    #               'begin == UpperLimit' becomes empty here -- confirm
    #               whether '>=' would be the right condition to skip.
    for last_i in reversed(range(len(TriggerMap))):
        interval, _target = TriggerMap[last_i]
        if interval.begin <= UpperLimit: break
    else:
        raise AssertionError("prune_range: no interval begins at or below the upper limit.")

    # Found an interval that intersects with 'UpperLimit' line
    interval.end = UpperLimit
    del TriggerMap[last_i + 1:]
Exemple #5
0
def __delete_forbidden_ranges(sm, fh):
    """Restrict all transitions of 'sm' to admissible character values.

    Unicode defines code points >= 0 only, but number set operations may
    have produced values below zero. For each trigger set of each state:

      -- Anything outside 'UnicodeInterval' is removed.
      -- A supremum beyond the setup's character value limit is reported
         via 'error_msg()'.
      -- With buffer codec "utf16-le" or "utf16-be", an intersection with
         'ForbiddenRange' (0xD800-0xDFFF) is reported with 'DontExitF=True'
         and then cut out.
      -- If the trigger set ended up empty, the transitions to its target
         state are deleted.

    sm -- state machine; the states are taken from 'sm.states.values()'.
    fh -- handed over to 'error_msg()' as is.

    NOTE: This operation might result in orphaned states that have to
          be deleted.
    """
    value_limit = Setup.get_character_value_limit()

    for state in sm.states.values():
        for target_index, triggers in state.transitions().get_map().items():

            # Clip whatever sticks out of the unicode code range.
            inside_unicode_f = (triggers.minimum() >= UnicodeInterval.begin
                                and triggers.supremum() < UnicodeInterval.end)
            if not inside_unicode_f:
                triggers.intersect_with(UnicodeInterval)

            # Characters which do not fit the buffer element are an error.
            if triggers.supremum() > value_limit:
                message = (
                    "Pattern contains character beyond the scope of the buffer element size (%s)\n"
                    % Setup.get_character_value_limit_str()
                    + "Please, cut the character range of the regular expression,\n"
                    + "adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"
                    + "or specify '--buffer-element-size-irrelevant' to ignore the issue."
                )
                error_msg(message, fh)

            # Delete the forbidden interval: D800-DFFF
            if Setup.buffer_codec in ("utf16-le", "utf16-be") \
               and triggers.has_intersection(ForbiddenRange):
                error_msg("Pattern contains characters in unicode range 0xD800-0xDFFF.\n"
                          "This range is not covered by UTF16. Cutting Interval.",
                          fh, DontExitF=True)
                triggers.cut_interval(ForbiddenRange)

            # Nothing left to trigger on => the path to the target state is cut.
            if triggers.is_empty():
                state.transitions().delete_transitions_to_target(target_index)
Exemple #6
0
def __delete_forbidden_ranges(sm, fh):
    """Cut the trigger sets of 'sm' down to the admissible character range.

    Unicode defines code points >= 0 only; values below zero may be the
    result of number set operations. Every trigger set of every state is
    treated as follows:

      (1) It is intersected with 'UnicodeInterval', if it exceeds it.
      (2) 'error_msg()' is called, if its supremum lies beyond the setup's
          character value limit.
      (3) For the buffer codecs "utf16-le" and "utf16-be", 'ForbiddenRange'
          (0xD800-0xDFFF) is reported with 'DontExitF=True' and cut out.
      (4) If it became empty, the transitions to the related target state
          are deleted.

    sm -- state machine whose 'states' are iterated.
    fh -- forwarded unchanged to 'error_msg()'.

    NOTE: This operation might result in orphaned states that have to
          be deleted.
    """
    utf16_codec_list = ("utf16-le", "utf16-be")
    limit = Setup.get_character_value_limit()

    for state in sm.states.values():
        transition_db = state.transitions().get_map()

        for target_index, number_set in transition_db.items():

            # (1) Make sure, all transitions lie inside the unicode code range
            below_f = number_set.minimum() < UnicodeInterval.begin
            if below_f or number_set.supremum() >= UnicodeInterval.end:
                number_set.intersect_with(UnicodeInterval)

            # (2) Buffer element size must be able to carry the character
            if number_set.supremum() > limit:
                limit_str = Setup.get_character_value_limit_str()
                error_msg("Pattern contains character beyond the scope of the buffer element size (%s)\n" % limit_str
                          + "Please, cut the character range of the regular expression,\n"
                            "adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"
                          + "or specify '--buffer-element-size-irrelevant' to ignore the issue.",
                          fh)

            # (3) Delete the forbidden interval: D800-DFFF
            if Setup.buffer_codec in utf16_codec_list:
                if number_set.has_intersection(ForbiddenRange):
                    error_msg("Pattern contains characters in unicode range 0xD800-0xDFFF.\n"
                              "This range is not covered by UTF16. Cutting Interval.",
                              fh, DontExitF=True)
                    number_set.cut_interval(ForbiddenRange)

            # (4) If the path to the target state has been cut, then delete it.
            if not number_set.is_empty():
                continue
            state.transitions().delete_transitions_to_target(target_index)