def create_ALL_BUT_NEWLINE_state_machine():
    """Build a state machine that triggers on anything but newline.

    The machine gets a single transition that starts at its initial state
    and is added with 'AcceptanceF=True'. The transition triggers on the
    inverse of the newline character's interval. If the character value
    limit reported by 'Setup' differs from 'sys.maxint', the trigger set
    is additionally intersected with Interval(0, <character value limit>).

    NOTE: Buffer control characters are supposed to be filtered out by
          the code generator.

    Returns the newly created StateMachine.
    """
    sm = StateMachine()

    everything_but_newline = NumberSet(Interval(ord("\n")).inverse())

    # 'sys.maxint' as limit means: no restriction on the character range.
    limit_is_bounded = Setup.get_character_value_limit() != sys.maxint
    if limit_is_bounded:
        admissible = Interval(0, Setup.get_character_value_limit())
        everything_but_newline.intersect_with(admissible)

    sm.add_transition(sm.init_state_index, everything_but_newline,
                      AcceptanceF=True)
    return sm
def create_ALL_BUT_NEWLINE_state_machine():
    """Build a state machine that triggers on anything but newline.

    The machine gets a single transition that starts at its initial state
    and is added with 'AcceptanceF=True'. The transition triggers on the
    inverse of the newline character's interval. If the character value
    limit reported by 'Setup' differs from 'sys.maxint', the trigger set
    is additionally intersected with Interval(0, <character value limit>).

    NOTE: Buffer control characters are supposed to be filtered out by
          the code generator.

    Returns the newly created StateMachine.
    """
    sm = StateMachine()

    everything_but_newline = NumberSet(Interval(ord("\n")).inverse())

    # 'sys.maxint' as limit means: no restriction on the character range.
    limit_is_bounded = Setup.get_character_value_limit() != sys.maxint
    if limit_is_bounded:
        admissible = Interval(0, Setup.get_character_value_limit())
        everything_but_newline.intersect_with(admissible)

    sm.add_transition(sm.init_state_index, everything_but_newline,
                      AcceptanceF=True)
    return sm
def prune_range(TriggerMap):
    """Clip 'TriggerMap' in place to the useful character range.

    The useful range starts at zero and stops at the character value limit
    reported by 'Setup'. The first interval that reaches above zero gets
    its 'begin' set to 0 and everything before it is dropped. Then the
    last interval that starts below the limit gets its 'end' cut down to
    the limit and everything after it is dropped.

    TriggerMap -- list of (interval, target) entries ordered from low to
                  high; the intervals expose mutable 'begin' and 'end'.
                  The code treats 'end' and the limit as exclusive borders
                  (assumed from how they are compared here -- confirm
                  against the Interval class).

    Returns None; the list and its intervals are modified in place.
    Raises AssertionError if no interval reaches into the useful range.
    """
    LowerLimit = 0
    UpperLimit = Setup.get_character_value_limit()

    # Lower border: iterate from 'low' to 'high'.
    for i, info in enumerate(TriggerMap):
        interval = info[0]
        if interval.end <= LowerLimit:
            continue
        # Found the interval that intersects with the 'LowerLimit' line.
        interval.begin = LowerLimit
        del TriggerMap[:i]
        # Only 'break' here: the upper border still has to be pruned.
        # (A 'return' at this point would leave 'UpperLimit' unused.)
        break
    else:
        # The whole trigger map happens below 0. This is trash, no doubt!
        assert False, "prune_range: trigger map lies entirely below 0"

    # Upper border: iterate from 'high' to 'low'.
    for i in range(len(TriggerMap) - 1, -1, -1):
        interval = TriggerMap[i][0]
        # An interval beginning at the limit is already outside; '>=' avoids
        # clipping it down to an empty interval.
        if interval.begin >= UpperLimit:
            continue
        # Found the interval that intersects with the 'UpperLimit' line.
        # Only ever shrink it; an interval ending below the limit stays.
        interval.end = min(interval.end, UpperLimit)
        del TriggerMap[i + 1:]
        return

    # Nothing starts below the upper limit; there is nothing useful left.
    assert False, "prune_range: trigger map lies entirely above the limit"
def prune_range(TriggerMap):
    """Clip 'TriggerMap' in place to the useful character range.

    The useful range starts at zero and stops at the character value limit
    reported by 'Setup'. The first interval that reaches above zero gets
    its 'begin' set to 0 and everything before it is dropped. Then the
    last interval that starts below the limit gets its 'end' cut down to
    the limit and everything after it is dropped.

    TriggerMap -- list of (interval, target) entries ordered from low to
                  high; the intervals expose mutable 'begin' and 'end'.
                  The code treats 'end' and the limit as exclusive borders
                  (assumed from how they are compared here -- confirm
                  against the Interval class).

    Returns None; the list and its intervals are modified in place.
    Raises AssertionError if no interval reaches into the useful range.
    """
    LowerLimit = 0
    UpperLimit = Setup.get_character_value_limit()

    # Lower border: iterate from 'low' to 'high'.
    for i, info in enumerate(TriggerMap):
        interval = info[0]
        if interval.end <= LowerLimit:
            continue
        # Found the interval that intersects with the 'LowerLimit' line.
        interval.begin = LowerLimit
        del TriggerMap[:i]
        # Only 'break' here: the upper border still has to be pruned.
        # (A 'return' at this point would leave 'UpperLimit' unused.)
        break
    else:
        # The whole trigger map happens below 0. This is trash, no doubt!
        assert False, "prune_range: trigger map lies entirely below 0"

    # Upper border: iterate from 'high' to 'low'.
    for i in range(len(TriggerMap) - 1, -1, -1):
        interval = TriggerMap[i][0]
        # An interval beginning at the limit is already outside; '>=' avoids
        # clipping it down to an empty interval.
        if interval.begin >= UpperLimit:
            continue
        # Found the interval that intersects with the 'UpperLimit' line.
        # Only ever shrink it; an interval ending below the limit stays.
        interval.end = min(interval.end, UpperLimit)
        del TriggerMap[i + 1:]
        return

    # Nothing starts below the upper limit; there is nothing useful left.
    assert False, "prune_range: trigger map lies entirely above the limit"
def __delete_forbidden_ranges(sm, fh):
    """Confine all transitions of 'sm' to admissible character values.

    Number set operations may leave trigger sets that reach below zero or
    beyond the unicode range; such sets are intersected with
    'UnicodeInterval'. A trigger set that still exceeds the character
    value limit of the buffer element is reported through 'error_msg'.
    For the codecs "utf16-le" and "utf16-be" the range 0xD800-0xDFFF
    ('ForbiddenRange') is reported and cut out of the trigger set.

    sm -- state machine whose transitions are adapted in place.
    fh -- handed on to 'error_msg' (presumably the handle of the pattern
          source -- confirm against callers).

    NOTE: Transitions whose trigger set became empty are deleted. This
          might result in orphaned states that have to be deleted.
    """
    value_limit = Setup.get_character_value_limit()

    for state in sm.states.values():
        # NOTE(review): transitions may be deleted below while looping;
        # this looks like it relies on 'items()' handing out a snapshot
        # (Python 2 behavior) -- confirm before porting.
        for target_index, number_set in state.transitions().get_map().items():

            # Clip whatever sticks out of the unicode code range.
            if (number_set.minimum() < UnicodeInterval.begin
                    or number_set.supremum() >= UnicodeInterval.end):
                number_set.intersect_with(UnicodeInterval)

            if number_set.supremum() > value_limit:
                message = (
                    "Pattern contains character beyond the scope of the buffer element size (%s)\n"
                    % Setup.get_character_value_limit_str()
                    + "Please, cut the character range of the regular expression,\n"
                    + "adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"
                    + "or specify '--buffer-element-size-irrelevant' to ignore the issue."
                )
                error_msg(message, fh)

            # UTF16 cannot represent D800-DFFF: warn and cut it out.
            if (Setup.buffer_codec in ("utf16-le", "utf16-be")
                    and number_set.has_intersection(ForbiddenRange)):
                error_msg("Pattern contains characters in unicode range 0xD800-0xDFFF.\n"
                          "This range is not covered by UTF16. Cutting Interval.",
                          fh, DontExitF=True)
                number_set.cut_interval(ForbiddenRange)

            # Nothing left that leads to the target state => drop the path.
            if number_set.is_empty():
                state.transitions().delete_transitions_to_target(target_index)
def __delete_forbidden_ranges(sm, fh):
    """Confine all transitions of 'sm' to admissible character values.

    Number set operations may leave trigger sets that reach below zero or
    beyond the unicode range; such sets are intersected with
    'UnicodeInterval'. A trigger set that still exceeds the character
    value limit of the buffer element is reported through 'error_msg'.
    For the codecs "utf16-le" and "utf16-be" the range 0xD800-0xDFFF
    ('ForbiddenRange') is reported and cut out of the trigger set.

    sm -- state machine whose transitions are adapted in place.
    fh -- handed on to 'error_msg' (presumably the handle of the pattern
          source -- confirm against callers).

    NOTE: Transitions whose trigger set became empty are deleted. This
          might result in orphaned states that have to be deleted.
    """
    value_limit = Setup.get_character_value_limit()

    for state in sm.states.values():
        # NOTE(review): transitions may be deleted below while looping;
        # this looks like it relies on 'items()' handing out a snapshot
        # (Python 2 behavior) -- confirm before porting.
        for target_index, number_set in state.transitions().get_map().items():

            # Clip whatever sticks out of the unicode code range.
            if (number_set.minimum() < UnicodeInterval.begin
                    or number_set.supremum() >= UnicodeInterval.end):
                number_set.intersect_with(UnicodeInterval)

            if number_set.supremum() > value_limit:
                message = (
                    "Pattern contains character beyond the scope of the buffer element size (%s)\n"
                    % Setup.get_character_value_limit_str()
                    + "Please, cut the character range of the regular expression,\n"
                    + "adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"
                    + "or specify '--buffer-element-size-irrelevant' to ignore the issue."
                )
                error_msg(message, fh)

            # UTF16 cannot represent D800-DFFF: warn and cut it out.
            if (Setup.buffer_codec in ("utf16-le", "utf16-be")
                    and number_set.has_intersection(ForbiddenRange)):
                error_msg("Pattern contains characters in unicode range 0xD800-0xDFFF.\n"
                          "This range is not covered by UTF16. Cutting Interval.",
                          fh, DontExitF=True)
                number_set.cut_interval(ForbiddenRange)

            # Nothing left that leads to the target state => drop the path.
            if number_set.is_empty():
                state.transitions().delete_transitions_to_target(target_index)