def seal(self):
    """Complete the indentation setup with defaults where the user left gaps.

    -- If neither spaces nor grids were specified, try to install the
       defaults: ' ' as a space of width 1 and '\\t' as a grid of width 4.
       A default is only installed if its character is not listed in the
       'bad' character set. If both defaults are blocked, an error is issued.
    -- If no newline pattern was specified, install the default state
       machine for '(\\r\\n)|(\\n)'.

    (Idiom fix: 'x == False' comparisons replaced by 'not x'.)
    """
    if len(self.space_db) == 0 and len(self.grid_db) == 0:
        default_space = ord(' ')
        default_tab   = ord('\t')
        bad           = self.bad_character_set
        if not bad.get().contains(default_space):
            self.specify_space("[ ]", NumberSet(default_space), 1, self.fh)
        if not bad.get().contains(default_tab):
            self.specify_grid("[\\t]", NumberSet(default_tab), 4, self.fh)

        # Both defaults blocked by the 'bad' set => indentation counting
        # has nothing to count.
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            error_msg("No space or grid defined for indentation counting. Default\n"
                      "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                      bad.file_name, bad.line_n)

    if self.newline_state_machine.get() is None:
        # Default newline: optional '\r' followed by mandatory '\n'.
        sm      = StateMachine()
        end_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\n')), AcceptanceF=True)
        mid_idx = sm.add_transition(sm.init_state_index, NumberSet(ord('\r')), AcceptanceF=False)
        sm.add_transition(mid_idx, NumberSet(ord('\n')), end_idx, AcceptanceF=False)
        self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
    """Determines: Setup.buffer_codec_name
                   Setup.buffer_codec

    Selects the codec description object according to the given codec
    name / codec file and stores it in 'self.buffer_codec'.

    (Fix: a duplicated, dead 'os.path.splitext(...)' call before the
    'try' block has been removed -- its result was discarded.)
    """
    if BufferCodecName in ("utf8", "utf16"):
        # Dynamic codecs require the implementing module.
        assert Module is not None
        result = codec_db.CodecDynamicInfo(BufferCodecName, Module)
    elif BufferCodecFileName:
        # Sanity check only: reject file name strings that cannot be split.
        try:
            os.path.splitext(os.path.basename(BufferCodecFileName))
        except:
            file_in.error_msg("cannot interpret string following '--codec-file'")
        result = codec_db.CodecTransformationInfo(FileName=BufferCodecFileName)
    elif BufferCodecName == "unicode":
        # (Still, 'icu' or 'iconv' may provide converted content, but ...)
        # If the internal buffer is 'unicode', then the pattern's state
        # machines are not converted. The requirement for the pattern's
        # range is the same as for the 'buffer element chunks'.
        limit  = self.get_character_value_limit()   # hoisted: called once
        result = codec_db.CodecInfo("unicode",
                                    NumberSet.from_range(0, limit),
                                    NumberSet.from_range(0, limit))
    elif BufferCodecName == "unit-test":
        result = codec_db.CodecInfo("unicode",
                                    NumberSet.from_range(-sys.maxint, sys.maxint),
                                    NumberSet.from_range(-sys.maxint, sys.maxint))
    else:
        result = codec_db.CodecTransformationInfo(BufferCodecName)

    self.buffer_codec = result
def __sm_newline_default(self):
    """Construct the default newline state machine: '(\n)|(\r\n)'.

    RETURNS: StateMachine, or None if '\n' is already occupied by
             another counting command.
    """
    global cc_type_name_db

    set_lf = NumberSet(ord('\n'))
    set_cr = NumberSet(ord('\r'))

    occupier = self.count_command_map.find_occupier(set_lf, set())
    if occupier is not None:
        # '\n' taken => no default newline can be installed at all.
        error_msg("Trying to implement default newline: '\\n' or '\\r\\n'.\n"
                  "The '\\n' option is not possible, since it has been occupied by '%s'.\n" \
                  "No newline can be defined by default." % cc_type_name_db[occupier.cc_type],
                  occupier.sr,
                  DontExitF=True,
                  SuppressCode=NotificationDB.warning_default_newline_0A_impossible)
        return

    sm = StateMachine.from_character_set(set_lf)

    if Setup.dos_carriage_return_newline_f:
        occupier = self.count_command_map.find_occupier(set_cr, set())
        if occupier is not None:
            # '\r' taken => only the plain '\n' variant remains.
            error_msg("Trying to implement default newline: '\\n' or '\\r\\n'.\n"
                      "The '\\r\\n' option is not possible, since '\\r' has been occupied by '%s'." \
                      % cc_type_name_db[occupier.cc_type],
                      occupier.sr,
                      DontExitF=True,
                      SuppressCode=NotificationDB.warning_default_newline_0D_impossible)
        else:
            sm.add_transition_sequence(sm.init_state_index, [set_cr, set_lf])

    return sm
def add_transition(self, Trigger, TargetStateIdx):
    """Adds a transition according to trigger and target index.
    RETURNS: The target state index (may be created newly).
    """
    assert type(TargetStateIdx) == long or TargetStateIdx is None
    assert Trigger.__class__ in [int, long, list, Interval, NumberSet] or Trigger is None

    # Normalize 'Trigger' into either an Interval or a NumberSet.
    # (Exact 'type()' checks kept on purpose -- subclasses shall not match.)
    if Trigger is None:
        # Shorthand: trigger via all remaining (not yet used) triggers.
        Trigger = self.get_trigger_set_union().inverse()
    elif type(Trigger) == long:
        Trigger = Interval(int(Trigger), int(Trigger + 1))
    elif type(Trigger) == int:
        Trigger = Interval(Trigger, Trigger + 1)
    elif type(Trigger) == list:
        Trigger = NumberSet(Trigger, ArgumentIsYoursF=True)

    entry = self.__db.get(TargetStateIdx)
    if Trigger.__class__ == Interval:
        if entry is not None:
            entry.add_interval(Trigger)
        else:
            self.__db[TargetStateIdx] = NumberSet(Trigger, ArgumentIsYoursF=True)
    else:
        if entry is not None:
            entry.unite_with(Trigger)
        else:
            self.__db[TargetStateIdx] = Trigger

    return TargetStateIdx
def __indentation_add(Info):
    """Generate the C code fragment that advances the indentation count.

    RETURNS: "" if every involved character counts as a single space --
             then counting reduces to the subtraction 'end - begin' and
             no code is required.
    """
    if Info.has_only_single_spaces():
        return ""

    margin = " " * 16

    def __append_clause(out, CharSet, Operation):
        # One 'if( <condition> ) { <operation> }' line per character set.
        out.append(margin + "if( ")
        __condition(out, CharSet)
        out.append(" ) { ")
        out.append(Operation)
        out.append(" }\\\n")

    out       = []
    space_map = {}   # space count --> united character set
    grid_map  = {}   # grid  count --> united character set

    for name, count_parameter in Info.count_db.items():
        count         = count_parameter.get()
        character_set = Info.character_set_db[name].get()
        if count == "bad":
            continue
        # Grid counts are indicated by a negative integer.
        if count >= 0:
            space_map.setdefault(count, NumberSet()).unite_with(character_set)
        else:
            grid_map.setdefault(count, NumberSet()).unite_with(character_set)

    for count, character_set in space_map.items():
        __append_clause(out, character_set, "(I) += %i;" % count)

    for count, character_set in grid_map.items():
        __append_clause(out, character_set,
                        "(I) += (%i - ((I) %% %i));" % (abs(count), abs(count)))

    return "".join(out)
def get_supported_unicode_character_set(CodecAlias=None, FileName=None, FH=-1, LineN=None):
    """RETURNS: NumberSet of unicode code points that the given codec
    (identified by alias or by file) is able to represent.
    """
    assert CodecAlias is not None or FileName is not None

    result = NumberSet()
    for source_begin, source_end, dummy in get_codec_transformation_info(CodecAlias, FileName, FH, LineN):
        result.add_interval(Interval(source_begin, source_end))
    return result
def load_UnicodeData(self): fh = open_data_base_file("UnicodeData.txt") # some rows contain aliases, so they need to get converted into values property_general_category = self.db["gc"] property_bidi_class = self.db["bc"] def convert(Property, ValueAlias): """Convert specified ValueAlias to Value of the given property.""" if Property.alias_to_name_map.has_key(ValueAlias): return Property.alias_to_name_map[ValueAlias] return ValueAlias names_db = {} general_category_db = {} bidi_class_db = {} numeric_value_db = {} names_uc1_db = {} iso_comment_db = {} for line in fh.readlines(): if line.find("#") != -1: line = line[:line.find("#")] if line == "" or line.isspace(): continue x = line.split(";") code_point = int("0x" + x[0].strip(), 16) # CodePointIdx = 0 name = x[1].strip().replace(" ", "_") # NameIdx = 1 general_category = x[2].strip().replace( " ", "_") # GeneralCategoryIdx = 2 general_category = convert(property_general_category, general_category) bidi_class = x[4].strip().replace(" ", "_") # BidiClassIdx = 4 bidi_class = convert(property_bidi_class, bidi_class) numeric_value = x[6].strip() # NumericValueIdx = 6 uc1_name = x[10].strip().replace(" ", "_") # NameUC1Idx = 10 iso_comment = x[11].strip().replace(" ", "_") # ISO_CommentIdx = 11 names_db[name] = code_point general_category_db.setdefault( general_category, NumberSet()).quick_append_value(code_point) bidi_class_db.setdefault( bidi_class, NumberSet()).quick_append_value(code_point) numeric_value_db.setdefault( numeric_value, NumberSet()).quick_append_value(code_point) names_uc1_db[uc1_name] = code_point iso_comment_db[iso_comment] = str(code_point) self.db["na"].code_point_db = names_db # Name self.db["gc"].code_point_db = general_category_db # General Category self.db["bc"].code_point_db = bidi_class_db # BidiClass self.db["nv"].code_point_db = numeric_value_db # Numeric Value self.db["na1"].code_point_db = names_uc1_db # Name Unicode 1 self.db["isc"].code_point_db = iso_comment_db # ISO_Comment
def __get_remaining_set(self):
    """RETURNS: Characters of the codec's source set that are not yet
    covered by any counting entry of 'self.__map'. Entries of the
    'ignored' category do not count as coverage.
    """
    ignored = (E_CharacterCountType.BAD,
               E_CharacterCountType.BEGIN_NEWLINE_SUPPRESSOR,
               E_CharacterCountType.BEGIN_NEWLINE,
               E_CharacterCountType.END_NEWLINE)

    covered = NumberSet()
    for character_set, info in self.__map:
        if info.cc_type in ignored:
            continue
        covered.unite_with(character_set)

    return covered.get_complement(Setup.buffer_codec.source_set)
def load_Composition_Exclusion(self):
    """Load 'CompositionExclusions.txt' and store the resulting code
    point set under the property alias 'CE'.
    """
    # Column 0 contains what is interesting ...
    table = parse_table("CompositionExclusions.txt", NumberColumnList=[0])

    cp_set = NumberSet()
    for record in table:
        code_point = record[0]
        cp_set.quick_append_interval(Interval(code_point, code_point + 1))
    cp_set.clean()

    self.db["CE"].code_point_db = cp_set
def indentation_count_character_set(self):
    """Returns the superset of all characters that are involved in
    indentation counting. That is the set of character that can appear
    between newline and the first non whitespace character.
    """
    result = NumberSet()
    # Both spaces and grid characters participate in counting.
    for parameter in self.space_db.values() + self.grid_db.values():
        result.unite_with(parameter.get())
    return result
def create_ALL_BUT_NEWLINE_state_machine():
    """RETURNS: StateMachine matching exactly one character != '\\n'."""
    global Setup
    sm = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by
    #       the code generator.
    all_but_newline = NumberSet(Interval(ord("\n")).inverse())

    if Setup.get_character_value_limit() != sys.maxint:
        # Cut everything beyond the configured character value limit.
        all_but_newline.intersect_with(Interval(0, Setup.get_character_value_limit()))

    sm.add_transition(sm.init_state_index, all_but_newline, AcceptanceF=True)
    return sm
def __wildcard_value_match(self, WildCardValue):
    """RETURNS: United NumberSet of all property values matching the
    wildcard expression, or None if nothing matches.
    """
    matching_values = self.get_wildcard_value_matches(WildCardValue)
    if not matching_values:
        return None

    result = NumberSet()
    for value in matching_values:
        result.unite_with(NumberSet(self.code_point_db[value]))

    # No decoupling, since result is computed each fresh and new
    return result
def create_ALL_BUT_NEWLINE_state_machine(stream):
    """RETURNS: StateMachine matching one admissible character != '\\n'.
    Issues an error if the codec's source set contains only newline.
    """
    global Setup
    sm = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by
    #       the code generator.
    dot_set = NumberSet(Interval(ord("\n"))).get_complement(Setup.buffer_codec.source_set)

    if dot_set.is_empty():
        error_msg("The set of admissible characters contains only newline.\n"
                  "The '.' for 'all but newline' is an empty set.",
                  SourceRef.from_FileHandle(stream))

    sm.add_transition(sm.init_state_index, dot_set, AcceptanceF=True)
    return sm
class Tracker:
    """Accumulates the character set described inside a '[...]' expression."""

    def __init__(self):
        self.match_set  = NumberSet()   # characters collected so far
        self.negation_f = False         # True => complement the set at the end

    def consider_interval(self, Begin, End):
        """Add the code point interval [Begin, End) to the match set.
        Raises RegularExpressionException on reversed bounds."""
        if Begin > End:
            raise RegularExpressionException("Character range: '-' requires character with 'lower code' to preceed\n" + \
                                             "found range '%s-%s' which corresponds to %i-%i as unicode code points." % \
                                             (utf8.map_unicode_to_utf8(Begin), utf8.map_unicode_to_utf8(End), Begin, End))
        self.match_set.add_interval(Interval(Begin, End))

    def consider_letter(self, CharCode):
        """A single character is the interval [CharCode, CharCode + 1)."""
        self.consider_interval(CharCode, CharCode + 1)
class Tracker:
    """Collects the characters that a '[...]' character-set expression matches."""

    def __init__(self):
        # The united set of matched characters; 'negation_f' requests
        # complementation once parsing of the expression is done.
        self.match_set  = NumberSet()
        self.negation_f = False

    def consider_interval(self, Begin, End):
        """Append [Begin, End) to the match set; reject reversed ranges."""
        if Begin > End:
            raise RegularExpressionException("Character range: '-' requires character with 'lower code' to preceed\n" + \
                                             "found range '%s-%s' which corresponds to %i-%i as unicode code points." % \
                                             (utf8.map_unicode_to_utf8(Begin), utf8.map_unicode_to_utf8(End), Begin, End))
        self.match_set.add_interval(Interval(Begin, End))

    def consider_letter(self, CharCode):
        """Treat a single character as the one-element interval."""
        self.consider_interval(CharCode, CharCode + 1)
def create_ALL_BUT_NEWLINE_state_machine():
    """RETURNS: StateMachine accepting exactly one character other than '\\n'."""
    global Setup
    sm = StateMachine()
    # NOTE: Buffer control characters are supposed to be filtered out by
    #       the code generator.
    char_set = NumberSet(Interval(ord("\n")).inverse())

    limit = Setup.get_character_value_limit()
    if limit != sys.maxint:
        # Restrict to the configured character value range.
        char_set.intersect_with(Interval(0, limit))

    sm.add_transition(sm.init_state_index, char_set, AcceptanceF=True)
    return sm
def get_trigger_set_to_target(self, TargetIdx):
    """RETURNS: The trigger set that leads to the state 'TargetIdx'.
    An empty NumberSet is returned if no transition to 'TargetIdx' exists.
    """
    trigger_set = self.__db.get(TargetIdx)
    if trigger_set is not None:
        return trigger_set
    return NumberSet()
def CounterSetupLineColumn_Default():
    """RETURNS: The (cached) default line/column counter setup:
    '\\n' counts one newline, '\\t' snaps to a grid of 4, and every other
    character of the codec's source set counts one column.
    """
    global _CounterSetupLineColumn_Default

    if _CounterSetupLineColumn_Default is not None:
        return _CounterSetupLineColumn_Default

    cmap = CountCmdMap()
    cmap.add(NumberSet(ord('\n')), "newline", 1, SourceRef_DEFAULT)
    cmap.add(NumberSet(ord('\t')), "grid", 4, SourceRef_DEFAULT)
    cmap.define_else("space", 1, SourceRef_DEFAULT)     # Define: "\else"
    cmap.assign_else_count_command(
        Setup.buffer_codec.source_set.minimum(),
        Setup.buffer_codec.source_set.supremum(),       # Apply:  "\else"
        SourceRef_DEFAULT)

    _CounterSetupLineColumn_Default = ParserDataLineColumn(SourceRef_DEFAULT, cmap)
    return _CounterSetupLineColumn_Default
def get_newline_in_codec(TrafoInfo):
    """Translate the code for the newline character into the given codec
    by 'TrafoInfo'.

    RETURNS: None if the transformation is not possible.
    """
    result = NumberSet(ord('\n'))

    if isinstance(TrafoInfo, (str, unicode)):
        # The two state-split codecs are handled by 'transform()' below;
        # any other string is an unknown encoding.
        if TrafoInfo == "utf8-state-split":
            pass
        elif TrafoInfo == "utf16-state-split":
            pass
        else:
            error_msg("Character encoding '%s' unknown to skipper.\n" % TrafoInfo + \
                      "For line number counting assume code of newline character code to be '0x%02X'." % ord('\n'),
                      DontExitF=True)
            return ord('\n')

    result.transform(TrafoInfo)
    return result.get_the_only_element()   # Returns 'None' if there is none
def is_DFA_compliant(self):
    """Checks if the current state transitions are DFA compliant, i.e. it
    investigates if trigger sets pointing to different targets intersect.

    RETURNS: True  => OK
             False => Same triggers point to different target. This cannot
                      be part of a deterministic finite automaton (DFA).

    (Aligned with the sibling implementation: iterate lazily with
    'itervalues()' instead of materializing '.values()' as a list.)
    """
    # DFA's do not have epsilon transitions
    if len(self.__epsilon_target_index_list) != 0:
        return False

    # Check whether trigger sets intersect.
    all_trigger_sets = NumberSet()
    for trigger_set in self.__db.itervalues():
        if all_trigger_sets.has_intersection(trigger_set):
            return False
        all_trigger_sets.unite_with(trigger_set)

    return True
def is_DFA_compliant(self):
    """Checks whether this transition map is DFA compliant, i.e. no two
    trigger sets that point to different targets intersect.

    RETURNS: True  => OK
             False => Same triggers point to different target. This cannot
                      be part of a deterministic finite automaton (DFA).
    """
    # The presence of epsilon transitions disqualifies a DFA immediately.
    if len(self.__epsilon_target_index_list) != 0:
        return False

    # Unite trigger sets one by one; an intersection with the union so
    # far means two targets share a trigger.
    union_so_far = NumberSet()
    for trigger_set in self.__db.itervalues():
        if union_so_far.has_intersection(trigger_set):
            return False
        union_so_far.unite_with(trigger_set)

    return True
def __whitespace_default(self):
    """Try to define default whitespace ' ' or '\t' if their positions
    are not yet occupied in the count_command_map.

    RETURNS: NumberSet of the default whitespace characters that are
             still available (possibly empty, after an error message).
    """
    result = NumberSet()
    for candidate in (NumberSet(ord(" ")), NumberSet(ord("\t"))):
        if not self.count_command_map.find_occupier(candidate, set()):
            result.unite_with(candidate)

    if result.is_empty():
        error_msg("Trying to implement default whitespace ' ' or '\\t' failed.\n"
                  "Characters are occupied by other elements.", self.sr)
    return result
def get_all():
    """RETURNS: A state machine that 'eats' absolutely everything, i.e.
                 .--- \Any ---.
                 |            |
        (0)--- \Any --->(( 0 ))<--------'
    """
    result = StateMachine()

    accept_idx   = index.get()
    accept_state = State(AcceptanceF=True)
    # The acceptance state loops on any character ...
    accept_state.add_transition(NumberSet(Interval(-sys.maxint, sys.maxint)), accept_idx)
    result.states[accept_idx] = accept_state

    # ... and the init state enters it on any character.
    result.get_init_state().add_transition(
        NumberSet(Interval(-sys.maxint, sys.maxint)), accept_idx)
    return result
def get_any():
    """RETURNS: A state machine that 'eats' any character, but only one.

       (0)--- \Any --->(( 0 ))
    """
    sm      = StateMachine()
    any_set = NumberSet(Interval(-sys.maxint, sys.maxint))
    sm.add_transition(sm.init_state_index, any_set, AcceptanceF=True)
    return sm
def __display_set(CharSet, cl): if Setup.query_numeric_f: display = "hex" else: display = "utf8" CharSet.intersect_with(NumberSet(Interval(0, 0x110000))) print "Characters:\n" if Setup.query_interval_f: __print_set_in_intervals(CharSet, display, 80) elif Setup.query_unicode_names_f: __print_set_character_names(CharSet, display, 80) else: __print_set_single_characters(CharSet, display, 80) print
def __display_set(CharSet, cl): if cl.search("--numeric"): display = "hex" else: display = "utf8" CharSet.intersect_with(NumberSet(Interval(0, 0x110000))) print "Characters:\n", if cl.search("--intervals"): __print_set_in_intervals(CharSet, display, 80) elif cl.search("--names"): __print_set_character_names(CharSet, display, 80) else: __print_set_single_characters(CharSet, display, 80) print
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    Appends every parsed [source_begin, source_end, target_begin] record
    to 'section_list'.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
             [2] Error string, or None on success.

    (Idiom fix: 'list.append(section_list, x)' replaced by the normal
    bound-method call 'section_list.append(x)'.)
    """
    source_set = NumberSet()
    drain_set  = NumberSet()
    error_str  = None

    try:
        while error_str is None:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                continue

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file."
                continue

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                continue

            source_end = source_begin + source_size
            section_list.append([source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        # End of file terminates parsing gracefully.
        pass

    return source_set, drain_set, error_str
def __init__(self, fh=-1):
    """Initialize the indentation setup; remember where in the input the
    setup was specified (file name / line number) for error reporting."""
    self.fh = fh
    if fh == -1:
        # No file handle given => placeholder source location.
        self.file_name = "no file handle"
        self.line_n    = -1
    else:
        self.file_name = fh.name
        self.line_n    = get_current_line_info_number(fh)

    self.space_db = {}   # Maps: space width --> character_set
    self.grid_db  = {}   # Maps: grid width  --> character_set

    self.bad_character_set                = LocalizedParameter("bad", NumberSet())
    self.newline_state_machine            = LocalizedParameter("newline", None)
    self.newline_suppressor_state_machine = LocalizedParameter("suppressor", None)

    self.__containing_mode_name = ""
def get_incidence_id_map(self, BeyondIncidenceId=None):
    """RETURNS: A list of pairs: (character_set, incidence_id)

    All same counting actions are referred to by the same incidence id.
    If 'BeyondIncidenceId' is given, the characters of the codec's source
    set that are covered by no entry are appended under that id.
    """
    result = [(entry.character_set, entry.incidence_id) for entry in self.__map]
    if BeyondIncidenceId is None:
        return result

    covered = NumberSet.from_union_of_iterable(entry.character_set for entry in self.__map)
    beyond  = covered.get_complement(Setup.buffer_codec.source_set)
    if not beyond.is_empty():
        result.append((beyond, BeyondIncidenceId))
    return result
def convert_table_to_associative_map(table, ValueColumnIdx, ValueType, KeyColumnIdx):
    """Produces a dictionary that maps from 'keys' to NumberSets. The
    number sets represent the code points for which the key (property)
    is valid.

    ValueColumnIdx: Column that contains the character code interval or
                    string to which one wishes to map.
    KeyColmnIdx:    Column that contains the 'key' to be used for the map

    self.db = database to contain the associative map.
    """
    db = {}

    if ValueType == "NumberSet":
        for row in table:
            key   = row[KeyColumnIdx].strip().replace(" ", "_")
            value = row[ValueColumnIdx]
            if type(value) == int:
                # A single code point --> one-element interval.
                value = Interval(value)
            db.setdefault(key, NumberSet()).quick_append_interval(value, SortF=False)

    elif ValueType in ("number", "string"):
        for row in table:
            key     = row[KeyColumnIdx].strip().replace(" ", "_")
            db[key] = row[ValueColumnIdx]

    else:
        # NOTE(review): BaseException is unusually broad for a plain
        # usage error -- kept for call-site compatibility.
        raise BaseException("ValueType = '%s' unknown.\n" % ValueType)

    # if the content was a number set, it might be simplified, try it.
    if ValueType == "NumberSet":
        for number_set in db.itervalues():
            number_set.clean()

    return db
def do(SM):
    """RETURNS: A state machines that matches anything which is not matched by SM.

    Idea: The paths along SM do not guide to acceptance states, but to normal
    states. Any drop-out is translated into a transition into the 'accept all
    state'.

    NOTE: This function produces a finite state automaton which is not
    applicable by itself. It would eat ANYTHING from a certain state on.

    Requires: SM must be a DFA (asserted per state below).
    """
    result = deepcopy(SM)  # Not clone

    # A dedicated 'Accept-All' state: accepts and loops on any character.
    accept_all_state_index = index.get()
    state = State(AcceptanceF=True)
    state.add_transition(NumberSet(Interval(-sys.maxint, sys.maxint)),
                         accept_all_state_index)
    result.states[accept_all_state_index] = state

    def is_accept_all_state(sm, StateIndex):
        # True <=> 'StateIndex' is an acceptance state whose single
        # transition loops on the complete character range.
        state = sm.states[StateIndex]
        if not state.is_acceptance():
            return False
        tm = state.target_map.get_map()
        if len(tm) != 1:
            return False
        elif tm.iterkeys().next() != StateIndex:
            return False
        elif not tm.itervalues().next().is_all():
            return False
        # Target is an 'Accept-All' state. Delete the transition.
        return True

    for state_index, state in SM.states.iteritems():
        # deepcopy --> use same state indices in SM and result
        result_state = result.states[state_index]
        assert state.target_map.is_DFA_compliant(), \
               "State machine must be transformed to DFA first: nfa_to_dfa.do()"

        # -- Every transition to 'Accept-All' state becomes a drop-out.
        for target_index in (i
                             for i in state.target_map.get_target_state_index_list()
                             if is_accept_all_state(SM, i)):
            result_state.target_map.delete_transitions_to_target(target_index)

        # -- Every drop-out becomes a transition to 'Accept-All' state.
        trigger_set = state.target_map.get_trigger_set_union()
        inverse_trigger_set = trigger_set.get_complement(Setup.buffer_codec.source_set)
        if not inverse_trigger_set.is_empty():
            result_state.add_transition(inverse_trigger_set, accept_all_state_index)

    # Every acceptance state becomes a non-acceptance state.
    # Every non-acceptance state becomes an acceptance state.
    # (The init state is never turned into an acceptance state: the empty
    #  lexeme shall not be matched.)
    for state_index, state in SM.states.iteritems():
        if state.is_acceptance():
            result.states[state_index].set_acceptance(False)
        elif state_index != SM.init_state_index:
            result.states[state_index].set_acceptance(True)

    result.clean_up()
    return result.clone()
def get_unicode_range():
    """RETURNS: NumberSet covering the complete unicode code point
    range [0, 0x110000)."""
    return NumberSet.from_range(0, 0x110000)
def get_codec_element_range():
    """Codec element's size is 2 bytes.

    RETURNS: NumberSet covering [0, 0x10000)."""
    return NumberSet.from_range(0, 0x10000)
def get_character_set(self, Value=None):
    """Returns the character set that corresponds to 'Property==Value'.
    'Value' can be a property value or a property value alias. For
    binary properties 'Value' must be None.

    RETURNS: NumberSet on success; an error message string on failure.

    (BUG FIX: the alias/combination membership tests used the raw
    'Value' while the subsequent lookups indexed with 'adapted_value'
    (spaces replaced by '_'). For values containing spaces this raised
    a KeyError or missed the match. Now 'adapted_value' is used
    consistently for both test and lookup.)
    """
    assert self.type != "Binary" or Value is None

    def get_value_combination(CmbAlias):
        # Resolve a combination alias into the list of value names it stands for.
        result = []
        for alias in self.alias_to_alias_combination_db[CmbAlias]:
            name = self.alias_to_name_map.get(alias)
            if name is None:
                return "Unicode database error: no name related to alias '%s'" % alias
            result.append(name)
        return result

    if self.type != "Binary" and Value is None:
        return "Property '%s' requires a value setting.\n" % self.name + \
               "Possible Values: " + \
               self.get_value_list_help()

    if self.code_point_db is None:
        self.init_code_point_db()

    if self.type == "Binary":
        # Decouple, since we refer to an internal database
        return deepcopy(self.code_point_db)

    # Property value names use '_' where the user may write spaces.
    adapted_value = Value.replace(" ", "_")

    if self.code_point_db.has_key(adapted_value):
        # 'value' is present as name in the code point database
        value = adapted_value
    elif adapted_value in self.alias_to_name_map:
        # 'value' is present as alias in code pointer database
        value = self.alias_to_name_map[adapted_value]
    elif adapted_value in self.alias_to_alias_combination_db:
        # 'value' is present as a combination of aliases
        value = get_value_combination(adapted_value)
    elif self.name_to_alias_map.has_key(adapted_value):
        # The value was a combination of values
        value = get_value_combination(self.name_to_alias_map[adapted_value])
    else:
        # -- WILDCARD MATCH: Results in a list of property values
        character_set = self.__wildcard_value_match(adapted_value)
        if character_set is None:
            return "Property '%s' cannot have a value or value alias '%s'.\n" % (self.name, Value) + \
                   "Possible Values: " + \
                   self.get_value_list_help()
        # No need to decouple, since character is not a reference to
        # internal database (for safety, do it)
        return deepcopy(character_set)

    if type(value) == list:
        result = NumberSet()
        for element in value:
            if element == "Unassigned":
                continue
            entry = self.code_point_db.get(element)
            if entry is None:
                return "%s/%s is not supported by Unicode database." % (self.name, repr(element))
            result.unite_with(entry)
    else:
        result = self.code_point_db.get(value)
        if result is None:
            return "%s/%s is not supported by Unicode database." % (self.name, repr(value))

    # Reference to internal database --> decouple with 'deepcopy'
    return deepcopy(result)
def __init__(self):
    # Set of character codes collected so far for the '[...]' expression.
    self.match_set = NumberSet()
    # True => the collected set is to be complemented at the end.
    self.negation_f = False
def set_all_character_set_UNIT_TEST(self, Begin, End):
    """Unit-test helper: force the codec's source set to the code point
    range [Begin, End)."""
    self.buffer_codec.source_set = NumberSet.from_range(Begin, End)
def do(DB):
    """Verify that the NumberSets stored in 'DB' are mutually disjoint.
    Asserts on the first overlap found; no return value."""
    total = NumberSet()
    for candidate in DB.itervalues():
        assert not candidate.has_intersection(total)
        total.unite_with(candidate)
def get_trigger_set_union(self):
    """RETURNS: Union of all trigger sets of this transition map."""
    union = NumberSet()
    for trigger_set in self.__db.itervalues():
        union.unite_with(trigger_set)
    return union
# mode_db: storing the mode information into a dictionary: # key = mode name # item = Mode object #----------------------------------------------------------------------------------------- mode_db = {} #----------------------------------------------------------------------------------------- # Counter Settings (Default) # Default_NewlineCharDB = { 1: NumberSet([ Interval(0x0A), # Line Feed Interval(0x0B), # Vertical Tab Interval(0x0C), # Form Feed # 0x0D --> set to '0' newlines, see below Interval(0x85), # Next Line Interval(0x2028), # Line Separator Interval(0x2029) ]), # Paragraph Separator 0: NumberSet(Interval(0x0D)), # Carriage Return # # DOS/Windows: 0x0D, 0x0A --> 1 newline } Default_GridCharDB = { 4: NumberSet(ord('\t')) # Tabulator: Grid of 4 columns } Default_SpecialCharDB = { # # Special character sizes are font dependent. # # No assumptions made by default. }