def __init__(self, logger_client, char_data=None):
    """
    Create the character index served by this object, then run the
    `ServerMethodsBase` initialiser.

    `char_data` is the data source handed to `CharIndexes`; when it is
    None a `CharData` instance is created instead.
    `logger_client` is passed straight through to `ServerMethodsBase`.
    """
    data_source = char_data
    if data_source is None:
        # Imported here rather than at module level, so the import
        # only happens when no data source was supplied.
        from char_data.CharData import CharData
        data_source = CharData()

    # The index is attached before the base initialiser runs.
    self.char_indexes = CharIndexes(char_data=data_source)
    ServerMethodsBase.__init__(self, logger_client)
def __get_by_L_block_headings(self, key, value, LUseOnly=None):
    """
    Search the character index for `key`/`value` and return the last
    element of the block-heading grouping of the matches.

    When `LUseOnly` is truthy, the grouping is additionally passed
    through `__filter_to_only` with it; otherwise it is returned as-is.
    """
    if self.char_indexes is None:
        # Build the index lazily the first time it is needed.
        from char_data.CharIndexes import CharIndexes
        self.char_indexes = CharIndexes(char_data=self.char_data)

    group_by_heading = self.block_headings.get_L_block_headings
    matches = self.char_indexes.search(key, value)
    grouped = group_by_heading(matches)[-1]

    if not LUseOnly:
        return grouped
    return self.__filter_to_only(grouped, LUseOnly)
def group_by_alphabet(self, search, char_indexes=None):
    """
    Group the characters for the language/locale `search` under the
    headings supplied by the CLDR data.

    The headings are provided by the CLDR data directly, so when the
    alphabet key is used they are taken straight from that source.

    Returns a flat list that alternates between
    ``('block', (heading, ''))`` and ``('chars', [codepoint, ...])``
    entries: first the alphabet sections, then the symbol sections.
    """
    if char_indexes is None:
        from char_data.CharIndexes import CharIndexes
        char_indexes = CharIndexes(char_data=self)

    lang_data = LangData(search)
    script = ISOTools.split(ISOTools.guess_omitted_info(search)).script

    grouped = []

    # Alphabet sections: every heading comes with a range string that
    # UnicodeSet expands; each expanded member contributes the
    # codepoints of all of its characters.
    for heading, ranges_string in lang_data.get_L_alpha():
        codepoints = [
            ord(char)
            for member in UnicodeSet(self, char_indexes, ranges_string)
            for char in member
        ]
        grouped.extend((
            ('block', (heading, '')),
            ('chars', codepoints),
        ))

    # Symbol sections.
    # NOTE(review): the exact meaning of typ1/typ2 is not documented
    # here; they are only used as heading prefixes when truthy.
    for typ1, typ2, i_L in lang_data.get_L_symbols():
        for heading, chars in i_L:
            if typ2:
                heading = '%s %s' % (typ2, heading)
            if typ1:
                heading = '%s %s' % (typ1, heading)

            # Ignore Latin groups (perMille etc) for languages which
            # don't use the Latin script, and likewise Arabic groups
            # for languages which don't use the Arabic script.
            latin_only = heading.startswith("latn") and script != 'Latn'
            arabic_only = heading.startswith('arab') and script != 'Arab'
            if latin_only or arabic_only:
                continue

            grouped.extend((
                ('block', (heading, '')),
                ('chars', [ord(char) for char in chars]),
            ))

    return grouped
def get_key_info(self, key):
    """
    Build a `CharDataKeyInfo` describing the property `key`.

    For properties that have an index, the matching key info from the
    character index is included (the index is created on first use);
    it is None when the property has no index or the index raises
    KeyError for this key.
    """
    inst = self.get_class_by_property(key)

    index_key_info = None
    if inst.index:
        if not self.char_indexes:
            from char_data.CharIndexes import CharIndexes
            self.char_indexes = CharIndexes(char_data=self)

        try:
            index_key_info = self.char_indexes.get_key_info(key)
        except KeyError:
            # Not present in the index: keep the None default.
            pass

    source_key = inst.parent.key  # HACK!
    return CharDataKeyInfo(
        key,
        inst.key,
        inst.short_desc,
        inst.header_const,
        source_key,
        index_key_info,
    )
class CharIndexServer(ServerMethodsBase):
    """
    Server that exposes the methods of a `CharIndexes` instance
    through `json_method`-decorated wrappers.
    """
    port = 40518
    name = 'char_idx'

    def __init__(self, logger_client, char_data=None):
        """
        Create the character index, then run the `ServerMethodsBase`
        initialiser with `logger_client`.

        When `char_data` is None a `CharData` instance is created and
        used as the data source of the index.
        """
        data_source = char_data
        if data_source is None:
            from char_data.CharData import CharData
            data_source = CharData()

        self.char_indexes = CharIndexes(char_data=data_source)
        ServerMethodsBase.__init__(self, logger_client)

    @json_method
    def search(self, key: str, value: str, args, kw):
        """
        Forward to `CharIndexes.search`, expanding `args`/`kw` as the
        extra positional/keyword arguments.

        A result whose class is named 'NumArray' is copied into a
        plain list element by element (presumably so that it can be
        serialised -- confirm); anything else is returned unchanged.
        """
        found = self.char_indexes.search(key, value, *args, **kw)
        if found.__class__.__name__ != 'NumArray':
            return found
        return [found[idx] for idx in range(len(found))]

    @json_method
    def keys(self):
        """Forward to `CharIndexes.keys`."""
        return self.char_indexes.keys()

    @json_method
    def get_key_info(self, key: str):
        """
        Return the key info for `key` as a tuple, or None when the
        index returns a falsy value for it.
        """
        info = self.char_indexes.get_key_info(key)
        return info.to_tuple() if info else None

    @json_method
    def values(self, key: str):
        """Forward to `CharIndexes.values` for `key`."""
        return self.char_indexes.values(key)

    @json_method
    def get_value_info(self, key: str, value):
        """
        Return the value info for `key`/`value` as a tuple, or None
        when the index returns a falsy value for it.
        """
        info = self.char_indexes.get_value_info(key, value)
        return info.to_tuple() if info else None
comment = '' if tech_notice and see_also: comment = '%s. %s' % (tech_notice, see_also) elif tech_notice: comment = tech_notice elif see_also: comment = see_also if comment: LRtn.append(('sub_block', ['. '.join(sub_block), comment])) else: LRtn.append(('sub_block', ['. '.join(sub_block), None])) if __name__ == '__main__': from char_data.CharData import CharData from char_data.CharIndexes import CharIndexes char_data = CharData() char_indexes = CharIndexes(char_data=char_data) from pprint import pprint pprint(list(char_indexes.keys())) pprint(list(char_data.keys())) block_headings = BlockHeadings(char_data=char_data) pprint( BlockHeadings(char_data, char_indexes.search('unicodedata.script', 'Arabic')))
class CharData(
    PropertyAccessBase,
    DataReader,
    HeadingGrouperBase,
    CharDataBase,
    Singleton,
):
    """
    Entry point for looking up information about Unicode codepoints.

    Attribute lookups that fail on the instance itself are forwarded
    to `self.data_reader` (see `__getattr__`).
    """

    def __init__(self):
        """
        A class that allows looking up information about given
        Unicode codepoints.

        The lookup methods take the property key first and the
        character (or its ordinal) second. For example,
        `raw_data('name', 'a')` will give `('LATIN SMALL LETTER A',)`.
        NOTE(review): `keys()` returns keys of the form
        'source.property' -- confirm whether the bare 'name' form is
        accepted by `get_class_by_property`.
        """
        self.data_reader = None
        # Created lazily by get_key_info() the first time it is needed.
        self.char_indexes = None

        DataReader.__init__(self)
        PropertyAccessBase.__init__(self, self)

        from char_data.run_after_loaded import run_all
        run_all()  # HACK!

    def __hasattr__(self, item):
        # NOTE: `__hasattr__` is not a Python special method, so the
        # builtin hasattr() never calls it; it only has an effect for
        # code that invokes it explicitly.
        if self.data_reader:
            return hasattr(self.data_reader, item) or \
                   item in self.__dict__
        else:
            return hasattr(self, item)

    def __getattr__(self, item):
        # Only called when normal attribute lookup has already failed:
        # fall back to the data reader if one has been assigned.
        if 'data_reader' in self.__dict__:
            return getattr(self.data_reader, item)
        # Name the missing attribute so the failure can be diagnosed.
        raise AttributeError(
            "%r object has no attribute %r" % (type(self).__name__, item)
        )

    @copydoc(CharDataBase.get_data_sources)
    def get_data_sources(self):
        return [i[0] for i in self.LData]

    #=============================================================#
    #                  Get Character Data Keys                    #
    #=============================================================#

    @copydoc(CharDataBase.keys)
    def keys(self, data_source=None):
        LRtn = []
        for source_name, _ in self.LData:
            if data_source and data_source != source_name:
                continue

            source = getattr(self, source_name)
            # Every attribute of the source that is a
            # PropertyFormatterBase counts as a key of that source.
            for attr_name in dir(source):
                candidate = getattr(source, attr_name)
                if not isinstance(candidate, PropertyFormatterBase):
                    continue
                LRtn.append('%s.%s' % (source_name, attr_name))
        return sorted(LRtn)

    @copydoc(CharDataBase.get_key_info)
    def get_key_info(self, key):
        inst = self.get_class_by_property(key)

        ciki = None
        if inst.index:
            if not self.char_indexes:
                from char_data.CharIndexes import CharIndexes
                self.char_indexes = CharIndexes(char_data=self)

            try:
                ciki = self.char_indexes.get_key_info(key)
            except KeyError:
                # The index has no info for this key.
                ciki = None

        source = inst.parent.key  # HACK!
        return CharDataKeyInfo(key, inst.key,
                               inst.short_desc,
                               inst.header_const,
                               source, ciki)

    #=============================================================#
    #                   Get Two-Level Mappings                    #
    #=============================================================#

    @copydoc(CharDataBase.get_two_level_mapping)
    def get_two_level_mapping(self, key):
        return DTwoLevelMappings.get(key, None)

    #=============================================================#
    #                     Get Character Data                      #
    #=============================================================#

    @copydoc(CharDataBase.get_all_data_for_codepoint)
    def get_all_data_for_codepoint(self, ord_):
        ord_ = int(ord_)

        # header constant -> [(key, short_desc, html_value, raw_value)]
        DData = {}

        # keys() already returns a fresh sorted list, so no copy is
        # needed before iterating.
        for key in self.keys():
            inst = self.get_class_by_property(key)
            short_desc = inst.short_desc

            # Get the raw value, to allow linking (if relevant)
            raw_value = None
            if inst.index and inst.index.typ != 'fulltext':
                raw_value = inst.raw_data(ord_)
                if not isinstance(raw_value, str):
                    # FIXME: Add support for non-string values!
                    raw_value = None

            html_value = inst.html_formatted(ord_)
            if html_value is not None:
                DData.setdefault(inst.header_const, []).append(
                    (key, short_desc, html_value, raw_value))

        # Within each header, order the rows by their short description.
        for rows in DData.values():
            rows.sort(key=lambda row: row[1])

        # A "Code Point" header (Unicode 'U+XXXX' and XML/HTML '&#N;'
        # rows, placed after scripts/blocks as header 6.5) was
        # sketched here in the original but is disabled.

        # Make sorted/output by header order; a None header sorts as
        # 65535.
        return [
            (header, DData[header])
            for header in sorted(
                DData, key=lambda x: 65535 if x is None else x)
        ]

    @copydoc(CharDataBase.raw_data)
    def raw_data(self, key, ord_):
        # Accept either a character (str) or an integer ordinal.
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)
        inst = self.get_class_by_property(key)
        return inst.raw_data(ord_)

    @copydoc(CharDataBase.formatted)
    def formatted(self, key, ord_):
        # Accept either a character (str) or an integer ordinal.
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)
        inst = self.get_class_by_property(key)
        return inst.formatted(ord_)

    @copydoc(CharDataBase.html_formatted)
    def html_formatted(self, key, ord_):
        # Accept either a character (str) or an integer ordinal.
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)
        inst = self.get_class_by_property(key)
        return inst.html_formatted(ord_)
class ReformattedData(ExternalSourceBase):
    """
    External source named 'reformatted' that regroups characters found
    through the character index under block headings.

    It exposes two formatters: `emoji_and_other_symbols` (characters
    whose 'unicodedata.script' is 'Common', split by the headings in
    `DCommonMappings`) and `inherited` (characters whose
    'unicodedata.script' is 'Inherited').
    """

    def __init__(self, char_data, char_indexes=None):
        """
        `char_data` is the data source used for the block headings and,
        when `char_indexes` is None, for the lazily created
        `CharIndexes` instance.
        """
        def get_filter_fn(L):
            # `L` may be a one-shot iterator (the caller below passes
            # a generator expression). Materialise it once, otherwise
            # every call of `fn` after the first would iterate an
            # exhausted generator and return an empty list.
            LPairs = list(L)

            def fn():
                # The 'Common' script grouping is computed once and
                # then reused for every heading.
                if not hasattr(self, 'LCommon'):
                    self.LCommon = self.__get_by_L_block_headings(
                        'unicodedata.script', 'Common')

                return [
                    [
                        heading,
                        self.__filter_to_only(self.LCommon,
                                              LUseOnly=LUseOnly)
                    ]
                    for heading, LUseOnly in LPairs
                ]
            return fn

        self.emoji_and_other_symbols = ReformatDataFormatter(
            self,
            header_const=HEADER_OTHER_SYMBOLS,
            original_name='emoji and other symbols',
            short_desc="Emoji and Other Symbols",
            # Note that DCommonMappings is a SortedDict, so the keys
            # are in order as defined in DCommonMappings.py
            get_L_data=get_filter_fn(
                [mapping, DCommonMappings[mapping]]
                for mapping in DCommonMappings.keys()))

        self.inherited = ReformatDataFormatter(
            self,
            header_const=HEADER_OTHER_SYMBOLS,
            original_name='inherited',
            short_desc='Inherited Combining Characters (etc)',
            get_L_data=lambda: [[
                'Inherited Combining Characters (etc)',
                self.__get_by_L_block_headings('unicodedata.script',
                                               'Inherited')
            ]])

        #self.ipa = ReformatData(
        #    self, header_const=HEADER_FIXME, original_name='ipa', short_desc,
        #    LData=self.__get_by_L_block_headings(FIXME)
        #)

        # The formatters above are assigned before the base
        # initialiser runs, as in the original ordering.
        ExternalSourceBase.__init__(self, char_data, 'reformatted')
        self.char_data = char_data
        self.char_indexes = char_indexes
        self.block_headings = BlockHeadings(char_data=char_data)

    def __get_by_L_block_headings(self, key, value, LUseOnly=None):
        """
        Search the character index for `key`/`value` and return the
        last element of the block-heading grouping of the matches,
        optionally restricted to the block headings in `LUseOnly`.
        """
        if self.char_indexes is None:
            # Build the index lazily the first time it is needed.
            from char_data.CharIndexes import CharIndexes
            self.char_indexes = CharIndexes(char_data=self.char_data)

        r = self.block_headings.get_L_block_headings(
            self.char_indexes.search(key, value))[-1]

        if LUseOnly:
            return self.__filter_to_only(r, LUseOnly)
        return r

    def __filter_to_only(self, L, LUseOnly):
        """
        Return the items of `L` that belong to a block whose heading
        (`item[1][0]` of the 'block' item) is in `LUseOnly`.

        `L` is a sequence of items tagged by `item[0]` as 'block',
        'sub_block' or 'chars'. A 'block' item switches output on or
        off for itself and for the items that follow it.

        Raises Exception for any other tag, and AssertionError when a
        'sub_block' item appears before any 'block' item.
        """
        LOut = []
        cur_block_item = None
        use_chars = False

        for item in L:
            kind = item[0]
            if kind == 'block':
                cur_block_item = item
                use_chars = cur_block_item[1][0] in LUseOnly
                if use_chars:
                    LOut.append(cur_block_item)
            elif kind == 'sub_block':
                assert cur_block_item
                if use_chars:
                    LOut.append(item)
            elif kind == 'chars':
                if use_chars:
                    LOut.append(item)
            else:
                raise Exception("Unknown value type: %s" % item[0])
        return LOut