Exemple #1
0
    def __init__(self, logger_client, char_data=None):
        """
        Create the character indexes, then initialise the server base class.

        :param logger_client: passed through unchanged to
                              `ServerMethodsBase.__init__`
        :param char_data: the character data object given to `CharIndexes`;
                          when `None`, a new `CharData()` is created
        """
        if char_data is None:
            # Imported here rather than at module level, so the import
            # only happens when no `char_data` has been supplied
            from char_data.CharData import CharData
            char_data = CharData()

        # The indexes are assigned before the base initialiser is run
        indexes = CharIndexes(char_data=char_data)
        self.char_indexes = indexes
        ServerMethodsBase.__init__(self, logger_client)
Exemple #2
0
    def __get_by_L_block_headings(self, key, value, LUseOnly=None):
        """
        Search the character indexes for `key`/`value`, pass the matches to
        `block_headings.get_L_block_headings`, and return the last element
        of what that gives back.

        :param key: the index key, handed to `char_indexes.search`
        :param value: the value to look for under `key`
        :param LUseOnly: when truthy, the result is first narrowed down
                         with `__filter_to_only(result, LUseOnly)`
        """
        # Build the indexes on demand while the attribute is still `None`
        if self.char_indexes is None:
            from char_data.CharIndexes import CharIndexes
            self.char_indexes = CharIndexes(char_data=self.char_data)

        get_headings = self.block_headings.get_L_block_headings
        found = self.char_indexes.search(key, value)
        headings = get_headings(found)[-1]

        if not LUseOnly:
            return headings
        return self.__filter_to_only(headings, LUseOnly)
    def group_by_alphabet(self, search, char_indexes=None):
        """
        The headings are actually provided by the CLDR data directly,
        so if using the alphabet key, grab directly from the original source!

        The result is a flat list in which every `('block', (heading, ''))`
        entry is immediately followed by a `('chars', [codepoint, ...])`
        entry: first one pair for each heading from
        `LangData.get_L_alpha()`, then one pair for each heading from
        `LangData.get_L_symbols()` which isn't skipped.

        :param search: given to `LangData`, and to
                       `ISOTools.guess_omitted_info` to work out the script
        :param char_indexes: the indexes handed to `UnicodeSet`; when `None`,
                             a `CharIndexes(char_data=self)` is created
        """
        if char_indexes is None:
            from char_data.CharIndexes import CharIndexes
            char_indexes = CharIndexes(char_data=self)

        lang_data = LangData(search)
        script = ISOTools.split(ISOTools.guess_omitted_info(search)).script

        grouped = []

        # Alphabet sections: each ranges string is expanded via UnicodeSet,
        # and every character of every item it yields becomes a codepoint
        for heading, ranges_string in lang_data.get_L_alpha():
            codepoints = []
            for chars in UnicodeSet(self, char_indexes, ranges_string):
                codepoints.extend(ord(char) for char in chars)

            grouped.append(('block', (heading, '')))
            grouped.append(('chars', codepoints))

        # Symbol sections
        for typ1, typ2, headed_chars in lang_data.get_L_symbols():
            for heading, chars in headed_chars:
                # NOTE: the original author was unsure what typ1/typ2
                # signify. When truthy they are prefixed onto the heading:
                # typ2 first, then typ1 in front of that.
                if typ2:
                    heading = '%s %s' % (typ2, heading)
                if typ1:
                    heading = '%s %s' % (typ1, heading)

                # Ignore Latin perMille etc for languages which don't use
                # the Latin script, and likewise the Arabic group etc for
                # languages which don't use the Arabic script
                if (
                    (heading.startswith("latn") and script != 'Latn') or
                    (heading.startswith('arab') and script != 'Arab')
                ):
                    continue

                codepoints = [ord(char) for char in chars]
                grouped.append(('block', (heading, '')))
                grouped.append(('chars', codepoints))

        # NOTE: commented-out calls to further `lang_data` methods
        # (currency symbol, locale pattern, ellipsis, quotes, parentheses)
        # used to sit here; none of those are included in the result.
        return grouped
Exemple #4
0
    def get_key_info(self, key):
        """
        Build a `CharDataKeyInfo` describing the property `key`.

        The index key info is only looked up when the property's formatter
        has a truthy `index`. When it doesn't, or when the lookup raises
        `KeyError`, `None` is passed in its place.
        """
        inst = self.get_class_by_property(key)

        index_key_info = None
        if inst.index:
            if not self.char_indexes:
                # Created on demand, the first time they're needed
                from char_data.CharIndexes import CharIndexes
                self.char_indexes = CharIndexes(char_data=self)

            try:
                index_key_info = self.char_indexes.get_key_info(key)
            except KeyError:
                # Treated the same as the property having no index
                index_key_info = None

        # HACK: the source is read from the key of the formatter's parent
        source = inst.parent.key

        return CharDataKeyInfo(
            key, inst.key, inst.short_desc,
            inst.header_const, source, index_key_info
        )
Exemple #5
0
class CharIndexServer(ServerMethodsBase):
    """
    Exposes the methods of a `CharIndexes` instance as
    `@json_method`-decorated server methods.
    """
    port = 40518
    name = 'char_idx'

    def __init__(self, logger_client, char_data=None):
        """
        Create the character indexes, then initialise the server base class.

        :param logger_client: passed through unchanged to
                              `ServerMethodsBase.__init__`
        :param char_data: the character data object given to `CharIndexes`;
                          when `None`, a new `CharData()` is created
        """
        if char_data is None:
            # Only imported when no `char_data` has been supplied
            from char_data.CharData import CharData
            char_data = CharData()

        indexes = CharIndexes(char_data=char_data)
        self.char_indexes = indexes
        ServerMethodsBase.__init__(self, logger_client)

    @json_method
    def search(self, key: str, value: str, args, kw):
        """
        Search the index `key` for `value`. `args` and `kw` are unpacked as
        the extra positional/keyword arguments of `CharIndexes.search`.
        """
        found = self.char_indexes.search(key, value, *args, **kw)
        if found.__class__.__name__ != 'NumArray':
            return found

        # `NumArray` results are copied into a plain list, index by index
        # (presumably so that they can be serialised -- TODO confirm)
        return [found[idx] for idx in range(len(found))]

    @json_method
    def keys(self):
        """Return `CharIndexes.keys()` unchanged."""
        index_keys = self.char_indexes.keys()
        return index_keys

    @json_method
    def get_key_info(self, key: str):
        """
        Return the key info for `key` converted with `to_tuple()`,
        or `None` when the key info is falsy.
        """
        key_info = self.char_indexes.get_key_info(key)
        return key_info.to_tuple() if key_info else None

    @json_method
    def values(self, key: str):
        """Return `CharIndexes.values(key)` unchanged."""
        index_values = self.char_indexes.values(key)
        return index_values

    @json_method
    def get_value_info(self, key: str, value):
        """
        Return the value info for `key`/`value` converted with `to_tuple()`,
        or `None` when the value info is falsy.
        """
        value_info = self.char_indexes.get_value_info(key, value)
        return value_info.to_tuple() if value_info else None
Exemple #6
0
        comment = ''
        if tech_notice and see_also:
            comment = '%s. %s' % (tech_notice, see_also)
        elif tech_notice:
            comment = tech_notice
        elif see_also:
            comment = see_also

        if comment:
            LRtn.append(('sub_block', ['. '.join(sub_block), comment]))
        else:
            LRtn.append(('sub_block', ['. '.join(sub_block), None]))


if __name__ == '__main__':
    # Manual smoke test: print the available index keys and data keys,
    # then the block headings for the characters which the indexes
    # return for the Arabic script.
    from pprint import pprint

    from char_data.CharData import CharData
    from char_data.CharIndexes import CharIndexes

    char_data = CharData()
    char_indexes = CharIndexes(char_data=char_data)

    pprint(list(char_indexes.keys()))
    pprint(list(char_data.keys()))

    # The search results go to `get_L_block_headings` on the instance.
    # Previously they were passed as a second constructor argument, which
    # left `block_headings` unused and printed the object, not the headings.
    block_headings = BlockHeadings(char_data=char_data)
    pprint(
        block_headings.get_L_block_headings(
            char_indexes.search('unicodedata.script', 'Arabic')))
Exemple #7
0
class CharData(
        PropertyAccessBase,
        DataReader,
        HeadingGrouperBase,
        CharDataBase,
        Singleton,
):
    def __init__(self):
        """
        A class that allows looking up information
        about given Unicode codepoints.

        For example, `raw_data('name', 'a')` will give
        `('LATIN SMALL LETTER A',)`.
        """
        # `__getattr__` forwards failed attribute lookups to this
        # object whenever it is set to something other than `None`
        self.data_reader = None
        # Created on demand by `get_key_info`
        self.char_indexes = None

        DataReader.__init__(self)
        PropertyAccessBase.__init__(self, self)

        from char_data.run_after_loaded import run_all
        run_all()  # HACK!

    def __hasattr__(self, item):
        # NOTE: Python has no `__hasattr__` hook. The builtin `hasattr()`
        # calls `getattr()` and checks for AttributeError, so this method
        # only ever runs when it is called explicitly. It is kept so that
        # any such explicit callers continue to work.
        if self.data_reader:
            return hasattr(self.data_reader, item) or \
                   item in self.__dict__
        else:
            return hasattr(self, item)

    def __getattr__(self, item):
        # Only invoked once normal attribute lookup has failed.
        # `__dict__` is consulted directly, so that a `data_reader` which
        # hasn't been assigned yet can't recurse back into this method.
        data_reader = self.__dict__.get('data_reader')
        if data_reader is not None:
            return getattr(data_reader, item)

        # Name the missing attribute, rather than raising a bare
        # AttributeError (or a misleading "'NoneType' object has no
        # attribute ..." while `data_reader` is still None)
        raise AttributeError(
            '%r object has no attribute %r' % (type(self).__name__, item)
        )

    # NOTE(review): the methods below carry no docstrings of their own;
    # `@copydoc` presumably copies them from `CharDataBase` -- confirm.

    @copydoc(CharDataBase.get_data_sources)
    def get_data_sources(self):
        # The first element of each `LData` entry is the data source key
        return [i[0] for i in self.LData]

    #=============================================================#
    #                  Get Character Data Keys                    #
    #=============================================================#

    @copydoc(CharDataBase.keys)
    def keys(self, data_source=None):
        LRtn = []

        for source_key, _ in self.LData:
            # A truthy `data_source` restricts the output to that source
            if data_source and data_source != source_key:
                continue
            source = getattr(self, source_key)

            # Every attribute of the source which is a property formatter
            # is reported as "<source key>.<attribute name>"
            for attr_name in dir(source):
                if isinstance(getattr(source, attr_name),
                              PropertyFormatterBase):
                    LRtn.append('%s.%s' % (source_key, attr_name))

        return sorted(LRtn)

    @copydoc(CharDataBase.get_key_info)
    def get_key_info(self, key):
        inst = self.get_class_by_property(key)

        # The index key info stays `None` unless the formatter has an
        # index, and the indexes know about this key
        ciki = None
        if inst.index:
            if not self.char_indexes:
                # Created on demand, the first time they're needed
                from char_data.CharIndexes import CharIndexes
                self.char_indexes = CharIndexes(char_data=self)

            try:
                ciki = self.char_indexes.get_key_info(key)
            except KeyError:
                ciki = None

        source = inst.parent.key  # HACK!

        return CharDataKeyInfo(key, inst.key, inst.short_desc,
                               inst.header_const, source, ciki)

    #=============================================================#
    #                   Get Two-Level Mappings                    #
    #=============================================================#

    @copydoc(CharDataBase.get_two_level_mapping)
    def get_two_level_mapping(self, key):
        # `None` when there is no mapping for this key
        return DTwoLevelMappings.get(key, None)

    #=============================================================#
    #                     Get Character Data                      #
    #=============================================================#

    @copydoc(CharDataBase.get_all_data_for_codepoint)
    def get_all_data_for_codepoint(self, ord_):
        ord_ = int(ord_)
        DData = {}

        # `keys()` already returns a new sorted list, so no copy is needed
        for key in self.keys():
            inst = self.get_class_by_property(key)
            short_desc = inst.short_desc

            # Get the raw value, to allow linking (if relevant): only for
            # properties with an index which isn't of type 'fulltext'
            raw_data = None
            if inst.index and inst.index.typ != 'fulltext':
                raw_data = inst.raw_data(ord_)
                if not isinstance(raw_data, str):
                    # FIXME: Add support for non-string values!
                    raw_data = None

            html_value = inst.html_formatted(ord_)

            # Properties without an HTML value for this codepoint are
            # left out; the rest are grouped by their header constant
            if html_value is not None:
                DData.setdefault(inst.header_const, []).append(
                    (key, short_desc, html_value, raw_data))

        # Within each header, order the rows by their short description
        for rows in DData.values():
            rows.sort(key=lambda row: row[1])

        # NOTE: a disabled sketch of an extra "Code Point" header
        # (U+XXXX and XML/HTML entity rows, keyed as 6.5 so as to come
        # after scripts/blocks) used to sit here.

        # Make sorted/output by header order. A header of `None`
        # is sorted as though it were 65535.
        return [
            (header_const, DData[header_const])
            for header_const in sorted(
                DData, key=lambda x: 65535 if x is None else x)
        ]

    @copydoc(CharDataBase.raw_data)
    def raw_data(self, key, ord_):
        # A `str` is converted with `w_ord` first
        # (presumably character -> codepoint; TODO confirm)
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)

        inst = self.get_class_by_property(key)
        return inst.raw_data(ord_)

    @copydoc(CharDataBase.formatted)
    def formatted(self, key, ord_):
        # A `str` is converted with `w_ord` first, as in `raw_data`
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)

        inst = self.get_class_by_property(key)
        return inst.formatted(ord_)

    @copydoc(CharDataBase.html_formatted)
    def html_formatted(self, key, ord_):
        # A `str` is converted with `w_ord` first, as in `raw_data`
        if isinstance(ord_, str):
            ord_ = w_ord(ord_)

        inst = self.get_class_by_property(key)
        return inst.html_formatted(ord_)
Exemple #8
0
class ReformattedData(ExternalSourceBase):
    def __init__(self, char_data, char_indexes=None):
        """
        An external source named 'reformatted', which regroups the block
        headings of characters found through the character indexes.

        Two formatters are registered:

        * `emoji_and_other_symbols`: the block headings for the 'Common'
          script, filtered once per entry of `DCommonMappings`
        * `inherited`: the block headings for the 'Inherited' script

        :param char_data: given to `ExternalSourceBase.__init__` and to
                          `BlockHeadings`, and used to create the indexes
                          when `char_indexes` is `None`
        :param char_indexes: the indexes to search; when `None`, they're
                             created the first time they are needed
        """
        def get_filter_fn(L):
            # Materialise the (heading, LUseOnly) pairs once. `L` may be a
            # one-shot generator (as it is below), in which case iterating
            # it directly inside `fn` would make every call after the
            # first silently return an empty list.
            LMappings = list(L)

            def fn():
                # The 'Common' headings are fetched on the first call,
                # and then cached on the instance
                if not hasattr(self, 'LCommon'):
                    self.LCommon = self.__get_by_L_block_headings(
                        'unicodedata.script', 'Common')

                return [
                    [
                        heading,
                        self.__filter_to_only(self.LCommon, LUseOnly=LUseOnly)
                    ]
                    for heading, LUseOnly in LMappings
                ]

            return fn

        self.emoji_and_other_symbols = ReformatDataFormatter(
            self,
            header_const=HEADER_OTHER_SYMBOLS,
            original_name='emoji and other symbols',
            short_desc="Emoji and Other Symbols",
            # Note that DCommonMappings is a SortedDict, so the keys are in order as defined in DCommonMappings.py
            get_L_data=get_filter_fn([mapping, DCommonMappings[mapping]]
                                     for mapping in DCommonMappings.keys()))

        self.inherited = ReformatDataFormatter(
            self,
            header_const=HEADER_OTHER_SYMBOLS,
            original_name='inherited',
            short_desc='Inherited Combining Characters (etc)',
            get_L_data=lambda: [[
                'Inherited Combining Characters (etc)',
                self.__get_by_L_block_headings('unicodedata.script',
                                               'Inherited')
            ]])

        # TODO: an 'ipa' formatter was sketched here but never finished
        # (its header constant and search arguments were left as FIXME).

        ExternalSourceBase.__init__(self, char_data, 'reformatted')

        # NOTE: the `get_L_data` callables above read these attributes, so
        # they only work once the assignments below have been made.
        self.char_data = char_data
        self.char_indexes = char_indexes
        self.block_headings = BlockHeadings(char_data=char_data)

    def __get_by_L_block_headings(self, key, value, LUseOnly=None):
        """
        Search the character indexes for `key`/`value`, pass the matches to
        `block_headings.get_L_block_headings`, and return the last element
        of what that gives back.

        :param key: the index key, handed to `char_indexes.search`
        :param value: the value to look for under `key`
        :param LUseOnly: when truthy, the result is first narrowed down
                         with `__filter_to_only(result, LUseOnly)`
        """
        # Build the indexes on demand while the attribute is still `None`
        if self.char_indexes is None:
            from char_data.CharIndexes import CharIndexes
            self.char_indexes = CharIndexes(char_data=self.char_data)

        r = self.block_headings.get_L_block_headings(
            self.char_indexes.search(key, value))[-1]

        if LUseOnly:
            return self.__filter_to_only(r, LUseOnly)
        return r

    def __filter_to_only(self, L, LUseOnly):
        """
        Return the items of `L` which belong to a block named in `LUseOnly`.

        `L` is a flat sequence of items whose first element is one of
        'block', 'sub_block' or 'chars'. A 'block' item is kept when
        `item[1][0]` is in `LUseOnly`; the 'sub_block' and 'chars' items
        after it are kept or dropped along with it, up to the next 'block'.

        :param L: the items to filter
        :param LUseOnly: a container of the `item[1][0]` values to keep
        :raises Exception: if an item's first element is anything else
        """
        LOut = []

        cur_block_item = None
        use_chars = False

        for item in L:
            typ = item[0]

            if typ == 'block':
                cur_block_item = item
                # Decides the fate of everything up to the next block
                use_chars = cur_block_item[1][0] in LUseOnly
                if use_chars:
                    LOut.append(cur_block_item)

            elif typ == 'sub_block':
                # Internal invariant: a sub-block is never
                # expected before the first block
                assert cur_block_item

                if use_chars:
                    LOut.append(item)

            elif typ == 'chars':
                if use_chars:
                    LOut.append(item)

            else:
                raise Exception("Unknown value type: %s" % typ)

        return LOut