Python read_single_byteの例、ecma35.data.singlebyte.sbmapparsers.read_single_byte Pythonの例

コード例 #1

0

ファイルを表示

ファイル: indic.py プロジェクト: harjitmoe/ecma35lib

    0x0E53,
    0x0E54,
    0x0E55,
    0x0E56,
    0x0E57,
    0x0E58,
    0x0E59,
    0x0E5A,
    0x0E5B,
    None,
    None,
    None,
    None,
))

# Code page 874 and its relatives (extensions of TIS-620).
# According to the alias comments in ICU's convrtrs.txt, IBM's 874 is the same as IBM's 9066,
# whereas Microsoft's 874 shares its layout with IBM's 1162.
graphdata.rhses["1162"] = parsers.read_single_byte("WHATWG/index-windows-874.txt")
graphdata.rhses["9066"] = parsers.read_single_byte(
    "ICU/ibm-874_P100-1995.ucm")
# Build "874" as a merge: take the Microsoft (1162) entry wherever it is truthy and fall back
#   to the IBM (9066) entry otherwise. The two only collide at 0xA0, which IBM uses for an
#   alternate U+0E48 and which Microsoft uses for an NBSP; the more-deployed
#   Microsoft / ISO-8859-11 NBSP is favoured.
graphdata.rhses["874"] = tuple(
    microsoft if microsoft else ibm
    for microsoft, ibm in zip(graphdata.rhses["1162"], graphdata.rhses["9066"]))
# Both identifiers are given the very same defgsets tuple.
graphdata.defgsets["874"] = graphdata.defgsets["1162"] = (
    "ir006", "ir166", "nil", "nil")

# Macintosh code page (doesn't have a Mozilla file)
#graphdata.rhses["10021"] = parsers.read_mozilla_ut_file("Mozilla/macthai.ut")

コード例 #2

0

ファイルを表示

             0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, 
             0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 
             0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 
             0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 
             0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 
             0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 
             0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 
             0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 
             0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 
             0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 
             0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F))

# TODO: ir153 (Russian subset of ISO-8859-5), ir200 (Uralic Cyrillic), ir201 (Volgaic Cyrillic)

# Further KOI-8 encodings, each registered under two identifiers.
graphdata.rhses["878"] = graphdata.rhses["20866"] = parsers.read_single_byte(
    "WHATWG/index-koi8-r.txt")
# TODO: number 21866 is in reality used for both KOI8-U and KOI8-RU.
graphdata.rhses["1168"] = graphdata.rhses["21866"] = parsers.read_single_byte(
    "WHATWG/index-koi8-u.txt")

# The Windows encoding
graphdata.rhses["1251"] = parsers.read_single_byte(
    "WHATWG/index-windows-1251.txt")

# 10007/1283 is the original MacCyrillic; current MacCyrillic is a Euro update of 10017.
# Since mappings to U+00A4 were changed to U+20AC across the board, the current mapping is
#   numbered 10017, while 10007/1283 gets a copy that keeps that change but reverts the
#   others (positions 0x22 and 0x36 are overridden below).
graphdata.rhses["10017"] = parsers.read_single_byte(
    "WHATWG/index-x-mac-cyrillic.txt")
maccy = [*graphdata.rhses["10017"]]  # mutable copy so individual positions can be replaced
maccy[0x22], maccy[0x36] = (0x00A2,), (0x2202,)
graphdata.rhses["10007"] = graphdata.rhses["1283"] = tuple(maccy)

コード例 #3

0

ファイルを表示

ファイル: greek.py プロジェクト: harjitmoe/ecma35lib

    (0x00B2, ), (0x00B3, ), (0x0384, ), (0x0385, ), (0x0386, ), (0x00B7, ),
    (0x0388, ), (0x0389, ), (0x038A, ), (0x00BB, ), (0x038C, ), (0x00BD, ),
    (0x038E, ), (0x038F, ), (0x0390, ), (0x0391, ), (0x0392, ), (0x0393, ),
    (0x0394, ), (0x0395, ), (0x0396, ), (0x0397, ), (0x0398, ), (0x0399, ),
    (0x039A, ), (0x039B, ), (0x039C, ), (0x039D, ), (0x039E, ), (0x039F, ),
    (0x03A0, ), (0x03A1, ), None, (0x03A3, ), (0x03A4, ), (0x03A5, ),
    (0x03A6, ), (0x03A7, ), (0x03A8, ), (0x03A9, ), (0x03AA, ), (0x03AB, ),
    (0x03AC, ), (0x03AD, ), (0x03AE, ), (0x03AF, ), (0x03B0, ), (0x03B1, ),
    (0x03B2, ), (0x03B3, ), (0x03B4, ), (0x03B5, ), (0x03B6, ), (0x03B7, ),
    (0x03B8, ), (0x03B9, ), (0x03BA, ), (0x03BB, ), (0x03BC, ), (0x03BD, ),
    (0x03BE, ), (0x03BF, ), (0x03C0, ), (0x03C1, ), (0x03C2, ), (0x03C3, ),
    (0x03C4, ), (0x03C5, ), (0x03C6, ), (0x03C7, ), (0x03C8, ), (0x03C9, ),
    (0x03CA, ), (0x03CB, ), (0x03CC, ), (0x03CD, ), (0x03CE, ), None))

# Windows code page
graphdata.rhses["1253"] = parsers.read_single_byte("WHATWG/index-windows-1253.txt")

# OEM code pages
graphdata.rhses["737"] = parsers.read_single_byte(
    "ICU/ibm-737_P100-1997.ucm")
graphdata.rhses["869"] = parsers.read_single_byte(
    "ICU/ibm-869_P100-1995.ucm")

# Macintosh code page, registered under both "10006" and "1280"
graphdata.rhses["10006"] = graphdata.rhses["1280"] = parsers.read_mozilla_ut_file(
    "Mozilla/macgreek.ut")

# TODO: ir018/19, Greek Teletext,
# ir181 (Technical), ir055, ir031, ir050 (INIS Greek non-homoglyphs)

# Symbol (ibm-1038)
graphdata.gsets["symbolgl"] = (94, 1,
                               parsers.read_single_byte("UTC/symbol.txt",

コード例 #4

0

ファイルを表示

ファイル: semitic.py プロジェクト: harjitmoe/ecma35lib

# SI-1311:2002 Latin/Hebrew RHS
# Registered as the "ir234" entry of graphdata.gsets. The value is a 3-tuple: 96, 1, and a
#   table of 96 entries (12 rows of 8 below), each entry being a code point or None.
# NOTE(review): the leading 96 and 1 presumably give the set size and the number of bytes per
#   character, and None presumably marks an unassigned position — confirm against graphdata.
graphdata.gsets["ir234"] = (96, 1, (
             0x00A0, None,   0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 
             0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 
             0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 
             0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   0x20AC, 0x20AA, 0x202D, 0x202E, 0x202C, None,   0x2017, 
             0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 
             0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 
             0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 
             0x05E8, 0x05E9, 0x05EA, None,   None,   0x200E, 0x200F, None))

# Windows code pages
# Hebrew
graphdata.rhses["1255"] = parsers.read_single_byte(
    "WHATWG/index-windows-1255.txt")
# Arabic
graphdata.rhses["1256"] = parsers.read_single_byte(
    "WHATWG/index-windows-1256.txt")

# OEM code pages
# Arabic
graphdata.rhses["720"] = parsers.read_single_byte(
    "ICU/ibm-720_P100-1997.ucm")
# Hebrew
graphdata.rhses["862"] = parsers.read_single_byte(
    "ICU/ibm-862_P100-1995.ucm")
# Arabic
graphdata.rhses["864"] = parsers.read_single_byte(
    "ICU/ibm-864_X110-1999.ucm")

# Macintosh code pages (both seem to have only Microsoft IDs?)
graphdata.rhses["10004"] = parsers.read_mozilla_ut_file(
    "Mozilla/macarabic.ut")
graphdata.rhses["10005"] = parsers.read_mozilla_ut_file(
    "Mozilla/machebrew.ut")

コード例 #5

0

ファイルを表示

     (0x23F5, ), (0x23F6, ), (0x23F7, ), (0x2B73, 0xF87F), (0x1F782, ),
     (0x1F783, ), None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, (0x2714, ), (0x2713, ), (0x1FB7D, ),
     (0x1FB7F, ), (0x23BE, ), (0x23CC, ), (0x2B1B, ), (0x2022, ), (0x25CF, ),
     (0x25DC, 0xF879), (0x25DE, 0xF879), (0x25DC, 0xF87F), (0x25DE, 0xF87F),
     (0x2B24, ), (0x1FB9E, ), (0x1FB9E, 0xF87C), (0x2500, ), (0x1F5D9, ),
     (0x2753, ), (0x2BC5, ), (0x2BC6, ), (0x2B0D, 0xF87F), (0x1F780, ),
     (0x1FB9F, ), (0x1FB9F, 0xF87C), None, None, None, None, None))

#####################################################################
# Zapf Dingbats

# Both sets are read from the same UTC mapping file; only the typ argument differs.
zdg0 = (94, 1, parsers.read_single_byte("UTC/zdingbat.txt", typ="GL94"))
graphdata.gsets["zdings_g0"] = zdg0
zdg1 = (94, 1, parsers.read_single_byte("UTC/zdingbat.txt", typ="GR94"))
graphdata.gsets["zdings_g1"] = zdg1
# The "998000" table is assembled from four pieces, in order: one-element tuples for
#   U+2768 through U+2775, nineteen None entries, the table read for zdings_g1, and a
#   final None.
graphdata.rhses["998000"] = (
    tuple((codepoint, ) for codepoint in range(0x2768, 0x2776))
    + (None, ) * 19
    + zdg1[2]
    + (None, ))
graphdata.defgsets["998000"] = (
    "zdings_g0", "zdings_g1", "nil", "nil")

#####################################################################
# WordPerfect Iconic Symbols (not ECMA-35 structured)

# This is the original source of U+231A and U+231B emoji (and U+2319).
# 0x00–0x22 were defined in WordPerfect 5.
# WordPerfect 6 changed it so 0x21–0x7E, 0xA1–0xEF and 0xF1–0xFE
#   ranges are Zapf Dingbats, while 0x00–0x20 are similar (not identical).

コード例 #6

0

ファイルを表示

ファイル: extlatin.py プロジェクト: harjitmoe/ecma35lib

                             0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
                             0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153,
                             0x0178, 0x017C, 0x00C0, 0x00C1, 0x00C2, 0x0102,
                             0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9,
                             0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
                             0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150,
                             0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB,
                             0x00DC, 0x0118, 0x021A, 0x00DF, 0x00E0, 0x00E1,
                             0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
                             0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
                             0x00EE, 0x00EF, 0x0111, 0x0144, 0x00F2, 0x00F3,
                             0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9,
                             0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF))

# Windows code pages for non-Vietnamese Latin
# Central European
graphdata.rhses["1250"] = parsers.read_single_byte("WHATWG/index-windows-1250.txt")
# ISO-8859-1 ext.
graphdata.rhses["1252"] = parsers.read_single_byte("WHATWG/index-windows-1252.txt")
graphdata.defgsets["1252"] = (
    "ir006", "ir100", "nil", "nil")
# ISO-8859-9 ext.
graphdata.rhses["1254"] = parsers.read_single_byte("WHATWG/index-windows-1254.txt")
graphdata.defgsets["1254"] = (
    "ir006", "ir148", "nil", "nil")
# Baltic
graphdata.rhses["1257"] = parsers.read_single_byte("WHATWG/index-windows-1257.txt")

# OEM pages (TODO: OEM 210 Greek and OEM 220 Spanish are both listed by DEC in the very definition
#   of the DECSPPCS CSI control. I do not have a source for their layout.)
# United States
graphdata.rhses["437"] = parsers.read_single_byte("ICU/ibm-437_P100-1995.ucm")
# Note: gets used as the default.
graphdata.defgsets["437"] = ("ir006", "nil", "nil", "nil")

コード例 #7

0

ファイルを表示

ファイル: quoocs_ngwx.py プロジェクト: harjitmoe/ecma35lib

#!/usr/bin/env python3
# -*- mode: python; coding: utf-8 -*-
# By HarJIT in 2020.

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Vietnamese Roman (quo^'c ngu*~) encodings

from ecma35.data import graphdata
from ecma35.data.singlebyte import sbmapparsers as parsers

# Windows-1258
graphdata.rhses["1258"] = parsers.read_single_byte("WHATWG/index-windows-1258.txt")

# VPS: the same Mozilla file is read twice, once with the parser's default typ and once
#   with typ="CL33" for the c0graphics entry.
graphdata.rhses["997000"] = parsers.read_mozilla_ut_file(
    "Mozilla/vps.ut")
graphdata.c0graphics["997000"] = parsers.read_mozilla_ut_file(
    "Mozilla/vps.ut", typ="CL33")

# TCVN (TCVN 5712, VSCII; not VISCII)
# Read three times from the same file: default typ, typ="CL33" and typ="GR96".
graphdata.rhses["997001"] = parsers.read_mozilla_ut_file(
    "Mozilla/tcvn5712.ut")
graphdata.c0graphics["997001"] = parsers.read_mozilla_ut_file(
    "Mozilla/tcvn5712.ut", typ="CL33")
graphdata.gsets["ir180"] = (
    96,
    1,
    parsers.read_mozilla_ut_file("Mozilla/tcvn5712.ut", typ="GR96"),
)
graphdata.defgsets["997001"] = (
    "ir014", "ir180", "nil", "nil")