Esempio n. 1
0
    0x0E53,
    0x0E54,
    0x0E55,
    0x0E56,
    0x0E57,
    0x0E58,
    0x0E59,
    0x0E5A,
    0x0E5B,
    None,
    None,
    None,
    None,
))

# Code page 874 and its relatives (extensions of TIS-620)
# According to the alias comments in ICU's convrtrs.txt, IBM's 874 is the same as IBM's 9066,
#   whereas Microsoft's 874 follows the layout of IBM's 1162.
graphdata.rhses["1162"] = parsers.read_single_byte("WHATWG/index-windows-874.txt")
graphdata.rhses["9066"] = parsers.read_single_byte(
    "ICU/ibm-874_P100-1995.ucm")
# The two layouts only collide at 0xA0 (an alternate U+0E48 for IBM, an NBSP for Microsoft).
#   "874" is a merge: the Microsoft (1162) entry is taken wherever it is truthy, so the
#   more-deployed Microsoft / ISO-8859-11 NBSP wins, and the IBM (9066) entry is used otherwise.
graphdata.rhses["874"] = tuple(
    microsoft if microsoft else ibm
    for microsoft, ibm in zip(graphdata.rhses["1162"], graphdata.rhses["9066"]))
# Both Microsoft-layout numbers share the same default G-set designations.
graphdata.defgsets["874"] = graphdata.defgsets["1162"] = (
    "ir006", "ir166", "nil", "nil")

# Macintosh code page (doesn't have a Mozilla file)
#graphdata.rhses["10021"] = parsers.read_mozilla_ut_file("Mozilla/macthai.ut")
Esempio n. 2
0
             0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F, 
             0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 
             0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 
             0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 
             0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 
             0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 
             0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 
             0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 
             0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 
             0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 
             0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F))

# TODO: ir153 (Russian subset of ISO-8859-5), ir200 (Uralic Cyrillic), ir201 (Volgaic Cyrillic)

# Other KOI-8 encodings
graphdata.rhses["878"] = graphdata.rhses["20866"] = parsers.read_single_byte(
    "WHATWG/index-koi8-r.txt")
# TODO: in practice, number 21866 gets used for KOI8-RU as well as for KOI8-U.
graphdata.rhses["1168"] = graphdata.rhses["21866"] = parsers.read_single_byte(
    "WHATWG/index-koi8-u.txt")

# The Windows encoding
graphdata.rhses["1251"] = parsers.read_single_byte(
    "WHATWG/index-windows-1251.txt")

# MacCyrillic: 10007/1283 is the original version; the current version is a Euro update of
#   10017. The change of U+00A4 mappings to U+20AC was made across the board, so the current
#   mapping is numbered 10017 here, and 10007/1283 get a copy which keeps that change but
#   reverts the other two differences.
graphdata.rhses["10017"] = parsers.read_single_byte(
    "WHATWG/index-x-mac-cyrillic.txt")
maccy = list(graphdata.rhses["10017"])
# Override entries 0x22 and 0x36 of the table with U+00A2 and U+2202 respectively
#   (presumably bytes 0xA2 and 0xB6, if the table starts at 0x80 -- TODO confirm).
maccy[0x22], maccy[0x36] = (0x00A2,), (0x2202,)
graphdata.rhses["10007"] = graphdata.rhses["1283"] = tuple(maccy)
Esempio n. 3
0
    (0x00B2, ), (0x00B3, ), (0x0384, ), (0x0385, ), (0x0386, ), (0x00B7, ),
    (0x0388, ), (0x0389, ), (0x038A, ), (0x00BB, ), (0x038C, ), (0x00BD, ),
    (0x038E, ), (0x038F, ), (0x0390, ), (0x0391, ), (0x0392, ), (0x0393, ),
    (0x0394, ), (0x0395, ), (0x0396, ), (0x0397, ), (0x0398, ), (0x0399, ),
    (0x039A, ), (0x039B, ), (0x039C, ), (0x039D, ), (0x039E, ), (0x039F, ),
    (0x03A0, ), (0x03A1, ), None, (0x03A3, ), (0x03A4, ), (0x03A5, ),
    (0x03A6, ), (0x03A7, ), (0x03A8, ), (0x03A9, ), (0x03AA, ), (0x03AB, ),
    (0x03AC, ), (0x03AD, ), (0x03AE, ), (0x03AF, ), (0x03B0, ), (0x03B1, ),
    (0x03B2, ), (0x03B3, ), (0x03B4, ), (0x03B5, ), (0x03B6, ), (0x03B7, ),
    (0x03B8, ), (0x03B9, ), (0x03BA, ), (0x03BB, ), (0x03BC, ), (0x03BD, ),
    (0x03BE, ), (0x03BF, ), (0x03C0, ), (0x03C1, ), (0x03C2, ), (0x03C3, ),
    (0x03C4, ), (0x03C5, ), (0x03C6, ), (0x03C7, ), (0x03C8, ), (0x03C9, ),
    (0x03CA, ), (0x03CB, ), (0x03CC, ), (0x03CD, ), (0x03CE, ), None))

# Windows code page (read from the WHATWG index)
graphdata.rhses["1253"] = parsers.read_single_byte("WHATWG/index-windows-1253.txt")

# OEM code pages (read from the ICU mapping files)
graphdata.rhses["737"] = parsers.read_single_byte(
    "ICU/ibm-737_P100-1997.ucm")
graphdata.rhses["869"] = parsers.read_single_byte(
    "ICU/ibm-869_P100-1995.ucm")

# Macintosh code page, registered under both 10006 and 1280
graphdata.rhses["10006"] = graphdata.rhses["1280"] = parsers.read_mozilla_ut_file(
    "Mozilla/macgreek.ut")

# TODO: ir018/19, Greek Teletext,
# ir181 (Technical), ir055, ir031, ir050 (INIS Greek non-homoglyphs)

# Symbol (ibm-1038)
graphdata.gsets["symbolgl"] = (94, 1,
                               parsers.read_single_byte("UTC/symbol.txt",
Esempio n. 4
0
# SI-1311:2002 Latin/Hebrew RHS
# Registered as a three-element tuple: 96, 1, and a literal table of 96 entries (12 rows of 8).
#   (Presumably set size, bytes per character and mapping -- TODO confirm against graphdata.)
# Each entry is either a Unicode code point or None; None presumably marks an unassigned
#   position. Besides Latin-1 symbols and the Hebrew letters U+05D0..U+05EA, the table carries
#   U+20AC, U+20AA and the directional formatting characters U+202C..U+202E, U+200E, U+200F.
graphdata.gsets["ir234"] = (96, 1, (
             0x00A0, None,   0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 
             0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 
             0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 
             0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   None,   None,   None,   None,   None,   None,   None, 
             None,   0x20AC, 0x20AA, 0x202D, 0x202E, 0x202C, None,   0x2017, 
             0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 
             0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, 
             0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 
             0x05E8, 0x05E9, 0x05EA, None,   None,   0x200E, 0x200F, None))

# Windows code pages
# 1255 is Hebrew, 1256 is Arabic.
graphdata.rhses["1255"] = parsers.read_single_byte(
    "WHATWG/index-windows-1255.txt")
graphdata.rhses["1256"] = parsers.read_single_byte(
    "WHATWG/index-windows-1256.txt")

# OEM code pages
# 720 and 864 are Arabic, 862 is Hebrew.
graphdata.rhses["720"] = parsers.read_single_byte(
    "ICU/ibm-720_P100-1997.ucm")
graphdata.rhses["862"] = parsers.read_single_byte(
    "ICU/ibm-862_P100-1995.ucm")
graphdata.rhses["864"] = parsers.read_single_byte(
    "ICU/ibm-864_X110-1999.ucm")

# Macintosh code pages (it seems that both only have Microsoft IDs?)
graphdata.rhses["10004"] = parsers.read_mozilla_ut_file(
    "Mozilla/macarabic.ut")
graphdata.rhses["10005"] = parsers.read_mozilla_ut_file(
    "Mozilla/machebrew.ut")



Esempio n. 5
0
     (0x23F5, ), (0x23F6, ), (0x23F7, ), (0x2B73, 0xF87F), (0x1F782, ),
     (0x1F783, ), None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, None, None, None, None, None, None, None,
     None, None, None, None, None, (0x2714, ), (0x2713, ), (0x1FB7D, ),
     (0x1FB7F, ), (0x23BE, ), (0x23CC, ), (0x2B1B, ), (0x2022, ), (0x25CF, ),
     (0x25DC, 0xF879), (0x25DE, 0xF879), (0x25DC, 0xF87F), (0x25DE, 0xF87F),
     (0x2B24, ), (0x1FB9E, ), (0x1FB9E, 0xF87C), (0x2500, ), (0x1F5D9, ),
     (0x2753, ), (0x2BC5, ), (0x2BC6, ), (0x2B0D, 0xF87F), (0x1F780, ),
     (0x1FB9F, ), (0x1FB9F, 0xF87C), None, None, None, None, None))

#####################################################################
# Zapf Dingbats

# The same UTC mapping file is read twice, once with typ="GL94" and once with typ="GR94";
#   each result is wrapped as a (94, 1, table) entry and registered as a G-set.
zdg0 = (94, 1, parsers.read_single_byte("UTC/zdingbat.txt", typ="GL94"))
graphdata.gsets["zdings_g0"] = zdg0
zdg1 = (94, 1, parsers.read_single_byte("UTC/zdingbat.txt", typ="GR94"))
graphdata.gsets["zdings_g1"] = zdg1
# RHS layout: U+2768 through U+2775 as one-element tuples (14 entries), then 19 unassigned
#   entries, then the zdings_g1 table itself, then one final unassigned entry.
graphdata.rhses["998000"] = (
    tuple(zip(range(0x2768, 0x2776)))
    + (None,) * 19
    + zdg1[2]
    + (None,)
)
graphdata.defgsets["998000"] = ("zdings_g0", "zdings_g1", "nil", "nil")

#####################################################################
# WordPerfect Iconic Symbols (not ECMA-35 structured)

# This is the original source of U+231A and U+231B emoji (and U+2319).
# 0x00–0x22 were defined in WordPerfect 5.
# WordPerfect 6 changed it so 0x21–0x7E, 0xA1–0xEF and 0xF1–0xFE
#   ranges are Zapf Dingbats, while 0x00–0x20 are similar (not identical).
Esempio n. 6
0
                             0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
                             0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153,
                             0x0178, 0x017C, 0x00C0, 0x00C1, 0x00C2, 0x0102,
                             0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9,
                             0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
                             0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150,
                             0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB,
                             0x00DC, 0x0118, 0x021A, 0x00DF, 0x00E0, 0x00E1,
                             0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
                             0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED,
                             0x00EE, 0x00EF, 0x0111, 0x0144, 0x00F2, 0x00F3,
                             0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9,
                             0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF))

# Windows code pages for Latin-script use other than Vietnamese
# 1250: Central European
graphdata.rhses["1250"] = parsers.read_single_byte("WHATWG/index-windows-1250.txt")
# 1252: extension of ISO-8859-1
graphdata.rhses["1252"] = parsers.read_single_byte("WHATWG/index-windows-1252.txt")
graphdata.defgsets["1252"] = (
    "ir006", "ir100", "nil", "nil")
# 1254: extension of ISO-8859-9
graphdata.rhses["1254"] = parsers.read_single_byte("WHATWG/index-windows-1254.txt")
graphdata.defgsets["1254"] = (
    "ir006", "ir148", "nil", "nil")
# 1257: Baltic
graphdata.rhses["1257"] = parsers.read_single_byte("WHATWG/index-windows-1257.txt")

# OEM pages
# TODO: DEC lists both OEM 210 (Greek) and OEM 220 (Spanish) in the very definition of the
#   DECSPPCS CSI control, but I do not have a source for their layout.
# 437: United States
graphdata.rhses["437"] = parsers.read_single_byte("ICU/ibm-437_P100-1995.ucm")
# Note: gets used as the default.
graphdata.defgsets["437"] = (
    "ir006", "nil", "nil", "nil")
Esempio n. 7
0
#!/usr/bin/env python3
# -*- mode: python; coding: utf-8 -*-
# By HarJIT in 2020.

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Vietnamese Roman (quo^'c ngu*~) encodings

from ecma35.data import graphdata
from ecma35.data.singlebyte import sbmapparsers as parsers

# Windows-1258
graphdata.rhses["1258"] = parsers.read_single_byte("WHATWG/index-windows-1258.txt")

# VPS: the Mozilla table is read once for the RHS and once more with typ="CL33" for the
#   c0graphics entry.
graphdata.rhses["997000"] = parsers.read_mozilla_ut_file(
    "Mozilla/vps.ut")
graphdata.c0graphics["997000"] = parsers.read_mozilla_ut_file(
    "Mozilla/vps.ut", typ="CL33")

# TCVN (i.e. TCVN 5712, also called VSCII; this is not VISCII)
# The Mozilla table is read three times: for the RHS, with typ="CL33" for the c0graphics
#   entry, and with typ="GR96" for the 96-set registered as ir180.
graphdata.rhses["997001"] = parsers.read_mozilla_ut_file(
    "Mozilla/tcvn5712.ut")
graphdata.c0graphics["997001"] = parsers.read_mozilla_ut_file(
    "Mozilla/tcvn5712.ut", typ="CL33")
graphdata.gsets["ir180"] = (
    96,
    1,
    parsers.read_mozilla_ut_file("Mozilla/tcvn5712.ut", typ="GR96"),
)
# NOTE(review): G0 is designated as "ir014" here, whereas other code pages' default G-sets
#   use "ir006" for G0 -- confirm that "ir014" is intended.
graphdata.defgsets["997001"] = (
    "ir014", "ir180", "nil", "nil")