Exemple #1
0
def _create_voicing_map():
    """
    Builds a lookup from each kana to the list of its voiced alternatives.

    Hiragana found in the か, さ and た lines of the kana table map to the
    single code point that follows them; hiragana found in the は line map
    to the next two code points; every other hiragana maps to an empty
    list. Each hiragana entry is then mirrored under its
    ``scripts.to_katakana`` conversion.
    """
    table = kana_table.KanaTable.get_cached().get_table()
    single_step = table['か'] + table['さ'] + table['た']
    double_step = table['は']

    voicing_map = {}
    for kana in scripts.get_script(scripts.Script.Hiragana):
        code = ord(kana)
        if kana in single_step:
            variants = [chr(code + 1)]
        elif kana in double_step:
            variants = [chr(code + 1), chr(code + 2)]
        else:
            variants = []
        voicing_map[kana] = variants

    # Mirror every hiragana entry under its katakana form.
    katakana_entries = {}
    for hiragana, variants in iteritems(voicing_map):
        katakana_key = scripts.to_katakana(hiragana)
        katakana_entries[katakana_key] = [
            scripts.to_katakana(variant) for variant in variants
        ]
    voicing_map.update(katakana_entries)

    return voicing_map
Exemple #2
0
    def test_cpc(self):
        """
        For every character in ``self.cpcchar``, check the ``uchar``
        classification predicates and the ``normalizeJamoKDKC`` output.
        """
        # (assertion, predicate) pairs, kept in the order they are checked.
        expectations = (
            (self.assertFalse, uchar.isModernChoseong),
            (self.assertFalse, uchar.isChoseongJamo),
            (self.assertFalse, uchar.isModernJungseong),
            (self.assertFalse, uchar.isJungseongJamo),
            (self.assertFalse, uchar.isModernJongseong),
            (self.assertFalse, uchar.isOldJongseong),
            (self.assertFalse, uchar.isJongseongJamo),
            (self.assertFalse, uchar.isHangulJamo),
            (self.assertFalse, uchar.isHalfwidthLetter),
            (self.assertTrue, uchar.isCompatibilityLetter),
            (self.assertFalse, uchar.isParenthesizedLetter),
            (self.assertFalse, uchar.isCircledLetter),
            (self.assertFalse, uchar.isPrecomposedSyllable),
            (self.assertTrue, uchar.isHangulLetter),
        )
        choseong_filler = six.chr(0x115F)
        jungseong_filler = six.chr(0x1160)

        for cpc in self.cpcchar:
            decomposed = unicodedata.normalize("NFKD", cpc)
            for check, predicate in expectations:
                check(predicate(cpc), msg=cpc)

            # The expected normalization depends on how the NFKD form is
            # classified: a choseong is followed by U+1160, anything else
            # is preceded by U+115F U+1160.
            if uchar.isChoseongJamo(decomposed):
                expected = decomposed + jungseong_filler
            else:
                expected = choseong_filler + jungseong_filler + decomposed
            self.assertEqual(normalization.normalizeJamoKDKC(cpc), expected)
def recomposeHangul(source):
    """
    If one uses a UAX #15 algorithm instead of the above composeHangul function for normalization,
    an Old Hangul Syllable Block can be decomposed into a Wanseong Modern Hangul Syllable Block and
    Johab Hangul Letter(s). In such cases, after applying, one can use the following recomposition
    algorithm to restore a character string in Normalization Form NFC or NFKC to an L V T format.

    Whenever a precomposed syllable without a final (``SIndex % TCount == 0``)
    is directly followed by a character accepted by ``uchar.isOldJongseong``,
    the syllable is replaced by its leading consonant and vowel and the old
    final is appended after them.

    :param string source: unicode string
    :returns: the recomposed string; ``""`` for empty input
    """
    if not source:
        return ""

    last = source[0]
    result = [last]
    for ch in source[1:]:
        # check to see if two consecutive characters are a Wanseong Modern Hangul
        # Syllable Block and a Syllable-Final Letter.
        SIndex = ord(last) - SBase
        if (0 <= SIndex < SCount and SIndex % TCount == 0
                and uchar.isOldJongseong(ch)):
            result[-1] = six.chr(LBase + SIndex // NCount)
            result.append(six.chr(VBase + (SIndex % NCount) // TCount))
        # Always advance ``last``: leaving it on the syllable that was just
        # split would make a second old final overwrite the one appended
        # here instead of simply following it.
        last = ch
        result.append(ch)
    return "".join(result)
Exemple #4
0
 def setUp(self):
     """Build the fixture lists for U+3200..U+321E and U+3260..U+327E."""
     self.parenthesized = [six.chr(cp) for cp in range(0x3200, 0x321E + 1)]
     self.circled = [six.chr(cp) for cp in range(0x3260, 0x327E + 1)]
Exemple #5
0
def _create_voicing_map():
    """
    Returns a dict mapping every kana to a list of its voiced alternatives.

    A hiragana in the か/さ/た lines of the kana table gets one alternative
    (the next code point), one in the は line gets two (the next two code
    points), and any other hiragana gets none. The katakana counterparts,
    obtained through ``scripts.to_katakana``, are added as well.
    """
    table = kana_table.KanaTable.get_cached().get_table()
    one_variant_line = table['か'] + table['さ'] + table['た']
    two_variant_line = table['は']

    def alternatives(kana):
        # Alternatives sit directly after the base kana in code point order.
        base = ord(kana)
        if kana in one_variant_line:
            return [chr(base + 1)]
        if kana in two_variant_line:
            return [chr(base + 1), chr(base + 2)]
        return []

    voicing_map = {}
    for kana in scripts.get_script(scripts.Script.Hiragana):
        voicing_map[kana] = alternatives(kana)

    # Add katakana into the mix
    katakana_vm = {}
    for hira, voiced in iteritems(voicing_map):
        kata = scripts.to_katakana(hira)
        katakana_vm[kata] = [scripts.to_katakana(v) for v in voiced]

    voicing_map.update(katakana_vm)

    return voicing_map
def sortKey(text, hangul_first=True):
    """
    key function for sorted

    Walks ``text`` and emits one integer weight per unit. A unit is either a
    run of conjoining jamo (leading consonant, optionally followed by a vowel
    and a final; or a vowel optionally followed by a final), a single other
    Hangul letter, or a single non-Hangul character.

    :param string text: A string for weight
    :param bool hangul_first: When true, ``1 << 31`` is added to the weight of
        non-Hangul characters; when false it is added to Hangul weights
        instead.
    :returns: list of integer weights
    """
    offset = 1 << 31
    length = len(text)

    def _pack(raw):
        # Keep the low two bits as the type and halve the remaining bits.
        _type = raw & 3
        return ((raw >> 2) << 1) | _type

    weights = []
    itr = enumerate(text)
    for i, ch in itr:
        if uchar.isChoseongJamo(ch):
            L = ch
            V = six.chr(0x1160)
            T = None
            # Pull the following vowel / final out of the iterator so they
            # are not weighted a second time on their own.
            if i + 1 < length and uchar.isJungseongJamo(text[i + 1]):
                _, V = next(itr)
                if i + 2 < length and uchar.isJongseongJamo(text[i + 2]):
                    _, T = next(itr)
            weight = _pack(getHangulWeightLVT(L, V, T, 0))
            if not hangul_first:
                weight += offset
        elif uchar.isJungseongJamo(ch):
            # A vowel with no leading consonant. This branch previously
            # tested isJongseongJamo although it stores ``ch`` as V, and it
            # never initialised T, which raised UnboundLocalError (or reused
            # a stale T) when no final followed.
            L = six.chr(0x115F)
            V = ch
            T = None
            if i + 1 < length and uchar.isJongseongJamo(text[i + 1]):
                _, T = next(itr)
            weight = _pack(getHangulWeightLVT(L, V, T, 0))
            if not hangul_first:
                weight += offset
        elif uchar.isHangulLetter(ch):
            weight = _pack(getHangulWeight(ch))
            if not hangul_first:
                weight += offset
        else:
            weight = ord(ch)
            if hangul_first:
                weight += offset

        weights.append(weight)
    return weights
Exemple #7
0
 def visit_escaped_numeric_character(self, node, children):
     """
     Turn an ``(escape, character_code)`` pair into the character it denotes.

     A bare backslash escape is read as octal; ``\\u``, ``\\x`` and ``\\U``
     escapes are read as hexadecimal. Any other escape raises
     NotImplementedError.
     """
     [[escape, character_code]] = children
     hex_escapes = ('\\u', '\\x', '\\U')
     if escape != '\\' and escape not in hex_escapes:
         raise NotImplementedError('Unhandled character escape %s' % escape)
     # Octal escape code like '\077', otherwise hex escape like '\xff'
     base = 8 if escape == '\\' else 16
     return chr(int(character_code, base))
Exemple #8
0
 def visit_escaped_numeric_character(self, node, children):
     """
     Decode a numeric character escape.

     ``children`` holds a single ``(escape, character_code)`` pair. The code
     is parsed in base 8 for a plain backslash and in base 16 for ``\\u``,
     ``\\x`` or ``\\U``; other escapes raise NotImplementedError.
     """
     [[escape, character_code]] = children
     # Radix for each supported escape prefix.
     radix_by_escape = {'\\': 8, '\\u': 16, '\\x': 16, '\\U': 16}
     if escape not in radix_by_escape:
         raise NotImplementedError('Unhandled character escape %s' % escape)
     code_point = int(character_code, radix_by_escape[escape])
     return chr(code_point)
Exemple #9
0
    def setUp(self):
        """Build the character-list fixtures from fixed code point ranges."""

        def chars(first, last):
            # Characters for the inclusive code point range first..last.
            return [six.chr(cp) for cp in range(first, last + 1)]

        self.lchar = chars(0x1100, 0x1112)
        self.vchar = chars(0x1161, 0x1175)
        # The leading empty string stands for "no character".
        self.tchar = [""] + chars(0x11A8, 0x11C2)

        self.old_lchar = chars(0x1113, 0x115E) + chars(0xA960, 0xA97C)
        self.old_vchar = chars(0x1176, 0x11A7) + chars(0xD7B0, 0xD7C6)
        self.old_tchar = chars(0x11C3, 0x11FF) + chars(0xD7CB, 0xD7FB)
Exemple #10
0
    def bisect_string_range(start, end):
        """Returns a string that is approximately in the middle of the range.

        (start, end) is treated as a string range, and it is assumed
        start <= end in the usual lexicographic string ordering. The output key
        mid is guaranteed to satisfy start <= mid <= end.

        The method proceeds by comparing initial characters of start and
        end.  When the characters are equal, they are appended to the mid
        string.  In the first place that the characters differ, the
        difference characters are averaged and this average is appended to
        the mid string.  If averaging resulted in rounding down, an
        additional character is added to the mid string to make up for the
        rounding down.  This extra step is necessary for correctness in
        the case that the average of the two characters is equal to the
        character in the start string.

        This method makes the assumption that most keys are ascii and it
        attempts to perform splitting within the ascii range when that
        results in a valid split.

        Args:
          start: A string.
          end: A string such that start <= end.

        Returns:
          A string mid such that start <= mid <= end.
        """
        if start == end:
            return start
        # A trailing NUL lets a string that is a prefix of the other still
        # be compared position by position.
        start += "\0"
        end += "\0"
        midpoint = []

        expected_max = 127
        for i in range(min(len(start), len(end))):
            if start[i] == end[i]:
                midpoint.append(start[i])
                continue

            ord_sum = ord(start[i]) + ord(end[i])
            # Floor division: chr() needs an int, and true division would
            # hand it a float on Python 3.
            midpoint.append(chr(ord_sum // 2))
            if ord_sum % 2:
                # The average was rounded down; add one more character that
                # sits between start's next character and the range maximum.
                if len(start) > i + 1:
                    ord_start = ord(start[i + 1])
                else:
                    ord_start = 0
                if ord_start < expected_max:
                    ord_split = (expected_max + ord_start) // 2
                else:
                    ord_split = (0xFFFF + ord_start) // 2
                midpoint.append(chr(ord_split))
            break
        return "".join(midpoint)
    def setUp(self):
        """
        Build ``self.syllable`` (U+AC00..U+D7A3) and ``self.lvt``, the
        product of the three jamo lists defined below.
        """

        def chars(first, last):
            # Characters for the inclusive code point range first..last.
            return [six.chr(cp) for cp in range(first, last + 1)]

        self.syllable = chars(0xAC00, 0xD7A3)

        lchar = chars(0x1100, 0x1112)
        # The leading empty string stands for "no character" in that slot.
        vchar = [""] + chars(0x1161, 0x1175)
        tchar = [""] + chars(0x11A8, 0x11C2)
        self.lvt = itertools.product(lchar, vchar, tchar)
Exemple #12
0
 def handle_match(m):
     """
     Return the replacement text for one matched entity reference.

     ``m.group(1)`` is the entity body. Names present in ``HTML_ENTITIES``
     are replaced by the character for the stored code point; ``#x...`` /
     ``#X...`` bodies are parsed as hexadecimal and other ``#...`` bodies as
     decimal code points. Anything that cannot be resolved is replaced by
     the empty string.
     """
     name = m.group(1)
     if name in HTML_ENTITIES:
         return chr(HTML_ENTITIES[name])
     try:
         if name[:2] in ('#x', '#X'):
             return chr(int(name[2:], 16))
         elif name.startswith('#'):
             return chr(int(name[1:]))
     except (ValueError, OverflowError):
         # int() rejects malformed digits; chr() raises ValueError for code
         # points outside the Unicode range and OverflowError for values
         # too large for a C int. All of these mean "not a valid reference".
         pass
     return ''
Exemple #13
0
 def show_status(self, changed):
     """
     Refresh the status text and, when ``changed`` is true, emit
     ``image_list.new_metadata`` with ``True``.

     The status text gets U+2690 when ``self.metadata.latlong`` is truthy
     and U+26A1 when ``changed`` is truthy.
     """
     markers = []
     # 'geotagged' marker
     if self.metadata.latlong:
         markers.append(six.chr(0x2690))
     # 'unsaved' marker
     if changed:
         markers.append(six.chr(0x26A1))
     self.status.setText(''.join(markers))
     self._elide_name()
     if changed:
         self.image_list.new_metadata.emit(True)
def normalizeJamoKDKC(source):
    """
    Normalizing Compatibility/Halfwidth Hangul Letters and Hangul-embedded symbols
    (NormalizeJamoKDKC)

    Compatibility letters, characters in U+3200..U+320D and U+3260..U+326D,
    and halfwidth letters are mapped through the CPJAMO / PCJAMO / HWJAMO
    tables and padded with the fillers U+115F / U+1160 according to how the
    mapped character is classified. Characters from U+3200..U+320D are also
    wrapped in parentheses. Every other character is copied unchanged.

    :param string source: unicode string
    """
    PHBase = 0x3200
    PHEnd = 0x320D
    CHBase = 0x3260
    CHEnd = 0x326D

    if len(source) == 0:
        return ""

    result = []
    for ch in source:
        code = ord(ch)
        closing = None  # closing parenthesis to emit after the jamo, if any

        if uchar.isCompatibilityLetter(ch):
            jamo = six.chr(CPJAMO[code - 0x3131])
        elif PHBase <= code <= PHEnd:
            result.append('\u0028')
            jamo = six.chr(PCJAMO[code - PHBase])
            closing = '\u0029'
        elif CHBase <= code <= CHEnd:
            jamo = six.chr(PCJAMO[code - CHBase])
        elif uchar.isHalfwidthLetter(ch):
            jamo = six.chr(HWJAMO[code - 0xFFA0])
        else:
            result.append(ch)
            continue

        # Pad the mapped jamo with fillers for the positions it lacks.
        if uchar.isChoseongJamo(jamo):
            result.extend((jamo, '\u1160'))
        elif uchar.isJungseongJamo(jamo):
            result.extend(('\u115F', jamo))
        elif uchar.isJongseongJamo(jamo):
            result.extend(('\u115F', '\u1160', jamo))

        if closing is not None:
            result.append(closing)

    return "".join(result)
Exemple #15
0
def _bracket_fixup(path):
    if path.find('[') == -1 and path.find(']') == -1:
        return path
    sentinal = 256
    while path.find(chr(sentinal)) != -1:
        sentinal += 1
        if sentinal > 65535:
            log.error(
                'Cannot fix brackets in path, path contains all possible sentinal characters'
            )
            return path
    newpath = path.replace(']', chr(sentinal))
    newpath = newpath.replace('[', '[[]')
    newpath = newpath.replace(chr(sentinal), '[]]')
    return newpath
Exemple #16
0
def _unescape_one(match):
    """
    Return the replacement for one escape match.

    Group 1, when present, is returned as is. Otherwise group 2, when
    present, is parsed as a hexadecimal code point and returned as that
    character. With neither group set, an empty text string is returned.
    """
    literal = match.group(1)
    if literal is not None:
        return literal

    hex_digits = match.group(2)
    if hex_digits is None:
        return six.text_type()
    return six.chr(int(hex_digits, 16))
Exemple #17
0
def utf16decode(x):
    """Decode the ``\\uXXXX`` escapes in a string.

    A backslash followed by ``u`` and four hexadecimal digits is replaced by
    the character with that code point. A backslash followed by any other
    character yields that character without the backslash. Everything else
    is copied through.
    """
    pieces = []
    digits = ""
    # state: -1 = plain text, 0 = just saw a backslash,
    #        1..4 = number of hex digits still being collected + position
    state = -1
    for z in x:
        if state == -1:
            if z == "\\":
                state = 0
            else:
                pieces.append(six.text_type(z))
        elif state == 0:
            if z == "u":
                state = 1
                digits = ""
            else:
                pieces.append(six.text_type(z))
                state = -1
        elif state < 4:
            state += 1
            digits += z
        else:
            # Fourth hex digit: the escape is complete.
            state = -1
            pieces.append(six.chr(int(digits + z, 16)))
    return "".join(pieces)
def composeHangul(source):
    """
    returns a Wanseong Modern Hangul Syllable Block for the given Johab Modern Hangul Syllable
    Block. Even when a portion of an Old Hangul Syllable Block is a Modern Hangul Syllable Block,
    unlike UAX #15, that portion is not transformed to a Wanseong Modern Hangul Syllable Block.

    The string is scanned left to right while tracking the last emitted
    unit: L followed by V is merged into an LV syllable, an LV syllable
    followed by a modern final is merged into an LVT syllable, and an LV
    syllable followed by a character accepted by ``uchar.isOldJongseong`` is
    split back into L and V with the old final appended.

    :param string source: unicode string.
    :returns: the composed string; ``""`` for empty input
    """
    if not source:
        return ""

    last = source[0]
    result = [last]

    for ch in source[1:]:
        # L + V -> LV syllable
        LIndex = ord(last) - LBase
        if 0 <= LIndex < LCount:
            VIndex = ord(ch) - VBase
            if 0 <= VIndex < VCount:
                last = six.chr(SBase + (LIndex * VCount + VIndex) * TCount)
                result[-1] = last
                continue

        SIndex = ord(last) - SBase
        if 0 <= SIndex < SCount and SIndex % TCount == 0:
            # LV + modern T -> LVT syllable
            TIndex = ord(ch) - TBase
            if 0 < TIndex < TCount:
                last = six.chr(ord(last) + TIndex)
                result[-1] = last
                continue

            # LV + old T -> L, V, old T (left decomposed)
            if uchar.isOldJongseong(ch):
                result[-1] = six.chr(LBase + SIndex // NCount)
                result.append(six.chr(VBase + (SIndex % NCount) // TCount))
                result.append(ch)
                # Advance ``last``; leaving it on the LV syllable would make
                # a following final be combined with a syllable that is no
                # longer at the end of ``result``.
                last = ch
                continue

        last = ch
        result.append(ch)

    return "".join(result)
Exemple #19
0
    def setUp(self):
        """Build the character-list fixtures from fixed code point ranges."""

        def chars(first, last):
            # Characters for the inclusive code point range first..last.
            return [six.chr(cp) for cp in range(first, last + 1)]

        self.lchar = chars(0x1100, 0x1112)
        self.vchar = chars(0x1161, 0x1175)
        self.tchar = chars(0x11A8, 0x11C2)

        self.cpcchar = chars(0x3131, 0x314E)
        self.cpvchar = chars(0x314F, 0x3163)
Exemple #20
0
def _replace_entity(match):
    text = match.group(1)
    if text[0] == '#':
        text = text[1:]
        try:
            if text[0] in 'xX':
                c = int(text[1:], 16)
            else:
                c = int(text)
            return chr(c)
        except ValueError:
            return match.group(0)
    else:
        try:
            return chr(name2codepoint[text])
        except (ValueError, KeyError):
            return match.group(0)
def decomposeHangul(S):
    """
    returns a Johab Modern Hangul Syllable Block for the given Wanseong Modern Hangul Syllable Block

    The syllable index relative to ``SBase`` is split arithmetically into
    leading consonant, vowel and (when non-zero) final offsets.

    :param char S: Single character Hangul Syllable. If not, return input.
    """
    SIndex = ord(S) - SBase
    if not 0 <= SIndex < SCount:
        return S

    leading = six.chr(LBase + SIndex // NCount)
    vowel = six.chr(VBase + (SIndex % NCount) // TCount)
    final_offset = SIndex % TCount
    if final_offset == 0:
        # No final consonant in this syllable.
        return leading + vowel
    return leading + vowel + six.chr(TBase + final_offset)
Exemple #22
0
    def _create_key_event(self, event_type, event):
        """ Convert a GUI toolkit keyboard event into a KeyEvent.

        Returns None (after calling ``event.Skip()``) when neither
        ``self.focus_owner`` nor ``self.component`` is set, and None when a
        'character' event yields an empty key.
        """
        focus_owner = (self.component
                       if self.focus_owner is None else self.focus_owner)
        if focus_owner is None:
            event.Skip()
            return None

        if event_type == 'character':
            key = six.chr(event.GetUniChar())
            if not key:
                return None
        else:
            # Prefer the mapped key name; fall back to the lower-cased
            # character of the event.
            key_code = event.GetKeyCode()
            if key_code in KEY_MAP:
                key = KEY_MAP.get(key_code)
            else:
                key = six.chr(event.GetUniChar()).lower()

        # Use the last-seen mouse coordinates instead of GetX/GetY due
        # to wx bug.
        x, y = self._last_mouse_pos

        # Someday when wx does this properly, we can use these instead:
        # x = event.GetX()
        # y = event.GetY()

        return KeyEvent(event_type=event_type,
                        character=key,
                        alt_down=event.AltDown(),
                        control_down=event.ControlDown(),
                        shift_down=event.ShiftDown(),
                        x=x,
                        y=self._flip_y(y),
                        event=event,
                        window=self)
Exemple #23
0
 def ntou(n, encoding='ISO-8859-1'):
     """Return the given native string as a unicode string with the given encoding.

     With the special encoding ``'escape'`` the input is decoded as
     ISO-8859-1 and every ``\\uXXXX`` sequence (four hexadecimal digits) is
     replaced by the character with that code point. Otherwise the input is
     decoded with ``encoding``.
     """
     # In Python 2, the native string type is bytes.
     # First, check for the special encoding 'escape'. The test suite uses this
     # to signal that it wants to pass a string with embedded \uXXXX escapes,
     # but without having to prefix it with u'' for Python 2, but no prefix
     # for Python 3.
     if encoding == 'escape':
         # Match hexadecimal digits only: the previous class also accepted
         # the letters g-z, which made int(..., 16) raise ValueError.
         return six.text_type(
             re.sub(r'\\u([0-9a-fA-F]{4})',
                    lambda m: chr(int(m.group(1), 16)),
                    n.decode('ISO-8859-1')))
     # Assume it's already in the given encoding, which for ISO-8859-1 is almost
     # always what was intended.
     return n.decode(encoding)
Exemple #24
0
    def show_plot(OO, excursi, rmsdpos, minimum):
        """
        Draw two contour figures over ``OO.grid`` and show them.

        The first figure contours ``excursi`` and marks ``minimum``
        (converted with 180/pi) with a red plus; the second contours
        ``rmsdpos``. Both arrays are reshaped in place to a square grid of
        side ``len(OO.grid)`` first.
        """
        for values in (excursi, rmsdpos):
            values.reshape(flex.grid(len(OO.grid), len(OO.grid)))

        from matplotlib import pyplot as plt

        def scaled_axis():
            # Grid indices scaled by the 0.02 step used on both axes.
            return [i * 0.02 for i in OO.grid]

        plt.figure()
        CS = plt.contour(scaled_axis(), scaled_axis(),
                         excursi.as_numpy_array())
        # chr(176) is the degree sign appended to each contour label.
        plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f" + chr(176))
        marker_x = [minimum[1] * 180. / math.pi]
        marker_y = [minimum[0] * 180. / math.pi]
        plt.plot(marker_x, marker_y, "r+")
        plt.title("Rms rotational excursion to reflection condition, degrees")
        plt.axes().set_aspect("equal")

        plt.figure()
        CS = plt.contour(scaled_axis(), scaled_axis(),
                         rmsdpos.as_numpy_array())
        plt.clabel(CS, inline=1, fontsize=10, fmt="%7.4f px")
        plt.title("Rms position shift, obs vs. model, pixels")
        plt.axes().set_aspect("equal")
        plt.show()
Exemple #25
0
    def _create_key_event(self, event_type, event):
        """
        Convert a toolkit keyboard event into a KeyEvent.

        Returns None (after calling ``event.ignore()``) when neither
        ``self.focus_owner`` nor ``self.component`` is set, and None when
        the resulting key is empty.
        """
        focus_owner = self.focus_owner
        if focus_owner is None:
            focus_owner = self.component
        if focus_owner is None:
            event.ignore()
            return None

        if event_type == 'character':
            key = six.text_type(event.text())
        else:
            # Convert the keypress to a standard enable key if possible,
            # otherwise to text.
            key_code = event.key()
            key = KEY_MAP.get(key_code)
            if key is None:
                key = six.chr(key_code).lower()

        if not key:
            return None

        # Use the last-seen mouse position as the coordinates of this event.
        x, y = self.control.handler.last_mouse_pos
        modifiers = event.modifiers()

        flipped_y = self._flip_y(y)
        alt_down = bool(modifiers & QtCore.Qt.AltModifier)
        shift_down = bool(modifiers & QtCore.Qt.ShiftModifier)
        control_down = bool(modifiers & QtCore.Qt.ControlModifier)

        return KeyEvent(event_type=event_type,
                        character=key,
                        x=x,
                        y=flipped_y,
                        alt_down=alt_down,
                        shift_down=shift_down,
                        control_down=control_down,
                        event=event,
                        window=self)
Exemple #26
0
    def _split_id_or_name(id_or_name1, id_or_name2, batch_size,
                          maintain_batches):
        """Return an id_or_name that is between id_or_name1 and id_or_name2.

        Attempts to split the range [id_or_name1, id_or_name2] in half,
        unless maintain_batches is true and the size of the range
        [id_or_name1, id_or_name2] is less than or equal to batch_size.

        Args:
          id_or_name1: A number or string or the id_or_name component of a key
          id_or_name2: A number or string or the id_or_name component of a key
          batch_size: The range size that will not be split if maintain_batches
            is true.
          maintain_batches: A boolean for whether to keep small ranges intact.

        Returns:
          An id_or_name such that id_or_name1 <= id_or_name <= id_or_name2.

        Raises:
          KeyRangeError: if the arguments have mixed types and are not in
            the order (integer, string).
        """
        first_is_int = isinstance(id_or_name1, six.integer_types)
        second_is_int = isinstance(id_or_name2, six.integer_types)
        first_is_str = isinstance(id_or_name1, six.string_types)
        second_is_str = isinstance(id_or_name2, six.string_types)

        if first_is_int and second_is_int:
            if not maintain_batches or id_or_name2 - id_or_name1 > batch_size:
                # Floor division keeps the midpoint an integer id; true
                # division would return a float on Python 3.
                return (id_or_name1 + id_or_name2) // 2
            return id_or_name1

        if first_is_str and second_is_str:
            return KeyRange.bisect_string_range(id_or_name1, id_or_name2)

        # Mixed types are only accepted as (integer, string).
        if not first_is_int or not second_is_str:
            raise KeyRangeError("Wrong key order: %r, %r" %
                                (id_or_name1, id_or_name2))

        zero_ch = chr(0)
        if id_or_name2 == zero_ch:
            # The string side is already chr(0), so split the integer side
            # between id_or_name1 and 2**63 - 1 instead.
            return (id_or_name1 + 2**63 - 1) // 2
        return zero_ch
Exemple #27
0
 def __init__(self, *args, **kw):
     """
     Build the grid of location fields.

     Creates one LocationWidgets set for each of 'taken' and 'shown', a swap
     button, the column and row labels, and places every field widget in a
     QGridLayout.
     """
     super(LocationInfo, self).__init__(*args, **kw)
     layout = QtWidgets.QGridLayout()
     self.setLayout(layout)
     layout.setContentsMargins(0, 0, 0, 0)
     self.members = {
         'taken': LocationWidgets(self),
         'shown': LocationWidgets(self)
     }
     # The swap button is labelled with the single character U+21C4.
     self.swap = SquareButton(six.chr(0x21c4))
     self.swap.setStyleSheet('QPushButton { font-size: 10px }')
     self.swap.setFont(QtGui.QFont("Dejavu Sans"))
     if not self.swap.fontInfo().exactMatch():
         # probably on Windows, try a different font
         self.swap.setFont(QtGui.QFont("Segoe UI Symbol"))
     layout.addWidget(self.swap, 0, 4)
     # Column headings in row 0.
     label = QtWidgets.QLabel(translate('PhotiniMap', 'camera'))
     layout.addWidget(label, 0, 1, 1, 2)
     label = QtWidgets.QLabel(translate('PhotiniMap', 'subject'))
     layout.addWidget(label, 0, 3)
     # Row headings in column 0, rows 1-5.
     layout.addWidget(QtWidgets.QLabel(translate('PhotiniMap', 'Street:')),
                      1, 0)
     layout.addWidget(QtWidgets.QLabel(translate('PhotiniMap', 'City:')), 2,
                      0)
     layout.addWidget(
         QtWidgets.QLabel(translate('PhotiniMap', 'Province:')), 3, 0)
     layout.addWidget(QtWidgets.QLabel(translate('PhotiniMap', 'Country:')),
                      4, 0)
     layout.addWidget(QtWidgets.QLabel(translate('PhotiniMap', 'Region:')),
                      5, 0)
     # 'taken' widgets start at column 1 and 'shown' widgets at column 3.
     # Every field spans two columns except the country name and country
     # code, which share row 4 side by side.
     for ts, col in (('taken', 1), ('shown', 3)):
         layout.addWidget(self.members[ts]['sublocation'], 1, col, 1, 2)
         layout.addWidget(self.members[ts]['city'], 2, col, 1, 2)
         layout.addWidget(self.members[ts]['province_state'], 3, col, 1, 2)
         layout.addWidget(self.members[ts]['country_name'], 4, col)
         layout.addWidget(self.members[ts]['country_code'], 4, col + 1)
         layout.addWidget(self.members[ts]['world_region'], 5, col, 1, 2)
Exemple #28
0
 def on_char(self, event):
     """
     Insert the character of ``event`` at the cursor.

     Any current selection is deleted first; afterwards the cursor moves
     one position forward and ``on_token_change`` is called.
     """
     self.delete_selection()
     typed = six.chr(event.GetUnicodeKey())
     insert_at = self.__cursor_pos
     self.__tokens.insert(insert_at, typed)
     self.move_cursor_pos(self.__cursor_pos + 1)
     self.on_token_change()
Exemple #29
0
def convertToUnicode(t):
    """Return the character whose code point is the hexadecimal string ``t[0]``."""
    code_point = int(t[0], 16)
    return six.chr(code_point)
def getHangulWeight(hc):
    """
    determine a weight for a Wanseong Hangul Syllable Block, a Hangul Letter or
    Hangul-embedded Symbol

    The character is first mapped through the table matching its class
    (HWJAMO, CPJAMO, PACHAR or CLCHAR), then split into L, V and T parts
    which are passed to ``getHangulWeightLVT`` together with the type code.

    :param char hc: Single character string
    :returns: the weight from ``getHangulWeightLVT``, or 0 when the mapped
        character is neither a jamo nor a precomposed syllable
    :raises RuntimeError: when the HWJAMO, PACHAR or CLCHAR table maps the
        character to itself
    """
    # _type:
    # 0 is assigned to a Johab Hangul Syllable Block or Wanseong Hangul Syllable Block.
    # 1 is assigned when there is only a Syllable-Final Letter.
    # 2 is assigned to a Halfwidth Hangul Letter.
    # 3 is assigned to a Hangul Compatibility Letter.
    # 4 is assigned to a Parenthesized Hangul Letter/Syllable Block.
    # 5 is assigned to a Circled Hangul Letter/Syllable Block.
    _type = 0
    index = ord(hc)
    # Default to the filler characters for the parts that stay absent.
    L = six.chr(0x115F)
    V = six.chr(0x1160)
    T = None

    def _unmapped(table_name):
        # RuntimeError is what the former bare ``raise`` produced on
        # Python 3 outside an except block; now it carries a message.
        return RuntimeError(
            "getHangulWeight: %r has no mapping in %s" % (hc, table_name))

    if uchar.isJongseongJamo(hc):
        _type = 1
        T = hc
    elif uchar.isHalfwidthLetter(hc):
        _type = 2
        index = HWJAMO[index - 0xFFA0]
        if index == ord(hc):
            raise _unmapped("HWJAMO")
    elif uchar.isCompatibilityLetter(hc):
        _type = 3
        index = CPJAMO[index - 0x3131]
    elif uchar.isParenthesizedLetter(hc):
        _type = 4
        index = PACHAR[index - 0x3200]
        if index == ord(hc):
            raise _unmapped("PACHAR")
    elif uchar.isCircledLetter(hc):
        _type = 5
        index = CLCHAR[index - 0x3260]
        if index == ord(hc):
            raise _unmapped("CLCHAR")

    mapped = six.chr(index)
    if uchar.isChoseongJamo(mapped):
        L = mapped
    elif uchar.isJungseongJamo(mapped):
        V = mapped
    elif uchar.isJongseongJamo(mapped):
        T = mapped
    elif uchar.isPrecomposedSyllable(mapped):
        parts = decomposeHangul(mapped)
        L = parts[0]
        V = parts[1]
        T = parts[2] if len(parts) == 3 else None
    else:
        return 0

    return getHangulWeightLVT(L, V, T, _type)
Exemple #31
0
 def visit_range(self, node, children):
     """
     Build a CharSet holding every character from ``start`` to ``end``
     inclusive; ``children`` is the triple ``(start, dash, end)``.
     """
     start, dash, end = children
     first, last = ord(start), ord(end)
     return CharSet(list(map(chr, range(first, last + 1))))
Exemple #32
0
def py_scanstring(s,
                  end,
                  encoding=None,
                  strict=True,
                  _b=BACKSLASH,
                  _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` (default ``DEFAULT_ENCODING``) is used to convert chunks
    that are not already text. ``_b`` and ``_m`` are the escape lookup table
    and chunk matcher bound as defaults.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, six.text_type):
                content = six.text_type(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator, )
                # Format through errmsg like every other error here;
                # previously the raw (msg, s, end) tuple was raised.
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc, ), s, end))
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            if len(esc) != 4:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            uni = int(esc, 16)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise ValueError(errmsg(msg, s, end))
                uni2 = int(esc2, 16)
                # Combine the two halves into one code point above U+FFFF.
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = chr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return ''.join(chunks), end
 def testEscapeValues(self):
     """Check the first parse result of ``identifier.escape`` for three inputs."""
     cases = (
         ("&", r"\26"),
         ('\x81', "\\" + six.chr(129)),
         ("~", r'\~'),
     )
     for expected, source in cases:
         self.assertEqual(expected, identifier.escape.parseString(source)[0])
Exemple #34
0
 def visit_range(self, node, children):
     """
     Expand a ``start-end`` range into a CharSet of all characters whose
     code points lie between those of ``start`` and ``end``, inclusive.
     """
     start, dash, end = children
     members = []
     for code_point in range(ord(start), ord(end) + 1):
         members.append(chr(code_point))
     return CharSet(members)