def _create_voicing_map():
    """
    Build a mapping from each kana to a list of its voiced alternatives.

    Hiragana found in the 'か', 'さ' and 'た' rows of the kana table map to
    the character one code point above them; hiragana in the 'は' row map to
    the characters one and two code points above them; every other hiragana
    maps to an empty list. The same mapping, converted with
    ``scripts.to_katakana``, is then merged in for katakana.
    """
    table = kana_table.KanaTable.get_cached().get_table()
    single_voiced = table['か'] + table['さ'] + table['た']
    double_voiced = table['は']

    voicing_map = {}
    for kana in scripts.get_script(scripts.Script.Hiragana):
        if kana in single_voiced:
            offsets = (1,)
        elif kana in double_voiced:
            offsets = (1, 2)
        else:
            offsets = ()
        base = ord(kana)
        voicing_map[kana] = [chr(base + offset) for offset in offsets]

    # Add katakana into the mix
    katakana_vm = {}
    for hiragana, voiced in iteritems(voicing_map):
        katakana_vm[scripts.to_katakana(hiragana)] = [
            scripts.to_katakana(v) for v in voiced
        ]
    voicing_map.update(katakana_vm)

    return voicing_map
def test_cpc(self):
    """Check classification and KDKC normalization of every ``self.cpcchar``.

    Each character must be rejected or accepted by the ``uchar`` predicates
    listed below (checked in this order), and ``normalizeJamoKDKC`` must
    return its NFKD form padded with the U+115F / U+1160 fillers.
    """
    # (predicate name on ``uchar``, expected result), in assertion order.
    expectations = (
        ("isModernChoseong", False),
        ("isChoseongJamo", False),
        ("isModernJungseong", False),
        ("isJungseongJamo", False),
        ("isModernJongseong", False),
        ("isOldJongseong", False),
        ("isJongseongJamo", False),
        ("isHangulJamo", False),
        ("isHalfwidthLetter", False),
        ("isCompatibilityLetter", True),
        ("isParenthesizedLetter", False),
        ("isCircledLetter", False),
        ("isPrecomposedSyllable", False),
        ("isHangulLetter", True),
    )
    for cpc in self.cpcchar:
        decomposed = unicodedata.normalize("NFKD", cpc)
        for predicate_name, expected in expectations:
            outcome = getattr(uchar, predicate_name)(cpc)
            if expected:
                self.assertTrue(outcome, msg=cpc)
            else:
                self.assertFalse(outcome, msg=cpc)

        if uchar.isChoseongJamo(decomposed):
            self.assertEqual(normalization.normalizeJamoKDKC(cpc),
                             decomposed + six.chr(0x1160))
        else:
            self.assertEqual(
                normalization.normalizeJamoKDKC(cpc),
                six.chr(0x115F) + six.chr(0x1160) + decomposed)
def recomposeHangul(source):
    """
    Restore an NFC/NFKC string to L V T form where needed.

    If one uses a UAX #15 algorithm instead of composeHangul for
    normalization, an Old Hangul Syllable Block can be decomposed into a
    Wanseong Modern Hangul Syllable Block and Johab Hangul Letter(s). This
    function walks the string and, whenever the tracked previous character
    is a precomposed syllable without a final (index divisible by TCount)
    and the current character satisfies ``uchar.isOldJongseong``, replaces
    that syllable by its separate L and V letters followed by the current
    character.

    :param string source: unicode string
    """
    if len(source) == 0:
        return ""

    last = source[0]
    pieces = [last]
    for ch in source[1:]:
        # check to see if two consecutive characters are a Wanseong Modern
        # Hangul Syllable Block and a Syllable-Final Letter.
        SIndex = ord(last) - SBase
        open_syllable = 0 <= SIndex < SCount and SIndex % TCount == 0
        if open_syllable and uchar.isOldJongseong(ch):
            L = LBase + SIndex // NCount
            V = VBase + (SIndex % NCount) // TCount
            pieces[-1] = six.chr(L)
            pieces.append(six.chr(V))
            pieces.append(ch)
            # ``last`` is deliberately left unchanged here.
            continue
        last = ch
        pieces.append(ch)

    return "".join(pieces)
def setUp(self):
    """Prepare the character fixtures used by the tests.

    ``self.parenthesized`` holds U+3200..U+321E and ``self.circled`` holds
    U+3260..U+327E (both ranges inclusive).
    """
    self.parenthesized = [six.chr(code) for code in range(0x3200, 0x321E + 1)]
    self.circled = [six.chr(code) for code in range(0x3260, 0x327E + 1)]
def _create_voicing_map():
    """
    Construct the map from kana to their voiced alternatives.

    For hiragana in the 'か', 'さ' and 'た' rows the alternative is the next
    code point; for the 'は' row it is the next two code points; all other
    hiragana get an empty list. Katakana entries are derived from the
    hiragana ones through ``scripts.to_katakana`` and merged into the result.
    """
    table = kana_table.KanaTable.get_cached().get_table()
    one_step_rows = table['か'] + table['さ'] + table['た']
    two_step_rows = table['は']

    def alternatives(kana):
        # Number of consecutive following code points that are variants.
        if kana in one_step_rows:
            steps = 1
        elif kana in two_step_rows:
            steps = 2
        else:
            steps = 0
        code = ord(kana)
        return [chr(code + step) for step in range(1, steps + 1)]

    voicing_map = {}
    for kana in scripts.get_script(scripts.Script.Hiragana):
        voicing_map[kana] = alternatives(kana)

    # Add katakana into the mix
    katakana_vm = {}
    for key, variants in iteritems(voicing_map):
        katakana_vm[scripts.to_katakana(key)] = list(
            map(scripts.to_katakana, variants))
    voicing_map.update(katakana_vm)

    return voicing_map
def sortKey(text, hangul_first=True):
    """
    key function for sorted

    Walks ``text`` and produces one integer weight per collation unit. A
    choseong jamo optionally followed by a jungseong jamo (and then a
    jongseong jamo) is consumed as a single unit; other Hangul letters are
    weighed with ``getHangulWeight``; everything else uses its code point.
    Bit 31 is added to the non-Hangul weights when ``hangul_first`` is true,
    and to the Hangul weights otherwise.

    :param string text: A string for weight
    :param bool hangul_first: Boolean
    :return: list of integer weights
    """
    weights = []
    itr = enumerate(text)
    for i, ch in itr:
        if uchar.isChoseongJamo(ch):
            L = ch
            V = six.chr(0x1160)
            T = None
            if i + 1 < len(text) and uchar.isJungseongJamo(text[i + 1]):
                # Consume the following vowel as part of this unit.
                _, V = next(itr)
                if i + 2 < len(text) and uchar.isJongseongJamo(text[i + 2]):
                    _, T = next(itr)
            _type = 0
            weight = getHangulWeightLVT(L, V, T, _type)
            # Repack: keep the low two bits, drop one bit of the rest.
            _type = weight & 3
            weight = weight >> 2
            weight = weight << 1
            weight = weight | _type
            if not hangul_first:
                weight += 1 << 31
        elif uchar.isJongseongJamo(ch):
            # NOTE(review): this branch stores ``ch`` in V while testing
            # isJongseongJamo; confirm whether isJungseongJamo was intended.
            L = six.chr(0x115F)
            V = ch
            # Initialise T so it is never unbound or left over from an
            # earlier iteration when no jongseong follows.
            T = None
            if i + 1 < len(text) and uchar.isJongseongJamo(text[i + 1]):
                _, T = next(itr)
            _type = 0
            weight = getHangulWeightLVT(L, V, T, _type)
            _type = weight & 3
            weight = weight >> 2
            weight = weight << 1
            weight = weight | _type
            if not hangul_first:
                weight += 1 << 31
        elif uchar.isHangulLetter(ch):
            weight = getHangulWeight(ch)
            _type = weight & 3
            weight = weight >> 2
            weight = weight << 1
            weight = weight | _type
            if not hangul_first:
                weight += 1 << 31
        else:
            weight = ord(ch)
            if hangul_first:
                weight += 1 << 31
        weights.append(weight)

    return weights
def visit_escaped_numeric_character(self, node, children):
    """Turn a parsed numeric escape into the character it denotes.

    ``children`` must hold exactly one ``(escape, character_code)`` pair.
    A bare backslash prefix means the code is octal; ``\\u``, ``\\x`` and
    ``\\U`` mean hexadecimal.

    Raises:
        NotImplementedError: for any other escape prefix.
    """
    (pair,) = children
    escape, character_code = pair

    if escape == '\\':
        # Octal escape code like '\077'
        base = 8
    elif escape in ('\\u', '\\x', '\\U'):
        # hex escape like '\xff'
        base = 16
    else:
        raise NotImplementedError('Unhandled character escape %s' % escape)

    return chr(int(character_code, base))
def visit_escaped_numeric_character(self, node, children):
    """Decode a numeric character escape.

    ``children`` is expected to contain a single two-item sequence: the
    escape prefix and the digits. The digits are read as octal for a bare
    backslash and as hexadecimal for ``\\u``, ``\\x`` or ``\\U``.

    Raises:
        NotImplementedError: when the prefix is none of the above.
    """
    (parsed,) = children
    escape, character_code = parsed

    # Octal escape code like '\077'
    if escape == '\\':
        return chr(int(character_code, 8))

    # hex escape like '\xff'
    if escape in ('\\u', '\\x', '\\U'):
        return chr(int(character_code, 16))

    raise NotImplementedError('Unhandled character escape %s' % escape)
def setUp(self):
    """Build the jamo fixtures as lists of single characters.

    All ranges are inclusive code point ranges. ``tchar`` additionally
    starts with an empty string. The ``old_*`` lists each concatenate two
    ranges.
    """
    def chars(first, last):
        # Characters for the inclusive code point range first..last.
        return [six.chr(code) for code in range(first, last + 1)]

    self.lchar = chars(0x1100, 0x1112)
    self.vchar = chars(0x1161, 0x1175)
    self.tchar = [""] + chars(0x11A8, 0x11C2)
    self.old_lchar = chars(0x1113, 0x115E) + chars(0xA960, 0xA97C)
    self.old_vchar = chars(0x1176, 0x11A7) + chars(0xD7B0, 0xD7C6)
    self.old_tchar = chars(0x11C3, 0x11FF) + chars(0xD7CB, 0xD7FB)
def bisect_string_range(start, end):
    """Returns a string that is approximately in the middle of the range.

    (start, end) is treated as a string range, and it is assumed
    start <= end in the usual lexicographic string ordering. The output key
    mid is guaranteed to satisfy start <= mid <= end.

    The method proceeds by comparing initial characters of start and
    end. When the characters are equal, they are appended to the mid
    string. In the first place that the characters differ, the differing
    characters are averaged and this average is appended to the mid string.
    If averaging resulted in rounding down, an additional character is
    added to the mid string to make up for the rounding down. This extra
    step is necessary for correctness in the case that the average of the
    two characters is equal to the character in the start string.

    This method makes the assumption that most keys are ascii and it
    attempts to perform splitting within the ascii range when that results
    in a valid split.

    Args:
      start: A string.
      end: A string such that start <= end.

    Returns:
      A string mid such that start <= mid <= end.
    """
    if start == end:
        return start
    # Pad both keys so a key that is a prefix of the other still has a
    # character to compare at the first differing position.
    start += "\0"
    end += "\0"
    midpoint = []

    expected_max = 127
    for i in range(min(len(start), len(end))):
        if start[i] == end[i]:
            midpoint.append(start[i])
        else:
            ord_sum = ord(start[i]) + ord(end[i])
            # Floor division: chr() needs an int, and true division would
            # hand it a float on Python 3.
            midpoint.append(chr(ord_sum // 2))
            if ord_sum % 2:
                # The average was rounded down; add one more character that
                # is above the next character of start.
                if len(start) > i + 1:
                    ord_start = ord(start[i + 1])
                else:
                    ord_start = 0
                if ord_start < expected_max:
                    ord_split = (expected_max + ord_start) // 2
                else:
                    ord_split = (0xFFFF + ord_start) // 2
                midpoint.append(chr(ord_split))
            break
    return "".join(midpoint)
def setUp(self):
    """Build the syllable list and the L/V/T product used by the tests.

    ``self.syllable`` holds U+AC00..U+D7A3 inclusive. ``self.lvt`` is an
    ``itertools.product`` iterator over the three local character lists;
    the second and third lists each begin with an empty string.
    """
    def chars(first, last):
        # Characters for the inclusive code point range first..last.
        return [six.chr(code) for code in range(first, last + 1)]

    self.syllable = chars(0xAC00, 0xD7A3)

    lchar = chars(0x1100, 0x1112)
    vchar = [""] + chars(0x1161, 0x1175)
    tchar = [""] + chars(0x11A8, 0x11C2)
    self.lvt = itertools.product(lchar, vchar, tchar)
def handle_match(m):
    """Return the replacement text for one matched entity.

    Group 1 of ``m`` is the entity name. Names found in ``HTML_ENTITIES``
    are replaced by the character for the stored code point. ``#x``/``#X``
    prefixed names are read as hexadecimal and other ``#`` prefixed names
    as decimal character references. Anything that cannot be resolved,
    including malformed or out-of-range numeric references, yields an empty
    string.
    """
    name = m.group(1)
    if name in HTML_ENTITIES:
        return chr(HTML_ENTITIES[name])
    try:
        if name[:2] in ('#x', '#X'):
            return chr(int(name[2:], 16))
        elif name.startswith('#'):
            return chr(int(name[1:]))
    except (ValueError, OverflowError):
        # ValueError: bad digits or a code point outside the Unicode range.
        # OverflowError: chr() on a number too large for a C int.
        pass
    return ''
def show_status(self, changed):
    """Refresh the status text and signal unsaved metadata.

    The status string gets U+2690 when ``self.metadata.latlong`` is truthy
    ('geotagged') and U+26A1 when ``changed`` is truthy ('unsaved'). After
    updating the label and eliding the name, ``new_metadata`` is emitted on
    the image list if ``changed`` is truthy.
    """
    markers = []
    # set 'geotagged' status
    if self.metadata.latlong:
        markers.append(six.chr(0x2690))
    # set 'unsaved' status
    if changed:
        markers.append(six.chr(0x26A1))
    self.status.setText(''.join(markers))
    self._elide_name()
    if changed:
        self.image_list.new_metadata.emit(True)
def normalizeJamoKDKC(source):
    """
    Normalizing Compatibility/Halfwidth Hangul Letters and Hangul-embedded
    symbols (NormalizeJamoKDKC)

    Compatibility letters, code points U+3200..U+320D, U+3260..U+326D and
    halfwidth letters are mapped through the CPJAMO / PCJAMO / HWJAMO
    tables; the mapped letter is then padded with the U+115F / U+1160
    fillers according to whether it is a choseong, jungseong or jongseong
    jamo. Characters in the U+3200..U+320D range are additionally wrapped
    in parentheses. All other characters are copied unchanged.

    :param string source: unicode string
    """
    PHBase = 0x3200
    PHEnd = 0x320D
    CHBase = 0x3260
    CHEnd = 0x326D

    if len(source) == 0:
        return ""

    result = []
    for ch in source:
        code = ord(ch)
        closing = None  # closing parenthesis to emit after the letter
        if uchar.isCompatibilityLetter(ch):
            ch = six.chr(CPJAMO[code - 0x3131])
        elif PHBase <= code <= PHEnd:
            result.append('\u0028')
            ch = six.chr(PCJAMO[code - PHBase])
            closing = '\u0029'
        elif CHBase <= code <= CHEnd:
            ch = six.chr(PCJAMO[code - CHBase])
        elif uchar.isHalfwidthLetter(ch):
            ch = six.chr(HWJAMO[code - 0xFFA0])
        else:
            result.append(ch)
            continue

        if uchar.isChoseongJamo(ch):
            result.extend((ch, '\u1160'))
        elif uchar.isJungseongJamo(ch):
            result.extend(('\u115F', ch))
        elif uchar.isJongseongJamo(ch):
            result.extend(('\u115F', '\u1160', ch))

        if closing is not None:
            result.append(closing)

    return "".join(result)
def _bracket_fixup(path):
    """Rewrite ``[`` as ``[[]`` and ``]`` as ``[]]`` in ``path``.

    A character not present in ``path`` (searched upward from code point
    256) stands in for ``]`` while ``[`` is rewritten, so the brackets
    introduced by the first rewrite are not touched by the second. If every
    candidate up to 65535 occurs in the path, an error is logged and the
    path is returned unchanged.
    """
    if '[' not in path and ']' not in path:
        return path

    code = 256
    while chr(code) in path:
        code += 1
        if code > 65535:
            log.error(
                'Cannot fix brackets in path, path contains all possible sentinal characters'
            )
            return path

    placeholder = chr(code)
    return (path.replace(']', placeholder)
                .replace('[', '[[]')
                .replace(placeholder, '[]]'))
def _unescape_one(match):
    """Return the replacement for one escape match.

    Group 1, when present, is returned as is. Otherwise group 2, when
    present, is read as a hexadecimal code point and converted to a
    character. If neither group matched, an empty text string is returned.
    """
    literal = match.group(1)
    if literal is not None:
        return literal

    hex_digits = match.group(2)
    if hex_digits is None:
        return six.text_type()
    return six.chr(int(hex_digits, 16))
def utf16decode(x):
    """Decode backslash escapes in ``x``.

    ``\\u`` followed by four characters is replaced by the character whose
    code point those four characters spell in hexadecimal. A backslash
    followed by any other character yields that character alone. All other
    characters are copied through.
    """
    pieces = []
    # state: -1 = plain text, 0 = just saw a backslash,
    # 1..4 = number of the hex digit expected next.
    state = -1
    for z in x:
        if state == -1:
            if z == "\\":
                state = 0
            else:
                pieces.append(six.text_type(z))
        elif state == 0:
            if z == "u":
                state = 1
                acc = ""
            else:
                pieces.append(six.text_type(z))
                state = -1
        elif state < 4:
            acc += z
            state += 1
        else:
            # Fourth hex digit: emit the character and go back to text.
            pieces.append(six.chr(int(acc + z, 16)))
            state = -1
    return "".join(pieces)
def composeHangul(source):
    """
    returns a Wanseong Modern Hangul Syllable Block for the given
    Johab Modern Hangul Syllable Block. Even when a portion of an
    Old Hangul Syllable Block is a Modern Hangul Syllable Block,
    unlike UAX #15, that portion is not transformed to a Wanseong
    Modern Hangul Syllable Block.

    :param string source: unicode string.
    """
    if len(source) == 0:
        return ""

    last = source[0]
    result = [last]
    for ch in source[1:]:
        # 1. L followed by V: merge into an LV syllable.
        LIndex = ord(last) - LBase
        if 0 <= LIndex < LCount:
            VIndex = ord(ch) - VBase
            if 0 <= VIndex < VCount:
                last = six.chr(SBase + (LIndex * VCount + VIndex) * TCount)
                result[-1] = last
                continue

        # 2. LV syllable (no final yet) followed by a final.
        SIndex = ord(last) - SBase
        if 0 <= SIndex < SCount and SIndex % TCount == 0:
            TIndex = ord(ch) - TBase
            if 0 < TIndex < TCount:
                last = six.chr(ord(last) + TIndex)
                result[-1] = last
                continue

            if uchar.isOldJongseong(ch):
                # Split the syllable back into its L and V letters and
                # keep the old final after them; ``last`` is left as is.
                L = LBase + SIndex // NCount
                V = VBase + (SIndex % NCount) // TCount
                result[-1] = six.chr(L)
                result.append(six.chr(V))
                result.append(ch)
                continue

        last = ch
        result.append(ch)

    return "".join(result)
def setUp(self):
    """Build the character fixtures as lists of single characters.

    Every attribute covers one inclusive code point range:
    ``lchar`` U+1100..U+1112, ``vchar`` U+1161..U+1175,
    ``tchar`` U+11A8..U+11C2, ``cpcchar`` U+3131..U+314E and
    ``cpvchar`` U+314F..U+3163.
    """
    ranges = (
        ("lchar", 0x1100, 0x1112),
        ("vchar", 0x1161, 0x1175),
        ("tchar", 0x11A8, 0x11C2),
        ("cpcchar", 0x3131, 0x314E),
        ("cpvchar", 0x314F, 0x3163),
    )
    for attribute, first, last in ranges:
        setattr(self, attribute,
                [six.chr(code) for code in range(first, last + 1)])
def _replace_entity(match):
    """Return the character for one matched entity, or the match unchanged.

    Group 1 of ``match`` is the entity text. ``#`` introduces a numeric
    reference, hexadecimal if followed by ``x``/``X`` and decimal
    otherwise; any other text is looked up in ``name2codepoint``. If the
    reference cannot be resolved (bad digits, code point out of range,
    number too large, unknown name, empty text), the whole matched text is
    returned so the input is left as it was.
    """
    text = match.group(1)
    if text.startswith('#'):
        digits = text[1:]
        try:
            if digits.startswith(('x', 'X')):
                c = int(digits[1:], 16)
            else:
                c = int(digits)
            return chr(c)
        except (ValueError, OverflowError):
            # OverflowError: chr() on a number too large for a C int.
            return match.group(0)
    try:
        return chr(name2codepoint[text])
    except (ValueError, KeyError):
        return match.group(0)
def decomposeHangul(S):
    """
    returns a Johab Modern Hangul Syllable Block for the given
    Wanseong Modern Hangul Syllable Block

    The result is the L and V letters of the syllable, followed by the T
    letter when the syllable has one.

    :param char S: Single character Hangul Syllable. If it is not one
        (its index falls outside 0..SCount-1), the input is returned.
    """
    SIndex = ord(S) - SBase
    if not 0 <= SIndex < SCount:
        return S

    L = LBase + SIndex // NCount
    V = VBase + (SIndex % NCount) // TCount
    T = TBase + SIndex % TCount

    letters = [six.chr(L), six.chr(V)]
    if T != TBase:
        letters.append(six.chr(T))
    return "".join(letters)
def _create_key_event(self, event_type, event):
    """ Convert a GUI toolkit keyboard event into a KeyEvent.

    Returns None, after calling ``event.Skip()``, when neither
    ``self.focus_owner`` nor ``self.component`` is set. Also returns None
    for a 'character' event whose character is empty.
    """
    focus_owner = self.focus_owner
    if focus_owner is None:
        focus_owner = self.component

    if focus_owner is None:
        event.Skip()
        return None

    if event_type == 'character':
        key = six.chr(event.GetUniChar())
        if not key:
            return None
    else:
        key_code = event.GetKeyCode()
        if key_code in KEY_MAP:
            key = KEY_MAP.get(key_code)
        else:
            key = six.chr(event.GetUniChar()).lower()

    # Use the last-seen mouse coordinates instead of GetX/GetY due
    # to wx bug.
    x, y = self._last_mouse_pos

    # Someday when wx does this properly, we can use these instead:
    # x = event.GetX()
    # y = event.GetY()

    return KeyEvent(event_type=event_type,
                    character=key,
                    alt_down=event.AltDown(),
                    control_down=event.ControlDown(),
                    shift_down=event.ShiftDown(),
                    x=x,
                    y=self._flip_y(y),
                    event=event,
                    window=self)
def ntou(n, encoding='ISO-8859-1'):
    """Return the given native string as a unicode string with the given
    encoding.

    With the special encoding ``'escape'``, ``n`` is decoded as ISO-8859-1
    and every backslash-u escape followed by four hexadecimal digits is
    replaced by the character it names. Sequences whose four characters
    are not all hexadecimal digits are left untouched.
    """
    # In Python 2, the native string type is bytes.
    # First, check for the special encoding 'escape'. The test suite uses this
    # to signal that it wants to pass a string with embedded \uXXXX escapes,
    # but without having to prefix it with u'' for Python 2, but no prefix
    # for Python 3.
    if encoding == 'escape':
        # Only hex digits may follow \u; matching any letter would make
        # int(..., 16) raise on text such as '\users'.
        return six.text_type(
            re.sub(r'\\u([0-9a-fA-F]{4})',
                   lambda m: chr(int(m.group(1), 16)),
                   n.decode('ISO-8859-1')))
    # Assume it's already in the given encoding, which for ISO-8859-1 is almost
    # always what was intended.
    return n.decode(encoding)
def show_plot(OO, excursi, rmsdpos, minimum):
    """Show contour plots of ``excursi`` and ``rmsdpos`` over ``OO.grid``.

    Both arrays are reshaped in place to a square grid of side
    ``len(OO.grid)``. Each is drawn in its own figure with the grid values
    scaled by 0.02 on both axes. The first figure also marks ``minimum``
    (converted from radians to degrees) with a red plus. Blocks in
    ``plt.show()`` until the windows are closed.
    """
    excursi.reshape(flex.grid(len(OO.grid), len(OO.grid)))
    rmsdpos.reshape(flex.grid(len(OO.grid), len(OO.grid)))

    from matplotlib import pyplot as plt
    axis_values = [i * 0.02 for i in OO.grid]

    plt.figure()
    CS = plt.contour(axis_values, axis_values, excursi.as_numpy_array())
    # chr(176) is the degree sign.
    plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f" + chr(176))
    plt.plot([minimum[1] * 180. / math.pi], [minimum[0] * 180. / math.pi],
             "r+")
    plt.title("Rms rotational excursion to reflection condition, degrees")
    # gca() returns the axes just drawn on; a bare plt.axes() would add a
    # new, empty axes on current matplotlib versions.
    plt.gca().set_aspect("equal")

    plt.figure()
    CS = plt.contour(axis_values, axis_values, rmsdpos.as_numpy_array())
    plt.clabel(CS, inline=1, fontsize=10, fmt="%7.4f px")
    plt.title("Rms position shift, obs vs. model, pixels")
    plt.gca().set_aspect("equal")

    plt.show()
def _create_key_event(self, event_type, event):
    """Convert a toolkit keyboard event into a KeyEvent.

    Returns None, after calling ``event.ignore()``, when neither
    ``self.focus_owner`` nor ``self.component`` is set, and returns None
    when the resulting key is empty.
    """
    focus_owner = self.focus_owner
    if focus_owner is None:
        focus_owner = self.component
        if focus_owner is None:
            event.ignore()
            return None

    if event_type != 'character':
        # Convert the keypress to a standard enable key if possible,
        # otherwise to text.
        key_code = event.key()
        key = KEY_MAP.get(key_code)
        if key is None:
            key = six.chr(key_code).lower()
    else:
        key = six.text_type(event.text())

    if not key:
        return None

    # Use the last-seen mouse position as the coordinates of this event.
    x, y = self.control.handler.last_mouse_pos

    modifiers = event.modifiers()

    return KeyEvent(
        event_type=event_type,
        character=key,
        x=x,
        y=self._flip_y(y),
        alt_down=bool(modifiers & QtCore.Qt.AltModifier),
        shift_down=bool(modifiers & QtCore.Qt.ShiftModifier),
        control_down=bool(modifiers & QtCore.Qt.ControlModifier),
        event=event,
        window=self)
def _split_id_or_name(id_or_name1, id_or_name2, batch_size, maintain_batches):
    """Return an id_or_name that is between id_or_name1 an id_or_name2.

    Attempts to split the range [id_or_name1, id_or_name2] in half,
    unless maintain_batches is true and the size of the range
    [id_or_name1, id_or_name2] is less than or equal to batch_size.

    Args:
      id_or_name1: A number or string or the id_or_name component of a key
      id_or_name2: A number or string or the id_or_name component of a key
      batch_size: The range size that will not be split if maintain_batches
        is true.
      maintain_batches: A boolean for whether to keep small ranges intact.

    Returns:
      An id_or_name such that id_or_name1 <= id_or_name <= id_or_name2.

    Raises:
      KeyRangeError: if the two values are of different kinds and are not
        an integer followed by a string.
    """
    if (isinstance(id_or_name1, six.integer_types) and
            isinstance(id_or_name2, six.integer_types)):
        if not maintain_batches or id_or_name2 - id_or_name1 > batch_size:
            # Floor division keeps the midpoint an integer on Python 3.
            return (id_or_name1 + id_or_name2) // 2
        else:
            return id_or_name1
    elif (isinstance(id_or_name1, six.string_types) and
          isinstance(id_or_name2, six.string_types)):
        return KeyRange.bisect_string_range(id_or_name1, id_or_name2)
    else:
        # Mixed kinds: only (integer, string) is accepted.
        if (not isinstance(id_or_name1, six.integer_types) or
                not isinstance(id_or_name2, six.string_types)):
            raise KeyRangeError("Wrong key order: %r, %r" %
                                (id_or_name1, id_or_name2))

        zero_ch = chr(0)
        if id_or_name2 == zero_ch:
            # The string side is already chr(0), so split between the
            # integer and 2**63 - 1 instead.
            return (id_or_name1 + 2**63 - 1) // 2
        return zero_ch
def __init__(self, *args, **kw):
    """Lay out the 'taken' and 'shown' location widgets in a grid.

    Column 0 holds the row captions, columns 1-2 the 'taken' widgets,
    columns 3-4 the 'shown' widgets, and the swap button sits at row 0,
    column 4.
    """
    super(LocationInfo, self).__init__(*args, **kw)
    layout = QtWidgets.QGridLayout()
    self.setLayout(layout)
    layout.setContentsMargins(0, 0, 0, 0)
    self.members = {
        'taken': LocationWidgets(self),
        'shown': LocationWidgets(self)
    }

    # Swap button
    self.swap = SquareButton(six.chr(0x21c4))
    self.swap.setStyleSheet('QPushButton { font-size: 10px }')
    self.swap.setFont(QtGui.QFont("Dejavu Sans"))
    if not self.swap.fontInfo().exactMatch():
        # probably on Windows, try a different font
        self.swap.setFont(QtGui.QFont("Segoe UI Symbol"))
    layout.addWidget(self.swap, 0, 4)

    # Column headings
    layout.addWidget(
        QtWidgets.QLabel(translate('PhotiniMap', 'camera')), 0, 1, 1, 2)
    layout.addWidget(
        QtWidgets.QLabel(translate('PhotiniMap', 'subject')), 0, 3)

    # Row captions, rows 1..5 of column 0
    captions = (
        translate('PhotiniMap', 'Street:'),
        translate('PhotiniMap', 'City:'),
        translate('PhotiniMap', 'Province:'),
        translate('PhotiniMap', 'Country:'),
        translate('PhotiniMap', 'Region:'),
    )
    for row, caption in enumerate(captions, 1):
        layout.addWidget(QtWidgets.QLabel(caption), row, 0)

    # Editing widgets
    for ts, col in (('taken', 1), ('shown', 3)):
        widgets = self.members[ts]
        layout.addWidget(widgets['sublocation'], 1, col, 1, 2)
        layout.addWidget(widgets['city'], 2, col, 1, 2)
        layout.addWidget(widgets['province_state'], 3, col, 1, 2)
        # Country name and code share row 4, one column each.
        layout.addWidget(widgets['country_name'], 4, col)
        layout.addWidget(widgets['country_code'], 4, col + 1)
        layout.addWidget(widgets['world_region'], 5, col, 1, 2)
def on_char(self, event):
    """Insert the typed character at the cursor.

    Any current selection is deleted first. The character for the event's
    unicode key is inserted into the token list at the cursor position,
    the cursor is moved one position forward, and ``on_token_change`` is
    called.
    """
    self.delete_selection()
    typed = six.chr(event.GetUnicodeKey())
    self.__tokens.insert(self.__cursor_pos, typed)
    self.move_cursor_pos(self.__cursor_pos + 1)
    self.on_token_change()
def convertToUnicode(t):
    """Return the character whose code point is ``t[0]`` read as hex."""
    hex_digits = t[0]
    code_point = int(hex_digits, 16)
    return six.chr(code_point)
def getHangulWeight(hc):
    """
    determine a weight for a Wanseong Hangul Syllable Block,
    a Hangul Letter or Hangul-embedded Symbol

    The character is first mapped to a plain jamo or precomposed syllable
    through the HWJAMO / CPJAMO / PACHAR / CLCHAR tables where applicable,
    then split into L, V, T and passed to ``getHangulWeightLVT``.

    :param char hc: Single character string
    :return: the weight from ``getHangulWeightLVT``, or 0 when the mapped
        character is neither a jamo nor a precomposed syllable
    :raises RuntimeError: when the halfwidth, parenthesized or circled
        table maps the character to itself
    """
    _type = 0
    index = ord(hc)
    L = six.chr(0x115F)
    V = six.chr(0x1160)
    T = None

    # _type:
    #   0 is assigned to a Johab Hangul Syllable Block or Wanseong Hangul
    #     Syllable Block.
    #   1 is assigned when there is only a Syllable-Final Letter.
    #   2 is assigned to a Halfwidth Hangul Letter.
    #   3 is assigned to a Hangul Compatibility Letter.
    #   4 is assigned to a Parenthesized Hangul Letter/Syllable Block.
    #   5 is assigned to a Circled Hangul Letter/Syllable Block.
    if uchar.isJongseongJamo(hc):
        _type = 1
        T = hc
    elif uchar.isHalfwidthLetter(hc):
        _type = 2
        index = HWJAMO[index - 0xFFA0]
        if index == ord(hc):
            raise RuntimeError(
                "halfwidth letter U+%04X maps to itself" % ord(hc))
    elif uchar.isCompatibilityLetter(hc):
        _type = 3
        index = CPJAMO[index - 0x3131]
    elif uchar.isParenthesizedLetter(hc):
        _type = 4
        index = PACHAR[index - 0x3200]
        if index == ord(hc):
            raise RuntimeError(
                "parenthesized letter U+%04X maps to itself" % ord(hc))
    elif uchar.isCircledLetter(hc):
        _type = 5
        index = CLCHAR[index - 0x3260]
        if index == ord(hc):
            raise RuntimeError(
                "circled letter U+%04X maps to itself" % ord(hc))

    # From here on ``index`` is the mapped character, not a code point.
    index = six.chr(index)

    if uchar.isChoseongJamo(index):
        L = index
    elif uchar.isJungseongJamo(index):
        V = index
    elif uchar.isJongseongJamo(index):
        T = index
    elif uchar.isPrecomposedSyllable(index):
        SIndex = decomposeHangul(index)
        L = SIndex[0]
        V = SIndex[1]
        if len(SIndex) == 3:
            T = SIndex[2]
        else:
            T = None
    else:
        return 0

    return getHangulWeightLVT(L, V, T, _type)
def visit_range(self, node, children):
    """Build a CharSet of every character from ``start`` to ``end``.

    ``children`` must unpack into three items; the middle one is ignored.
    Both endpoints are included.
    """
    start, _dash, end = children
    code_points = range(ord(start), ord(end) + 1)
    return CharSet(list(map(chr, code_points)))
def py_scanstring(s,
                  end,
                  encoding=None,
                  strict=True,
                  _b=BACKSLASH,
                  _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in error messages.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, six.text_type):
                content = six.text_type(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator, )
                # Format through errmsg like every other error raised here.
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc, ), s, end))
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            if len(esc) != 4:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            uni = int(esc, 16)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise ValueError(errmsg(msg, s, end))
                uni2 = int(esc2, 16)
                # Combine the two escapes into one code point.
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = chr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return ''.join(chunks), end
def testEscapeValues(self):
    """Check ``identifier.escape`` against three escape forms.

    Each case is (expected first token, text to parse).
    """
    cases = (
        ("&", r"\26"),
        ('\x81', "\\" + six.chr(129)),
        ("~", r'\~'),
    )
    for expected, text in cases:
        self.assertEqual(expected, identifier.escape.parseString(text)[0])
def visit_range(self, node, children):
    """Return a CharSet covering ``start`` through ``end`` inclusive.

    ``children`` must unpack into exactly three items, of which only the
    first and last are used.
    """
    first, _separator, last = children
    members = []
    for code_point in range(ord(first), ord(last) + 1):
        members.append(chr(code_point))
    return CharSet(members)