def scan_flow_scalar_non_spaces(self, start_mark):
    """Scan the non-blank part of a double-quoted scalar, decoding escapes.

    Repeatedly consumes a run of ordinary characters (everything up to a
    double quote, backslash, NUL, space, tab or newline) and then, if the
    run is followed by a backslash, the escape sequence after it.  Scanning
    stops at the first character that ends a run and is not a backslash;
    that character is left unconsumed.

    start_mark is only used for error reporting.

    Returns the list of collected string chunks (literal runs and decoded
    escape values, in input order).

    Raises ScannerError for an unknown escape character, for a numeric
    escape that is not followed by the required number of hexadecimal
    digits, and for a high surrogate escape that is not immediately
    followed by a ``\\uXXXX`` low surrogate escape.
    """
    # See the specification for details.
    chunks = []
    while True:
        # Collect the longest run of characters needing no special handling.
        length = 0
        while self.peek(length) not in '\"\\\0 \t\n':
            length += 1
        if length:
            chunks.append(self.prefix(length))
            self.forward(length)

        ch = self.peek()
        if ch != '\\':
            # Quote, blank, line break or end of input: the caller decides.
            return chunks

        # Skip the backslash and look at the escape character itself.
        self.forward()
        ch = self.peek()
        if ch in self.ESCAPE_REPLACEMENTS:
            chunks.append(self.ESCAPE_REPLACEMENTS[ch])
            self.forward()
        elif ch in self.ESCAPE_CODES:
            # Numeric escape: ESCAPE_CODES gives the number of hex digits.
            length = self.ESCAPE_CODES[ch]
            self.forward()
            for k in range(length):
                if self.peek(k) not in hexdigits:
                    raise ScannerError(
                        'while scanning a double-quoted scalar', start_mark,
                        'expected escape sequence of %d hexdecimal numbers, but found %r' % (
                            length, self.peek(k)),
                        self.get_mark()
                    )
            code = int(self.prefix(length), 16)
            self.forward(length)
            # High surrogates are U+D800..U+DBFF.  U+DC00 is already a low
            # surrogate and must not be treated as the start of a pair:
            # combining it with another low surrogate would yield a value
            # beyond the Unicode range.
            if 0xD800 <= code <= 0xDBFF:
                # Start of the surrogate pair
                next_char = self.prefix(6)
                if (
                    # A short prefix means the input ended inside the escape.
                    len(next_char) != 6
                    or next_char[0] != '\\'
                    or next_char[1] != 'u'
                    or not (set(next_char[2:]) <= hexdigits_set)
                    or not (0xDC00 <= int(next_char[2:], 16) <= 0xDFFF)
                ):
                    raise ScannerError(
                        'while scanning a double-quoted scalar', start_mark,
                        'expected escape sequence with the next character in surrogate pair, but found %r' % (
                            next_char
                        ),
                        self.get_mark()
                    )
                code = surrogate_pair_to_character(code, int(next_char[2:], 16))
                self.forward(6)
            chunks.append(unichr(code))
        else:
            raise ScannerError(
                'while scanning a double-quoted scalar', start_mark,
                ('found unknown escape character %r' % ch),
                self.get_mark()
            )
def test_surrogate_pair_to_character(self):
    # The surrogate pair (0xD83D, 0xDC8E) must combine into code point 0x1F48E.
    high_surrogate, low_surrogate = 0xD83D, 0xDC8E
    combined = plu.surrogate_pair_to_character(high_surrogate, low_surrogate)
    self.assertEqual(0x1F48E, combined)