Exemple #1
0
	def scan_flow_scalar_non_spaces(self, start_mark):
		'''Scan the non-whitespace part of a double-quoted scalar.

		Consumes literal characters and backslash escape sequences until the
		next character is not a backslash and is one of: double quote, NUL,
		space, tab or newline.

		:param start_mark:
			Mark of the start of the scalar, only used for error reporting.

		:return: List of string chunks in the order they were scanned.

		:raises ScannerError:
			If a hexadecimal escape is malformed, a high surrogate escape is
			not followed by a low surrogate escape, or the escape character
			is unknown.
		'''
		# See the specification for details.
		chunks = []
		while True:
			# Grab the longest run of characters which need no special handling.
			length = 0
			while self.peek(length) not in '\"\\\0 \t\n':
				length += 1
			if length:
				chunks.append(self.prefix(length))
				self.forward(length)
			ch = self.peek()
			if ch != '\\':
				# Quote, whitespace or end of input: the caller takes over.
				return chunks
			self.forward()
			ch = self.peek()
			if ch in self.ESCAPE_REPLACEMENTS:
				# Single-character escape with a fixed replacement.
				chunks.append(self.ESCAPE_REPLACEMENTS[ch])
				self.forward()
			elif ch in self.ESCAPE_CODES:
				# Numeric escape: ESCAPE_CODES gives the number of hex digits.
				length = self.ESCAPE_CODES[ch]
				self.forward()
				for k in range(length):
					if self.peek(k) not in hexdigits:
						raise ScannerError(
							'while scanning a double-quoted scalar', start_mark,
							'expected escape sequence of %d hexdecimal numbers, but found %r' % (
								length, self.peek(k)),
							self.get_mark()
						)
				code = int(self.prefix(length), 16)
				self.forward(length)
				# High (leading) surrogates are U+D800..U+DBFF only; U+DC00 is
				# already a low (trailing) surrogate and must not start a pair.
				if 0xD800 <= code <= 0xDBFF:
					# Start of the surrogate pair
					next_char = self.prefix(6)
					low_code = None
					# The length check guards against a truncated look-ahead,
					# which would otherwise break the indexing and int() below.
					if (
						len(next_char) == 6
						and next_char[0] == '\\'
						and next_char[1] == 'u'
						and set(next_char[2:]) <= hexdigits_set
					):
						low_code = int(next_char[2:], 16)
					if low_code is None or not (0xDC00 <= low_code <= 0xDFFF):
						raise ScannerError(
							'while scanning a double-quoted scalar', start_mark,
							'expected escape sequence with the next character in surrogate pair, but found %r' % (
								next_char
							),
							self.get_mark()
						)
					code = surrogate_pair_to_character(code, low_code)
					self.forward(6)
				chunks.append(unichr(code))
			else:
				raise ScannerError(
					'while scanning a double-quoted scalar', start_mark,
					('found unknown escape character %r' % ch), self.get_mark()
				)
Exemple #2
0
	def test_surrogate_pair_to_character(self):
		# 0xD83D and 0xDC8E are expected to combine into code point 0x1F48E.
		high_surrogate = 0xD83D
		low_surrogate = 0xDC8E
		combined = plu.surrogate_pair_to_character(high_surrogate, low_surrogate)
		self.assertEqual(0x1F48E, combined)
Exemple #3
0
 def test_surrogate_pair_to_character(self):
     # 0xD83D and 0xDC8E are expected to combine into code point 0x1F48E.
     surrogates = (0xD83D, 0xDC8E)
     actual = plu.surrogate_pair_to_character(*surrogates)
     self.assertEqual(0x1F48E, actual)