def test_asciirestriction(self):
    # nodes.Text must reject byte strings; which exception is expected
    # depends on the running Python major version.
    if sys.version_info >= (3,):
        # no bytes at all allowed
        self.assertRaises(TypeError, nodes.Text, b('hol'))
        return
    # Python 2: a byte string containing a high-bit char (chr(224))
    # is expected to fail with a decode error.
    high_bit_bytes = b('hol%s' % chr(224))
    self.assertRaises(UnicodeDecodeError, nodes.Text, high_bit_bytes)
def write(self, data):
    """Encode `data`, write it to a single file, and return it.

    With Python 3 or binary output mode, `data` is returned unchanged,
    except when specified encoding and output encoding differ.

    Raises `ValueError` if a Python 3 text stream without a binary
    ``buffer`` has an encoding differing from `self.encoding`, and
    `UnicodeError` if the data cannot be encoded or written.
    """
    if not self.opened:
        self.open()
    if ('b' not in self.mode and sys.version_info < (3, 0)
            or check_encoding(self.destination, self.encoding) is False):
        data = self.encode(data)
        # `data` is a byte string from here on, so the line-ending fix
        # has to operate on bytes as well.
        if sys.version_info >= (3, 0) and os.linesep != '\n':
            data = data.replace(b('\n'), b(os.linesep))  # fix endings
    try:
        try:
            self.destination.write(data)
        except TypeError as e:
            if sys.version_info >= (3, 0) and isinstance(data, bytes):
                # Text streams refuse bytes; fall back to the underlying
                # binary buffer if the stream exposes one.
                try:
                    self.destination.buffer.write(data)
                except AttributeError:
                    if check_encoding(self.destination,
                                      self.encoding) is False:
                        raise ValueError(
                            'Encoding of %s (%s) differs \n'
                            ' from specified encoding (%s)' %
                            (self.destination_path or 'destination',
                             self.destination.encoding, self.encoding))
                    else:
                        raise e
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
    finally:
        # NOTE(review): cleanup mirrors the `autoclose` handling in
        # read(); confirm this class defines `autoclose` and `close()`.
        if self.autoclose:
            self.close()
    return data
def write(self, data):
    """Encode `data`, write it to a single file, and return it.

    With Python 3 or binary output mode, `data` is returned unchanged,
    except when specified encoding and output encoding differ.

    Raises `ValueError` when bytes cannot be written because the
    destination is a text stream with a different encoding, and
    `UnicodeError` when encoding or writing the data fails.
    """
    if not self.opened:
        self.open()

    py3 = sys.version_info >= (3, 0)
    if ('b' not in self.mode and not py3
            or check_encoding(self.destination, self.encoding) is False):
        data = self.encode(data)
        if py3 and os.linesep != '\n':
            # operate on the encoded bytes, hence the b() wrappers
            data = data.replace(b('\n'), b(os.linesep))  # fix endings

    try:
        try:
            self.destination.write(data)
        except TypeError as e:
            if py3 and isinstance(data, bytes):
                # A text-mode stream rejected bytes: try its binary buffer.
                try:
                    self.destination.buffer.write(data)
                except AttributeError:
                    if check_encoding(self.destination,
                                      self.encoding) is False:
                        raise ValueError(
                            'Encoding of %s (%s) differs \n'
                            ' from specified encoding (%s)' %
                            (self.destination_path or 'destination',
                             self.destination.encoding, self.encoding))
                    else:
                        raise e
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
    finally:
        # NOTE(review): restores the cleanup implied by the original
        # "nested in try...finally" remark; confirm `autoclose`/`close()`
        # exist on this class.
        if self.autoclose:
            self.close()
    return data
def read(self):
    """Read and decode a single file and return the data (Unicode string).

    If reading fails with a `UnicodeError`/`LookupError` and no encoding
    was specified but a source path is known, the file is re-read in
    binary mode; otherwise the error propagates.  The source is closed
    afterwards when `self.autoclose` is true.
    """
    try:
        if self.source is sys.stdin and sys.version_info >= (3, 0):
            # read as binary data to circumvent auto-decoding
            data = self.source.buffer.read()
            # normalize newlines
            data = b('\n').join(data.splitlines()) + b('\n')
        else:
            data = self.source.read()
    except (UnicodeError, LookupError):  # (in Py3k read() decodes)
        if not self.encoding and self.source_path:
            # re-read in binary mode and decode with heuristics;
            # `with` guarantees the handle is closed even if read() fails
            with open(self.source_path, 'rb') as b_source:
                data = b_source.read()
            # normalize newlines
            data = b('\n').join(data.splitlines()) + b('\n')
        else:
            raise
    finally:
        if self.autoclose:
            self.close()
    return substitute_source(app, self.source_path, self.decode(data))
def read(self):
    """
    Read and decode a single file and return the data (Unicode string).

    On a `UnicodeError`/`LookupError` while reading, the file is re-read
    in binary mode if no encoding was specified and a source path is
    known; otherwise the error is re-raised.  The source is closed
    afterwards when `self.autoclose` is true.
    """
    try:
        if self.source is sys.stdin and sys.version_info >= (3, 0):
            # read as binary data to circumvent auto-decoding
            data = self.source.buffer.read()
            # normalize newlines
            data = b('\n').join(data.splitlines()) + b('\n')
        else:
            data = self.source.read()
    except (UnicodeError, LookupError):  # (in Py3k read() decodes)
        if not self.encoding and self.source_path:
            # re-read in binary mode and decode with heuristics;
            # the context manager closes the handle even on failure
            with open(self.source_path, 'rb') as b_source:
                data = b_source.read()
            # normalize newlines
            data = b('\n').join(data.splitlines()) + b('\n')
        else:
            raise
    finally:
        if self.autoclose:
            self.close()
    return self.decode(data)
def test_coding_slug(self):
    # A coding slug on the first line is picked up.
    source = io.StringInput(
        source=b(
            """\
.. -*- coding: ascii -*-
data
blah
"""
        )
    )
    source.read()  # reading triggers the encoding detection
    self.assertEqual(source.successful_encoding, "ascii")

    # A coding slug on the second line (after a shebang) is picked up.
    source = io.StringInput(
        source=b(
            """\
#! python
# -*- coding: ascii -*-
print "hello world"
"""
        )
    )
    source.read()
    self.assertEqual(source.successful_encoding, "ascii")

    # A coding slug on the third line must not be used.
    source = io.StringInput(
        source=b(
            """\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""
        )
    )
    source.read()
    self.assertNotEqual(source.successful_encoding, "ascii")
def test_asciirestriction(self):
    # Pick the expected exception and the offending byte string for the
    # running Python version, then make a single assertion.
    if sys.version_info < (3, ):
        expected_error = UnicodeDecodeError
        payload = b('hol%s' % chr(224))
    else:
        # no bytes at all allowed
        expected_error = TypeError
        payload = b('hol')
    self.assertRaises(expected_error, nodes.Text, payload)
def test_xmlcharrefreplace(self):
    # Test that xmlcharrefreplace is the default output encoding
    # error handler.
    settings_overrides = {
        'output_encoding': 'latin1',
        'stylesheet': '',
        '_disable_config': 1,
    }
    result = core.publish_string(
        b('EUR = \xe2\x82\xac'), writer_name='html4css1',
        settings_overrides=settings_overrides)
    # Encoding a euro sign with latin1 doesn't work, so the
    # xmlcharrefreplace handler is used: U+20AC must show up as the
    # decimal character reference, not as a literal euro sign.
    self.assertIn(b('EUR = &#8364;'), result)
def test_inputrestrictions(self):
    # Build an rst parser and an empty document configured with the
    # parser's default settings.
    parser = parsers.get_parser_class('rst')()
    option_parser = frontend.OptionParser(components=(parser, ))
    document = utils.new_document('test data',
                                  option_parser.get_default_values())

    if sys.version_info >= (3,):
        # input must be unicode at all times
        self.assertRaises(TypeError, parser.parse, b('hol'), document)
        return
    # supplying string input is supported, but only if ascii-decodable
    self.assertRaises(UnicodeDecodeError,
                      parser.parse, b('hol%s' % chr(224)), document)
def test_inputrestrictions(self):
    # Parser plus a fresh document carrying the parser's default settings.
    parser = parsers.get_parser_class('rst')()
    defaults = frontend.OptionParser(
        components=(parser, )).get_default_values()
    document = utils.new_document('test data', defaults)

    if sys.version_info < (3,):
        # supplying string input is supported, but only if ascii-decodable
        expected_error = UnicodeError  # UnicodeDecodeError since py2.3
        payload = b('hol%s' % chr(224))
    else:
        # input must be unicode at all times
        expected_error = TypeError
        payload = b('hol')
    self.assertRaises(expected_error, parser.parse, payload, document)
def test_extract_extension_options(self):
    # Non-ASCII option value, decoded from latin-1 bytes.
    non_ascii = unicode(b('hol%s' % chr(224)), 'iso-8859-1')

    # A well-formed field list: every field has at most one paragraph.
    field_list = nodes.field_list()
    field_list += nodes.field(
        '', nodes.field_name('', 'a'),
        nodes.field_body('', nodes.paragraph('', '1')))
    field_list += nodes.field(
        '', nodes.field_name('', 'bbb'),
        nodes.field_body('', nodes.paragraph('', '2.0')))
    field_list += nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', non_ascii)))
    field_list += nodes.field('', nodes.field_name('', 'empty'),
                              nodes.field_body())
    self.assertEqual(
        utils.extract_extension_options(field_list, self.optionspec),
        {'a': 1, 'bbb': 2.0, 'cdef': non_ascii, 'empty': None})
    # An empty option spec knows none of the field names.
    self.assertRaises(KeyError, utils.extract_extension_options,
                      field_list, {})

    # Field body with two paragraphs.
    field_list += nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', 'one'),
                         nodes.paragraph('', 'two')))
    self.assertRaises(utils.BadOptionDataError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # Field name with more than one word.
    field_list[-1] = nodes.field(
        '', nodes.field_name('', 'cdef bad'),
        nodes.field_body('', nodes.paragraph('', 'no arguments')))
    self.assertRaises(utils.BadOptionError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # Second field named 'cdef'.
    field_list[-1] = nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', 'duplicate')))
    self.assertRaises(utils.DuplicateOptionError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # Field name that is looked up in self.optionspec and must fail.
    field_list[-2] = nodes.field(
        '', nodes.field_name('', 'unkown'),
        nodes.field_body('', nodes.paragraph('', 'unknown')))
    self.assertRaises(KeyError, utils.extract_extension_options,
                      field_list, self.optionspec)
def test_ubuf(self):
    buf = UBuf()  # buffer only accepting unicode string
    # decode of binary strings
    e = ErrorOutput(buf, encoding='ascii')
    e.write(b('b\xfc'))
    self.assertEqual(buf.getvalue(), u'b\ufffd')  # use REPLACEMENT CHARACTER
    # write Unicode string and Exceptions with Unicode args
    e.write(u' u\xfc')
    self.assertEqual(buf.getvalue(), u'b\ufffd u\xfc')
    e.write(AttributeError(u' e\xfc'))
    self.assertEqual(buf.getvalue(), u'b\ufffd u\xfc e\xfc')
    # decode with `encoding` attribute
    e.encoding = 'latin1'
    e.write(b(' b\xfc'))
    self.assertEqual(buf.getvalue(), u'b\ufffd u\xfc e\xfc b\xfc')
def test_ubuf(self):
    buf = UBuf()  # buffer only accepting unicode string
    e = ErrorOutput(buf, encoding='ascii')

    # decode of binary strings
    e.write(b('b\xfc'))
    expected = u'b\ufffd'  # use REPLACEMENT CHARACTER
    self.assertEqual(buf.getvalue(), expected)

    # write Unicode string and Exceptions with Unicode args
    e.write(u' u\xfc')
    expected += u' u\xfc'
    self.assertEqual(buf.getvalue(), expected)
    e.write(AttributeError(u' e\xfc'))
    expected += u' e\xfc'
    self.assertEqual(buf.getvalue(), expected)

    # decode with `encoding` attribute
    e.encoding = 'latin1'
    e.write(b(' b\xfc'))
    expected += u' b\xfc'
    self.assertEqual(buf.getvalue(), expected)
def system_message(self, level, message, *children, **kwargs):
    """
    Build and return a system_message node.

    The message is written to `self.stream` when its level warrants it,
    `SystemMessage` is raised at or above `self.halt_level`, and
    observers are notified otherwise (debug messages only if
    `self.debug_flag` is set).
    """
    attributes = dict(kwargs)
    if 'base_node' in attributes:
        # 'base_node' only supplies source/line defaults; it is not
        # passed on as a node attribute.
        source, line = get_source_line(attributes.pop('base_node'))
        if source is not None:
            attributes.setdefault('source', source)
        if line is not None:
            attributes.setdefault('line', line)
    attributes.setdefault('source', self.source)

    msg = nodes.system_message(message, level=level,
                               type=self.levels[level],
                               *children, **attributes)

    if self.stream and (level >= self.report_level
                        or self.debug_flag and level == self.DEBUG_LEVEL
                        or level >= self.halt_level):
        self.stream.write(
            msg.astext().encode(self.encoding, self.error_handler))
        self.stream.write(b('\n'))

    if level >= self.halt_level:
        raise SystemMessage(msg, level)
    if level > self.DEBUG_LEVEL or self.debug_flag:
        self.notify_observers(msg)
    self.max_level = max(level, self.max_level)
    return msg
class SafeStringTests_locale(unittest.TestCase):
    """
    Test docutils.SafeString with 'problematic' locales.

    The error message in `EnvironmentError` instances comes from the OS
    and in some locales (e.g. ru_RU), contains high bit chars.
    """
    if testlocale:
        locale.setlocale(locale.LC_ALL, testlocale)
    # test data:
    bs = b('\xfc')
    us = u'\xfc'
    # Provoke an IOError by opening a file named with a high-bit byte.
    try:
        open(b('\xfc'))
    except IOError as e:  # in Python 3 the name for the exception instance
        bioe = e          # is local to the except clause
def test_publish(self):
    # Publish `self.input` under every combination of the four boolean
    # writer settings and compare with the expected output assembled
    # from the fixture fragments.
    settings = {'input_encoding': 'utf8',
                'output_encoding': 'iso-8859-1',
                '_disable_config': 1}
    for settings['newlines'] in 0, 1:
        for settings['indents'] in 0, 1:
            for settings['xml_declaration'] in 0, 1:
                for settings['doctype_declaration'] in 0, 1:
                    # optional declarations come first
                    prefix = b('')
                    if settings['xml_declaration']:
                        prefix += self.xmldecl
                    if settings['doctype_declaration']:
                        prefix += self.doctypedecl
                    # 'indents' takes precedence over 'newlines'
                    if settings['indents']:
                        body = self.bodyindents
                    elif settings['newlines']:
                        body = self.bodynewlines
                    else:
                        body = self.bodynormal
                    expected = prefix + self.generatedby + body
                    output = docutils.core.publish_string(
                        source=self.input,
                        reader_name='standalone',
                        writer_name='docutils_xml',
                        settings_overrides=settings)
                    self.assertEqual(output, expected)
def copy_file(self, name, source_dir, dest_dir):
    """
    Copy file `name` from `source_dir` to `dest_dir`.

    Return 1 if the file exists in either `source_dir` or `dest_dir`.
    Return None if the source file matches `self.files_to_skip_pattern`
    or if the file exists in neither directory.
    """
    source = os.path.join(source_dir, name)
    dest = os.path.join(dest_dir, name)
    if dest in self.theme_files_copied:
        return 1
    else:
        self.theme_files_copied[dest] = 1
    if os.path.isfile(source):
        if self.files_to_skip_pattern.search(source):
            return None
        settings = self.document.settings
        if os.path.exists(dest) and not settings.overwrite_theme_files:
            settings.record_dependencies.add(dest)
        else:
            # Context managers close both handles even if reading,
            # encoding or writing raises.
            with open(source, 'rb') as src_file:
                src_data = src_file.read()
            dest_dir = dest_dir.replace(os.sep, '/')
            # Rewrite references to the default theme directory so they
            # point at the destination's 'ui/...' path.  Computed before
            # the destination is opened so a failure here does not leave
            # a truncated file behind.
            theme_path = dest_dir[dest_dir.rfind('ui/'):].encode(
                sys.getfilesystemencoding())
            with open(dest, 'wb') as dest_file:
                dest_file.write(src_data.replace(b('ui/default'),
                                                 theme_path))
            settings.record_dependencies.add(source)
        return 1
    if os.path.isfile(dest):
        return 1
def copy_file(self, name, source_dir, dest_dir):
    """
    Copy file `name` from `source_dir` to `dest_dir`.

    Return 1 if the file exists in either `source_dir` or `dest_dir`.
    Return None when the source matches `self.files_to_skip_pattern`
    or when the file is found in neither directory.
    """
    source = os.path.join(source_dir, name)
    dest = os.path.join(dest_dir, name)
    if dest in self.theme_files_copied:
        return 1
    # remember the destination so it is handled only once
    self.theme_files_copied[dest] = 1

    if not os.path.isfile(source):
        if os.path.isfile(dest):
            return 1
        return None

    if self.files_to_skip_pattern.search(source):
        return None
    settings = self.document.settings
    if os.path.exists(dest) and not settings.overwrite_theme_files:
        settings.record_dependencies.add(dest)
        return 1

    # `with` closes the handles even when an error occurs mid-copy.
    with open(source, 'rb') as src_file:
        src_data = src_file.read()
    dest_dir = dest_dir.replace(os.sep, '/')
    # Build the replacement before opening `dest` for writing so that a
    # failing encode() cannot leave an empty destination file.
    new_ui_path = dest_dir[dest_dir.rfind('ui/'):].encode(
        sys.getfilesystemencoding())
    with open(dest, 'wb') as dest_file:
        dest_file.write(src_data.replace(b('ui/default'), new_ui_path))
    settings.record_dependencies.add(source)
    return 1
def test_decode_unicode(self):
    # With the special value "unicode" or "Unicode":
    uniinput = io.Input(encoding='unicode')
    # keep unicode instances as-is
    decoded = uniinput.decode(u'ja')
    self.assertEqual(decoded, u'ja')
    # raise AssertionError if data is not an unicode string
    byte_data = b('ja')
    self.assertRaises(AssertionError, uniinput.decode, byte_data)
def test_level4(self):
    # A level-4 message must raise SystemMessage ...
    self.assertRaises(utils.SystemMessage, self.reporter.system_message,
                      4, 'a severe error, raises an exception')
    # ... and must have been written to the stream.
    self.assertEqual(
        self.stream.getvalue(),
        b('test data:: (SEVERE/4) '
          'a severe error, raises an exception\n'))
def test_publish(self):
    # Exercise all 16 combinations of the four on/off writer settings.
    settings = {
        'input_encoding': 'utf8',
        'output_encoding': 'iso-8859-1',
        '_disable_config': 1
    }
    for settings['newlines'] in 0, 1:
        for settings['indents'] in 0, 1:
            for settings['xml_declaration'] in 0, 1:
                for settings['doctype_declaration'] in 0, 1:
                    published = docutils.core.publish_string(
                        source=self.input,
                        reader_name='standalone',
                        writer_name='docutils_xml',
                        settings_overrides=settings)

                    # Assemble the expected document piece by piece.
                    expected = b('')
                    if settings['xml_declaration']:
                        expected = expected + self.xmldecl
                    if settings['doctype_declaration']:
                        expected = expected + self.doctypedecl
                    expected = expected + self.generatedby
                    if settings['indents']:
                        expected = expected + self.bodyindents
                    elif settings['newlines']:
                        expected = expected + self.bodynewlines
                    else:
                        expected = expected + self.bodynormal

                    self.assertEqual(published, expected)
def test_bom(self):
    bom_input = io.StringInput(
        source=b("\xef\xbb\xbf foo \xef\xbb\xbf bar"), encoding="utf8")
    # Assert BOMs are gone.  Dropping the second BOM leaves the two
    # spaces that surrounded it next to each other.
    self.assertEqual(bom_input.read(), u" foo  bar")
    # With unicode input:
    bom_input = io.StringInput(source=u"\ufeff foo \ufeff bar")
    # Assert BOMs are still there.
    self.assertEqual(bom_input.read(), u"\ufeff foo \ufeff bar")
def test_error(self):
    sw = self.reporter.error('an error')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="3" source="test data" type="ERROR">
    <paragraph>
        an error
""")
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_level1(self):
    sw = self.reporter.system_message(1, 'a little reminder')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="1" source="test data" type="INFO">
    <paragraph>
        a little reminder
""")
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_bbuf(self):
    buf = BBuf()  # buffer storing byte string
    e = ErrorOutput(buf, encoding='ascii')

    # write byte-string as-is
    e.write(b('b\xfc'))
    written = 'b\xfc'
    self.assertEqual(buf.getvalue(), b(written))

    # encode unicode data with backslashescape fallback replacement:
    e.write(u' u\xfc')
    written += ' u\\xfc'
    self.assertEqual(buf.getvalue(), b(written))

    # handle Exceptions with Unicode string args
    # unicode(Exception(u'e\xfc')) # fails in Python < 2.6
    e.write(AttributeError(u' e\xfc'))
    written += ' e\\xfc'
    self.assertEqual(buf.getvalue(), b(written))

    # encode with `encoding` attribute
    e.encoding = 'utf8'
    e.write(u' u\xfc')
    written += ' u\xc3\xbc'
    self.assertEqual(buf.getvalue(), b(written))
def test_info(self):
    sw = self.reporter.info('an informational message')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="1" source="test data" type="INFO">
    <paragraph>
        an informational message
""")
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_warning(self):
    sw = self.reporter.warning('a warning')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="2" source="test data" type="WARNING">
    <paragraph>
        a warning
""")
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_bbuf(self):
    buf = BBuf()  # buffer storing byte string
    e = ErrorOutput(buf, encoding='ascii')

    # write byte-string as-is
    e.write(b('b\xfc'))
    so_far = 'b\xfc'
    self.assertEqual(buf.getvalue(), b(so_far))

    # encode unicode data with backslashescape fallback replacement:
    e.write(' u\xfc')
    so_far += ' u\\xfc'
    self.assertEqual(buf.getvalue(), b(so_far))

    # handle Exceptions with Unicode string args
    # unicode(Exception(u'e\xfc')) # fails in Python < 2.6
    e.write(AttributeError(' e\xfc'))
    so_far += ' e\\xfc'
    self.assertEqual(buf.getvalue(), b(so_far))

    # encode with `encoding` attribute
    e.encoding = 'utf8'
    e.write(' u\xfc')
    so_far += ' u\xc3\xbc'
    self.assertEqual(buf.getvalue(), b(so_far))
def test_severe(self):
    sw = self.reporter.severe('a severe error')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="4" source="test data" type="SEVERE">
    <paragraph>
        a severe error
""")
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_extract_extension_options(self):
    # Value with a non-ASCII character, decoded from latin-1 bytes.
    hola = unicode(b('hol%s' % chr(224)), 'iso-8859-1')

    # Start with a valid field list.
    field_list = nodes.field_list()
    field_list += nodes.field(
        '', nodes.field_name('', 'a'),
        nodes.field_body('', nodes.paragraph('', '1')))
    field_list += nodes.field(
        '', nodes.field_name('', 'bbb'),
        nodes.field_body('', nodes.paragraph('', '2.0')))
    field_list += nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', hola)))
    field_list += nodes.field(
        '', nodes.field_name('', 'empty'), nodes.field_body())
    extracted = utils.extract_extension_options(field_list,
                                                self.optionspec)
    self.assertEqual(
        extracted,
        {'a': 1,
         'bbb': 2.0,
         'cdef': hola,
         'empty': None})
    # No field name is present in an empty option spec.
    self.assertRaises(KeyError, utils.extract_extension_options,
                      field_list, {})

    # Two paragraphs in one field body.
    field_list += nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', 'one'),
                         nodes.paragraph('', 'two')))
    self.assertRaises(utils.BadOptionDataError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # Multi-word field name.
    field_list[-1] = nodes.field(
        '', nodes.field_name('', 'cdef bad'),
        nodes.field_body('', nodes.paragraph('', 'no arguments')))
    self.assertRaises(utils.BadOptionError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # 'cdef' given twice.
    field_list[-1] = nodes.field(
        '', nodes.field_name('', 'cdef'),
        nodes.field_body('', nodes.paragraph('', 'duplicate')))
    self.assertRaises(utils.DuplicateOptionError,
                      utils.extract_extension_options,
                      field_list, self.optionspec)

    # Field name expected to be missing from self.optionspec.
    field_list[-2] = nodes.field(
        '', nodes.field_name('', 'unkown'),
        nodes.field_body('', nodes.paragraph('', 'unknown')))
    self.assertRaises(KeyError, utils.extract_extension_options,
                      field_list, self.optionspec)
def test_level0(self):
    sw = self.reporter.system_message(0, 'debug output')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    self.assertEqual(sw.pformat(), """\
<system_message level="0" source="test data" type="DEBUG">
    <paragraph>
        debug output
""")
    # the debug message is expected on the stream as well
    self.assertEqual(self.stream.getvalue(),
                     b('test data:: (DEBUG/0) debug output\n'))
def test_severe(self):
    sw = self.reporter.severe('a severe error')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="4" source="test data" type="SEVERE">
    <paragraph>
        a severe error
"""
    self.assertEqual(sw.pformat(), expected)
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_bom(self):
    source = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
                            encoding='utf8')
    # Assert BOMs are gone.  With the inner BOM removed, the spaces on
    # either side of it end up adjacent, hence two spaces.
    self.assertEqual(source.read(), u' foo  bar')
    # With unicode input:
    source = io.StringInput(source=u'\ufeff foo \ufeff bar')
    # Assert BOMs are still there.
    self.assertEqual(source.read(), u'\ufeff foo \ufeff bar')
def test_level1(self):
    sw = self.reporter.system_message(1, 'a little reminder')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="1" source="test data" type="INFO">
    <paragraph>
        a little reminder
"""
    self.assertEqual(sw.pformat(), expected)
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_info(self):
    sw = self.reporter.info('an informational message')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="1" source="test data" type="INFO">
    <paragraph>
        an informational message
"""
    self.assertEqual(sw.pformat(), expected)
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_error(self):
    sw = self.reporter.error('an error')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="3" source="test data" type="ERROR">
    <paragraph>
        an error
"""
    self.assertEqual(sw.pformat(), expected)
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_warning(self):
    sw = self.reporter.warning('a warning')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="2" source="test data" type="WARNING">
    <paragraph>
        a warning
"""
    self.assertEqual(sw.pformat(), expected)
    # nothing is expected on the stream for this message
    self.assertEqual(self.stream.getvalue(), b(''))
def test_bom(self):
    stream = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
                            encoding='utf8')
    # Assert BOMs are gone.  The spaces before and after the second BOM
    # both remain, so the result contains a double space.
    self.assertEqual(stream.read(), u' foo  bar')
    # With unicode input:
    stream = io.StringInput(source=u'\ufeff foo \ufeff bar')
    # Assert BOMs are still there.
    self.assertEqual(stream.read(), u'\ufeff foo \ufeff bar')
def test_level0(self):
    sw = self.reporter.system_message(0, 'debug output')
    # NOTE(review): line breaks and indentation of the expected text
    # were reconstructed; confirm against the actual pformat() output.
    expected = """\
<system_message level="0" source="test data" type="DEBUG">
    <paragraph>
        debug output
"""
    self.assertEqual(sw.pformat(), expected)
    # the debug message is expected on the stream as well
    self.assertEqual(self.stream.getvalue(),
                     b('test data:: (DEBUG/0) debug output\n'))
class SafeStringTests(unittest.TestCase):
    # the error message in EnvironmentError instances comes from the OS
    # and in some locales (e.g. ru_RU), contains high bit chars.
    # -> see the test in test_error_reporting.py

    # test data:
    bs = b('\xfc')      # unicode(bs) fails, str(bs) in Python 3 return repr()
    us = '\xfc'         # bytes(us) fails; str(us) fails in Python 2
    be = Exception(bs)  # unicode(be) fails
    ue = Exception(us)  # bytes(ue) fails, str(ue) fails in Python 2;
                        # unicode(ue) fails in Python < 2.6 (issue2517_)
                        # .. _issue2517: http://bugs.python.org/issue2517
    # wrapped test data:
    wbs = SafeString(bs)
    wus = SafeString(us)
    wbe = SafeString(be)
    wue = SafeString(ue)

    def test_7bit(self):
        # wrapping (not required with 7-bit chars) must not change the
        # result of conversions:
        bs7 = b('foo')
        us7 = 'foo'
        be7 = Exception(bs7)
        ue7 = Exception(us7)
        # Each sample is checked once; the former second pass repeated
        # the very same str() comparisons.
        for sample in (42, bs7, us7, be7, ue7, 7):
            self.assertEqual(str(sample), str(SafeString(sample)))

    def test_ustr(self):
        """Test conversion to a unicode-string."""
        # unicode(self.bs) fails
        self.assertEqual(str, type(str(self.wbs)))
        self.assertEqual(str(self.us), str(self.wus))
        # unicode(self.be) fails
        self.assertEqual(str, type(str(self.wbe)))
        # unicode(ue) fails in Python < 2.6 (issue2517_)
        self.assertEqual(str, type(str(self.wue)))
        self.assertEqual(self.us, str(self.wue))

    def test_str(self):
        """Test conversion to a string (bytes in Python 2, unicode in Python 3)."""
        self.assertEqual(str(self.bs), str(self.wbs))
        # compare against the *wrapped* exception; comparing `be` with
        # itself could never fail
        self.assertEqual(str(self.be), str(self.wbe))
        # str(us) fails in Python 2
        self.assertEqual(str, type(str(self.wus)))
        # str(ue) fails in Python 2
        self.assertEqual(str, type(str(self.wue)))
def test_xmlcharrefreplace(self):
    # Test that xmlcharrefreplace is the default output encoding
    # error handler.
    settings_overrides = {
        'output_encoding': 'latin1',
        'stylesheet': '',
        '_disable_config': True,
    }
    result = core.publish_string(
        u'EUR = \u20ac', writer_name='html_plain',
        settings_overrides=settings_overrides)
    # Encoding a euro sign with latin1 doesn't work, so the
    # xmlcharrefreplace handler is used: the output must contain the
    # decimal character reference for U+20AC.
    self.assertIn(b('EUR = &#8364;'), result)
def test_xmlcharrefreplace(self):
    # Test that xmlcharrefreplace is the default output encoding
    # error handler.
    settings_overrides = {
        'output_encoding': 'latin1',
        'stylesheet': '',
        '_disable_config': True,
    }
    result = core.publish_string(
        u'EUR = \u20ac', writer_name='html5_polyglot',
        settings_overrides=settings_overrides)
    # Encoding a euro sign with latin1 doesn't work, so the
    # xmlcharrefreplace handler is used: expect the decimal character
    # reference for U+20AC rather than a literal euro sign.
    self.assertIn(b('EUR = &#8364;'), result)
def test_coding_slug(self):
    # Coding slug on the first line.
    source = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
    source.read()  # reading triggers the encoding detection
    self.assertEqual(source.successful_encoding, 'ascii')

    # Coding slug on the second line, after a shebang.
    source = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
    source.read()
    self.assertEqual(source.successful_encoding, 'ascii')

    # Coding slug pushed down to the third line: must be ignored.
    source = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
    source.read()
    self.assertNotEqual(source.successful_encoding, 'ascii')
def test_definition_list_item_classes(self):
    # Do not drop class arguments for the definition list item.
    # Pass them to to the term node instead.
    # NOTE(review): line breaks and indentation of the reST source were
    # reconstructed from a collapsed literal; confirm the exact layout.
    data = """\
first term:
  fist def

  .. class:: for the second item

second term:
  second def
"""
    result = core.publish_string(
        data, writer_name='html4css1',
        settings_overrides=self.settings_overrides)
    self.assertIn(b('<dt class="for the second item">second term:</dt>'),
                  result)
def test_definition_list_item_name(self):
    # Do not drop the "name" of the definition list item.
    # Pass it to to the term node instead.
    # NOTE(review): line breaks and indentation of the reST source were
    # reconstructed from a collapsed literal; confirm the exact layout.
    data = """\
first term:
  first def

  .. _second item:

second term:
  second def
"""
    result = core.publish_string(
        data, writer_name='html4css1',
        settings_overrides=self.settings_overrides)
    self.assertIn(b('<dt id="second-item">second term:</dt>'), result)
def test_7bit(self):
    # wrapping (not required with 7-bit chars) must not change the
    # result of conversions:
    bs7 = b('foo')
    us7 = u'foo'
    be7 = Exception(bs7)
    ue7 = Exception(us7)
    # conversion with str()
    for sample in (42, bs7, us7, be7, ue7):
        self.assertEqual(str(sample), str(SafeString(sample)))
    # conversion with unicode()
    for sample in (7, bs7, us7, be7, ue7):
        self.assertEqual(unicode(sample), unicode(SafeString(sample)))
def test_7bit(self):
    # wrapping (not required with 7-bit chars) must not change the
    # result of conversions:
    bs7 = b('foo')
    us7 = 'foo'
    be7 = Exception(bs7)
    ue7 = Exception(us7)
    # Two passes, both using str(); only the leading integer differs.
    for number in (42, 7):
        for sample in (number, bs7, us7, be7, ue7):
            self.assertEqual(str(sample), str(SafeString(sample)))
class ErrorStringTests(unittest.TestCase):
    """Check the "<exception class name>: <message>" text built by ErrorString."""

    # Non-ASCII byte string: unicode(bs) fails; in Python 3, str(bs)
    # returns the repr().
    bs = b('\xfc')
    # Non-ASCII text string: bytes(us) fails; str(us) fails in Python 2.
    us = '\xfc'

    def test_str(self):
        # Plain ASCII message.
        expected = 'Exception: spam'
        actual = str(ErrorString(Exception('spam')))
        self.assertEqual(expected, actual)

        # Byte-string message: compared against str() of the raw bytes.
        expected = 'IndexError: ' + str(self.bs)
        actual = str(ErrorString(IndexError(self.bs)))
        self.assertEqual(expected, actual)

        # Text message: compared against its SafeString rendering.
        expected = 'ImportError: %s' % SafeString(self.us)
        actual = str(ErrorString(ImportError(self.us)))
        self.assertEqual(expected, actual)

    def test_unicode(self):
        # Plain ASCII message.
        expected = 'Exception: spam'
        actual = str(ErrorString(Exception('spam')))
        self.assertEqual(expected, actual)

        # Text message: compared against the text itself.
        expected = 'IndexError: ' + self.us
        actual = str(ErrorString(IndexError(self.us)))
        self.assertEqual(expected, actual)

        # Byte-string message: compared against its SafeString rendering.
        expected = 'ImportError: %s' % SafeString(self.bs)
        actual = str(ErrorString(ImportError(self.bs)))
        self.assertEqual(expected, actual)
totest['include'] = [ ["""\ Include Test ============ .. include:: %s A paragraph. """ % include1, """\ <document source="test data"> <section ids="include-test" names="include\ test"> <title> Include Test <section ids="inclusion-1" names="inclusion\ 1"> <title> Inclusion 1 <paragraph> This file is used by \n\ <literal> test_include.py . <paragraph> A paragraph. """], ["""\ Include Test ============ .. include:: %s :literal: :class: test :name: my name A paragraph. """ % include1, """\ <document source="test data"> <section ids="include-test" names="include\ test"> <title> Include Test <literal_block classes="test" ids="my-name" names="my\ name" source="%s" xml:space="preserve"> Inclusion 1 ----------- \n\ This file is used by ``test_include.py``. <paragraph> A paragraph. """ % reldir(include1)], ["""\ Literal include, add line numbers .. include:: %s :literal: :number-lines: """ % include1, """\ <document source="test data"> <paragraph> Literal include, add line numbers <literal_block source="%s" xml:space="preserve"> <inline classes="ln"> 1 \n\ Inclusion 1 <inline classes="ln"> 2 \n\ ----------- <inline classes="ln"> 3 \n\ \n\ <inline classes="ln"> 4 \n\ This file is used by ``test_include.py``. """ % reldir(include1)], ["""\ Include code .. include:: %s :code: :class: test :name: my name """ % include1, """\ <document source="test data"> <paragraph> Include code <literal_block classes="code test" ids="my-name" names="my\ name" source="%s" xml:space="preserve"> Inclusion 1 ----------- \n\ This file is used by ``test_include.py``. """ % reldir(include1)], ["""\ Include code, add line numbers .. include:: %s :code: :number-lines: """ % include1, """\ <document source="test data"> <paragraph> Include code, add line numbers <literal_block classes="code" source="%s" xml:space="preserve"> <inline classes="ln"> 1 \n\ Inclusion 1 <inline classes="ln"> 2 \n\ ----------- <inline classes="ln"> 3 \n\ \n\ <inline classes="ln"> 4 \n\ This file is used by ``test_include.py``. 
""" % reldir(include1)], ["""\ Let's test the parse context. This paragraph is in a block quote. .. include:: %s The included paragraphs should also be in the block quote. """ % include2, """\ <document source="test data"> <paragraph> Let's test the parse context. <block_quote> <paragraph> This paragraph is in a block quote. <paragraph> Here are some paragraphs that can appear at any level. <paragraph> This file (include2.txt) is used by <literal> test_include.py . <paragraph> The included paragraphs should also be in the block quote. """], ["""\ Include Test ============ .. include:: nonexistent.txt A paragraph. """, """\ <document source="test data"> <section ids="include-test" names="include\ test"> <title> Include Test <system_message level="4" line="4" source="test data" type="SEVERE"> <paragraph> Problems with "include" directive path: InputError: [Errno 2] No such file or directory: 'nonexistent.txt'. <literal_block xml:space="preserve"> .. include:: nonexistent.txt <paragraph> A paragraph. """], ["""\ Include Test ============ .. include:: %s .. include:: %s A paragraph. """ % (include1, include1), """\ <document source="test data"> <section ids="include-test" names="include\ test"> <title> Include Test <section dupnames="inclusion\ 1" ids="inclusion-1"> <title> Inclusion 1 <paragraph> This file is used by <literal> test_include.py . <section dupnames="inclusion\ 1" ids="id1"> <title> Inclusion 1 <system_message backrefs="id1" level="1" line="2" source="%s" type="INFO"> <paragraph> Duplicate implicit target name: "inclusion 1". <paragraph> This file is used by <literal> test_include.py . <paragraph> A paragraph. """ % reldir(include1)], ["""\ Include Test ============ .. include:: %s ---------- .. include:: %s A paragraph. 
""" % (include1, include1), """\ <document source="test data"> <section ids="include-test" names="include\ test"> <title> Include Test <section dupnames="inclusion\ 1" ids="inclusion-1"> <title> Inclusion 1 <paragraph> This file is used by \n\ <literal> test_include.py . <transition> <section dupnames="inclusion\ 1" ids="id1"> <title> Inclusion 1 <system_message backrefs="id1" level="1" line="2" source="%s" type="INFO"> <paragraph> Duplicate implicit target name: "inclusion 1". <paragraph> This file is used by \n\ <literal> test_include.py . <paragraph> A paragraph. """ % reldir(include1)], ["""\ In test data .. include:: %s """ % include3, """\ <document source="test data"> <paragraph> In test data <paragraph> In include3.txt <paragraph> In includes/include4.txt <paragraph> In includes/include5.txt <paragraph> In includes/more/include6.txt <paragraph> In includes/sibling/include7.txt """], ["""\ In test data Section ======= (Section contents in nested parse; slice of input_lines ViewList.) .. include:: %s """ % include3, """\ <document source="test data"> <paragraph> In test data <section ids="section" names="section"> <title> Section <paragraph> (Section contents in nested parse; slice of input_lines ViewList.) <paragraph> In include3.txt <paragraph> In includes/include4.txt <paragraph> In includes/include5.txt <paragraph> In includes/more/include6.txt <paragraph> In includes/sibling/include7.txt """], ["""\ Testing relative includes: .. include:: %s """ % include8, """\ <document source="test data"> <paragraph> Testing relative includes: <paragraph> In include8.txt <paragraph> In ../includes/include9.txt. <paragraph> Here are some paragraphs that can appear at any level. <paragraph> This file (include2.txt) is used by <literal> test_include.py . """], ["""\ Encoding: .. 
include:: %s :encoding: utf-16 """ % reldir(utf_16_file), b("""\ <document source="test data"> <paragraph> Encoding: <paragraph> "Treat", "Quantity", "Description" "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be crunchy, now would it?" "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" """).decode('raw_unicode_escape')], ["""\ Include file is UTF-16-encoded, and is not valid ASCII. .. include:: %s :encoding: ascii """ % reldir(utf_16_file), """\ <document source="test data"> <paragraph> Include file is UTF-16-encoded, and is not valid ASCII. <system_message level="4" line="3" source="test data" type="SEVERE"> <paragraph> Problem with "include" directive: %s <literal_block xml:space="preserve"> .. include:: %s :encoding: ascii """ % (utf_16_error_str, reldir(utf_16_file))], [u"""\ cyrillic filename: .. include:: \u043c\u0438\u0440.txt """, u"""\ <document source="test data"> <paragraph> cyrillic filename: <system_message level="4" line="3" source="test data" type="SEVERE"> <paragraph> Problems with "include" directive path: %s <literal_block xml:space="preserve"> .. include:: \u043c\u0438\u0440.txt """ % errstr_8bit_path], ["""\ Testing errors in included file: .. include:: %s """ % include10, """\ <document source="test data"> <paragraph> Testing errors in included file: <system_message level="3" line="1" source="%(source)s" type="ERROR"> <paragraph> Invalid character code: 0x11111111 %(unichr_exception)s <literal_block xml:space="preserve"> unicode:: 0x11111111 <system_message level="2" line="1" source="%(source)s" type="WARNING"> <paragraph> Substitution definition "bad" empty or invalid. <literal_block xml:space="preserve"> .. 
|bad| unicode:: 0x11111111 <section dupnames="hi" ids="hi"> <title> hi <block_quote> <paragraph> indent <system_message level="2" line="7" source="%(source)s" type="WARNING"> <paragraph> Block quote ends without a blank line; unexpected unindent. <paragraph> error <section dupnames="hi" ids="id1"> <title> hi <system_message backrefs="id1" level="1" line="10" source="%(source)s" type="INFO"> <paragraph> Duplicate implicit target name: "hi". <system_message level="4" line="12" source="%(source)s" type="SEVERE"> <paragraph> Problems with "include" directive path: InputError: [Errno 2] No such file or directory: '%(nonexistent)s'. <literal_block xml:space="preserve"> .. include:: <nonexistent> <system_message level="3" line="14" source="%(source)s" type="ERROR"> <paragraph> Content block expected for the "note" directive; none found. <literal_block xml:space="preserve"> .. note:: <system_message level="3" line="16" source="%(source)s" type="ERROR"> <paragraph> Content block expected for the "admonition" directive; none found. <literal_block xml:space="preserve"> .. admonition:: without title <system_message level="3" line="19" source="%(source)s" type="ERROR"> <paragraph> Content block expected for the "epigraph" directive; none found. <literal_block xml:space="preserve"> .. epigraph:: <system_message level="3" line="21" source="%(source)s" type="ERROR"> <paragraph> Content block expected for the "highlights" directive; none found. <literal_block xml:space="preserve"> .. highlights:: <system_message level="3" line="23" source="%(source)s" type="ERROR"> <paragraph> Content block expected for the "pull-quote" directive; none found. <literal_block xml:space="preserve"> .. pull-quote:: <system_message level="3" line="25" source="%(source)s" type="ERROR"> <paragraph> Invalid context: the "date" directive can only be used within a substitution definition. <literal_block xml:space="preserve"> .. 
date:: <paragraph> not a definition list: <system_message level="3" line="29" source="%(source)s" type="ERROR"> <paragraph> Unexpected indentation. <block_quote> <paragraph> as a term may only be one line long. <system_message level="3" line="31" source="%(source)s" type="ERROR"> <paragraph> Error in "admonition" directive: 1 argument(s) required, 0 supplied. <literal_block xml:space="preserve"> .. admonition:: without title and content following a blank line <section ids="section-underline-too-short" names="section\ underline\ too\ short"> <title> section underline too short <system_message level="2" line="36" source="%(source)s" type="WARNING"> <paragraph> Title underline too short. <literal_block xml:space="preserve"> section underline too short ----- <table> <tgroup cols="2" colwidths="auto"> <colspec colwidth="14"> <colspec colwidth="6"> <thead> <row> <entry> <paragraph> A simple table <entry> <paragraph> cell 2 <tbody> <row> <entry> <paragraph> cell 3 <entry> <paragraph> cell 4 <system_message level="2" line="43" source="%(source)s" type="WARNING"> <paragraph> Blank line required after table. <paragraph> No blank line after table. <system_message level="3" line="45" source="%(source)s" type="ERROR"> <paragraph> Error in "unicode" directive: 1 argument(s) required, 0 supplied. <literal_block xml:space="preserve"> unicode:: <system_message level="2" line="45" source="%(source)s" type="WARNING"> <paragraph> Substitution definition "empty" empty or invalid. <literal_block xml:space="preserve"> .. |empty| unicode:: <system_message level="3" line="47" source="%(source)s" type="ERROR"> <paragraph> Error in "topic" directive: 1 argument(s) required, 0 supplied. <literal_block xml:space="preserve"> .. topic:: <system_message level="3" line="49" source="%(source)s" type="ERROR"> <paragraph> Error in "rubric" directive: 1 argument(s) required, 0 supplied. <literal_block xml:space="preserve"> .. 
rubric:: <rubric> A rubric has no content <comment xml:space="preserve"> _`target: No matching backquote. <system_message level="2" line="52" source="%(source)s" type="WARNING"> <paragraph> malformed hyperlink target. <comment xml:space="preserve"> __malformed: no good <system_message level="2" line="53" source="%(source)s" type="WARNING"> <paragraph> malformed hyperlink target. <definition_list> <definition_list_item> <term> A literal block:: <definition> <system_message level="1" line="57" source="%(source)s" type="INFO"> <paragraph> Blank line missing before literal block (after the "::")? Interpreted as a definition list item. <paragraph> with no blank line above. <literal_block xml:space="preserve"> > A literal block. <system_message level="3" line="61" source="%(source)s" type="ERROR"> <paragraph> Inconsistent literal block quoting. <paragraph> $ with inconsistent quoting. <paragraph> <problematic ids="id3" refid="id2"> :unknown-role:`role` and <problematic ids="id5" refid="id4"> * unbalanced <problematic ids="id7" refid="id6"> ` inline <problematic ids="id9" refid="id8"> ** markup <system_message level="1" line="63" source="%(source)s" type="INFO"> <paragraph> No role entry for "unknown-role" in module "docutils.parsers.rst.languages.en". Trying "unknown-role" as canonical role name. <system_message backrefs="id3" ids="id2" level="3" line="63" source="%(source)s" type="ERROR"> <paragraph> Unknown interpreted text role "unknown-role". <system_message backrefs="id5" ids="id4" level="2" line="63" source="%(source)s" type="WARNING"> <paragraph> Inline emphasis start-string without end-string. <system_message backrefs="id7" ids="id6" level="2" line="63" source="%(source)s" type="WARNING"> <paragraph> Inline interpreted text or phrase reference start-string without end-string. <system_message backrefs="id9" ids="id8" level="2" line="63" source="%(source)s" type="WARNING"> <paragraph> Inline strong start-string without end-string. 
<paragraph> <problematic ids="id11" refid="id10"> :PEP:`-1` <system_message backrefs="id11" ids="id10" level="3" line="68" source="%(source)s" type="ERROR"> <paragraph> PEP number must be a number from 0 to 9999; "-1" is invalid. <system_message level="1" line="66" source="%(source)s" type="INFO"> <paragraph> No directive entry for "unknown" in module "docutils.parsers.rst.languages.en". Trying "unknown" as canonical directive name. <system_message level="3" line="70" source="%(source)s" type="ERROR"> <paragraph> Unknown directive type "unknown". <literal_block xml:space="preserve"> .. unknown:: directive (info still reported with wrong line) <system_message level="3" line="72" source="%(source)s" type="ERROR"> <paragraph> Malformed table. No bottom table border found. <literal_block xml:space="preserve"> ============== ====== A simple table with no bottom border """ % {'source': reldir(include10), 'nonexistent': reldir(nonexistent), 'unichr_exception': DocutilsTestSupport.exception_data(unichr, int("11111111", 16))[2] }], ["""\ Include file with whitespace in the path: .. include:: %s """ % reldir(include11), """\ <document source="test data"> <paragraph> Include file with whitespace in the path: <paragraph> some text """], ["""\ Standard include data file: .. include:: <isogrk4.txt> """, b("""\ <document source="test data"> <paragraph> Standard include data file: <comment xml:space="preserve"> This data file has been placed in the public domain. <comment xml:space="preserve"> Derived from the Unicode character mappings available from <http://www.w3.org/2003/entities/xml/>. Processed by unicode2rstsubs.py, part of Docutils: <http://docutils.sourceforge.net>. <substitution_definition names="b.Gammad"> \\u03dc <substitution_definition names="b.gammad"> \\u03dd """).decode('raw_unicode_escape')], ["""\ Nonexistent standard include data file: .. 
include:: <nonexistent> """, """\ <document source="test data"> <paragraph> Nonexistent standard include data file: <system_message level="4" line="3" source="test data" type="SEVERE"> <paragraph> Problems with "include" directive path: InputError: [Errno 2] No such file or directory: '%s'. <literal_block xml:space="preserve"> .. include:: <nonexistent> """ % nonexistent_rel], ["""\ Include start-line/end-line Test .. include:: %s :start-line: 3 :end-line: 4 """ % include2, """\ <document source="test data"> <paragraph> Include start-line/end-line Test <paragraph> This file (include2.txt) is used by """], ["""\ Include start-line/end-line + start-after Test .. include:: %s :start-line: 2 :end-line: 5 :start-after: here Text search is limited to the specified lines. """ % include12, """\ <document source="test data"> <paragraph> Include start-line/end-line + start-after Test <paragraph> In include12.txt (after "start here", before "stop here") <paragraph> Text search is limited to the specified lines. """], ["""\ Include start-after/end-before Test .. include:: %s :start-after: .. start here :end-before: .. stop here A paragraph. """ % include12, """\ <document source="test data"> <paragraph> Include start-after/end-before Test <paragraph> In include12.txt (after "start here", before "stop here") <paragraph> A paragraph. """], ["""\ Include start-after/end-before Test, single option variant .. include:: %s :end-before: .. start here .. include:: %s :start-after: .. stop here A paragraph. """ % (include12, include12), """\ <document source="test data"> <paragraph> Include start-after/end-before Test, single option variant <paragraph> In include12.txt (but before "start here") <paragraph> In include12.txt (after "stop here") <paragraph> A paragraph. """], ["""\ Include start-after/end-before multi-line test. .. include:: %s :start-after: From: me To: you :end-before: ------- -- mork of ork .. 
include:: %s :start-after: From: me To: you :end-before: ------- -- mork of ork A paragraph. """ % (include13, include13), """\ <document source="test data"> <paragraph> Include start-after/end-before multi-line test. <system_message level="4" line="3" source="test data" type="SEVERE"> <paragraph> Problem with "end-before" option of "include" directive: Text not found. <literal_block xml:space="preserve"> .. include:: %s :start-after: From: me To: you :end-before: ------- -- mork of ork <paragraph> In include13.txt (between header and signature) <paragraph> A paragraph. """ % include13], ["""\ Error handling test; "end-before" error handling tested in previous test. .. include:: %s :start-after: bad string :end-before: mork of ork """ % include13, """\ <document source="test data"> <paragraph> Error handling test; "end-before" error handling tested in previous test. <system_message level="4" line="3" source="test data" type="SEVERE"> <paragraph> Problem with "start-after" option of "include" directive: Text not found. <literal_block xml:space="preserve"> .. include:: %s :start-after: bad string :end-before: mork of ork """ % include13], ["""\ TAB expansion with literal include: .. include:: %s :literal: """ % include_literal, """\ <document source="test data"> <paragraph> TAB expansion with literal include: <literal_block source="%s" xml:space="preserve"> Literal included this should **not** be *marked* `up`. <- leading raw tab. Newlines are normalized. """ % include_literal], ["""\ Custom TAB expansion with literal include: .. include:: %s :literal: :tab-width: 2 """ % include_literal, """\ <document source="test data"> <paragraph> Custom TAB expansion with literal include: <literal_block source="%s" xml:space="preserve"> Literal included this should **not** be *marked* `up`. <- leading raw tab. Newlines are normalized. """ % include_literal], ["""\ No TAB expansion with literal include: .. 
include:: %s :literal: :tab-width: -1 """ % include_literal, """\ <document source="test data"> <paragraph> No TAB expansion with literal include: <literal_block source="%s" xml:space="preserve"> Literal included this should **not** be *marked* `up`. \t<- leading raw tab. Newlines are normalized. """ % include_literal], ]
. """ ], [ """\ Encoding: .. include:: %s :encoding: utf-16 """ % reldir(utf_16_file), b("""\ <document source="test data"> <paragraph> Encoding: <paragraph> "Treat", "Quantity", "Description" "Albatr\xb0\xdf", 2.99, "\xa1On a \\u03c3\\u03c4\\u03b9\\u03ba!" "Crunchy Frog", 1.49, "If we took the b\xf6nes out, it wouldn\\u2019t be crunchy, now would it?" "Gannet Ripple", 1.99, "\xbfOn a \\u03c3\\u03c4\\u03b9\\u03ba?" """).decode('raw_unicode_escape') ], [ """\ Include file is UTF-16-encoded, and is not valid ASCII. .. include:: %s :encoding: ascii """ % reldir(utf_16_file), """\ <document source="test data">
[u"""\ Complex spanning pattern (no edge knows all rows/cols): +--------+---------------------+ | 北西・ | 北・北東セル | | 西セル +--------------+------+ | | 真ん中のセル | 東・ | +--------+--------------+ 南東 | | 南西・南セル | セル | +-----------------------+------+ """, u"""\ <document source="test data"> <paragraph> Complex spanning pattern (no edge knows all rows/cols): <table> <tgroup cols="3"> <colspec colwidth="8"> <colspec colwidth="14"> <colspec colwidth="6"> <tbody> <row> <entry morerows="1"> <paragraph> 北西・ 西セル <entry morecols="1"> <paragraph> 北・北東セル <row> <entry> <paragraph> 真ん中のセル <entry morerows="1"> <paragraph> 東・ 南東 セル <row> <entry morecols="1"> <paragraph> 南西・南セル """], [u"""\ ========= ========= ダイ1ラン ダイ2ラン ========= ========= ======== ========= ダイ1ラン ダイ2ラン ======== ========= """, u"""\ <document source="test data"> <table> <tgroup cols="2"> <colspec colwidth="9"> <colspec colwidth="9"> <tbody> <row> <entry> <paragraph> ダイ1ラン <entry> <paragraph> ダイ2ラン <system_message level="3" line="6" source="test data" type="ERROR"> <paragraph> Malformed table. Text in column margin in table line 2. 
<literal_block xml:space="preserve"> ======== ========= ダイ1ラン ダイ2ラン ======== ========= """], [u"""\ Some ambiguous-width characters: = =================================== © copyright sign ® registered sign « left pointing guillemet » right pointing guillemet – en-dash — em-dash ‘ single turned comma quotation mark ’ single comma quotation mark ‚ low single comma quotation mark “ double turned comma quotation mark ” double comma quotation mark „ low double comma quotation mark † dagger ‡ double dagger … ellipsis ™ trade mark sign ⇔ left-right double arrow = =================================== """, b("""\ <document source="test data"> <paragraph> Some ambiguous-width characters: <table> <tgroup cols="2"> <colspec colwidth="1"> <colspec colwidth="35"> <tbody> <row> <entry> <paragraph> \xa9 <entry> <paragraph> copyright sign <row> <entry> <paragraph> \xae <entry> <paragraph> registered sign <row> <entry> <paragraph> \xab <entry> <paragraph> left pointing guillemet <row> <entry> <paragraph> \xbb <entry> <paragraph> right pointing guillemet <row> <entry> <paragraph> \\u2013 <entry> <paragraph> en-dash <row> <entry> <paragraph> \\u2014 <entry> <paragraph> em-dash <row> <entry> <paragraph> \\u2018 <entry> <paragraph> single turned comma quotation mark <row> <entry> <paragraph> \\u2019 <entry> <paragraph> single comma quotation mark <row> <entry> <paragraph> \\u201a <entry> <paragraph> low single comma quotation mark <row> <entry> <paragraph> \\u201c <entry> <paragraph> double turned comma quotation mark <row> <entry> <paragraph> \\u201d <entry> <paragraph> double comma quotation mark <row> <entry> <paragraph> \\u201e <entry> <paragraph> low double comma quotation mark <row> <entry> <paragraph> \\u2020 <entry> <paragraph> dagger <row> <entry> <paragraph> \\u2021 <entry> <paragraph> double dagger <row> <entry> <paragraph> \\u2026 <entry> <paragraph> ellipsis <row> <entry> <paragraph> \\u2122 <entry> <paragraph> trade mark sign <row> <entry> <paragraph> \\u21d4 <entry> 
<paragraph> left-right double arrow """).decode('raw_unicode_escape')], ]
encodings.insert(1, locale_encoding) for enc in encodings: try: decoded = unicode(data, enc, self.error_handler) self.successful_encoding = enc # Return decoded, removing BOMs. return decoded.replace(u'\ufeff', u'') except (UnicodeError, LookupError), err: error = err # in Python 3, the <exception instance> is # local to the except clause raise UnicodeError( 'Unable to decode input data. Tried the following encodings: ' '%s.\n(%s)' % (', '.join([repr(enc) for enc in encodings]), ErrorString(error))) coding_slug = re.compile(b("coding[:=]\s*([-\w.]+)")) """Encoding declaration pattern.""" byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'), # 'utf-8-sig' new in v2.5 (codecs.BOM_UTF16_BE, 'utf-16-be'), (codecs.BOM_UTF16_LE, 'utf-16-le'),) """Sequence of (start_bytes, encoding) tuples for encoding detection. The first bytes of input data are checked against the start_bytes strings. A match indicates the given encoding.""" def determine_encoding_from_data(self, data): """ Try to determine the encoding of `data` by looking *in* `data`. Check for a byte order mark (BOM) or an encoding declaration. """ # check for a byte order mark:
def test_debug(self):
    # Debug output is disabled for this reporter, so the call is
    # expected to return None ...
    sw = self.reporter.debug('a debug message')
    self.assertIsNone(sw)
    # ... and to leave the output stream empty.
    # (assertEqual replaces the deprecated assertEquals alias.)
    self.assertEqual(self.stream.getvalue(), b(''))
test_document = """\ Test Document ============= This is a test document with a broken reference: nonexistent_ """ pseudoxml_output = b( """\ <document ids="test-document" names="test\ document" source="<string>" title="Test Document"> <title> Test Document <paragraph> This is a test document with a broken reference: \n\ <problematic ids="id2" refid="id1"> nonexistent_ <section classes="system-messages"> <title> Docutils System Messages <system_message backrefs="id2" ids="id1" level="3" line="4" source="<string>" type="ERROR"> <paragraph> Unknown target name: "nonexistent". """ ) exposed_pseudoxml_output = b( """\ <document ids="test-document" internal:refnames="{%s\'nonexistent\': [<reference: <#text: \'nonexistent\'>>]}" names="test\ document" source="<string>" title="Test Document"> <title> Test Document <paragraph> This is a test document with a broken reference: \n\