def test_newlines(self):
    """Check newline handling and position reporting behind a UTF-8 BOM.

    The input mixes LF, CRLF and bare CR line endings; every expected
    ``charsUntil`` result contains LF only.
    """
    # The BOM constant is bytes, so the payload must be a bytes literal too:
    # concatenating bytes with a text string raises TypeError on Python 3.
    stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
    self.assertEqual(stream.position(), (1, 0))
    self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
    self.assertEqual(stream.position(), (3, 0))
    self.assertEqual(stream.charsUntil('x'), u"ccc\ndddd")
    self.assertEqual(stream.position(), (4, 4))
    self.assertEqual(stream.charsUntil('e'), u"x")
    self.assertEqual(stream.position(), (4, 5))
def test_python_issue_20007(self):
    """
    Regression check for the Python bug #20007 work-around
    http://bugs.python.org/issue20007

    An ``HTTPResponse`` built on an in-memory socket stand-in is handed
    directly to ``HTMLInputStream``; the response body must be readable.
    """
    canned_response = b"HTTP/1.1 200 Ok\r\n\r\nText"

    class FakeSocket(object):
        # Only makefile() is provided; it ignores both arguments and
        # serves the canned response from memory.
        def makefile(self, _mode, _bufsize=None):
            return BytesIO(canned_response)

    response = http_client.HTTPResponse(FakeSocket())
    response.begin()
    body = HTMLInputStream(response).charsUntil(" ")
    self.assertEqual(body, "Text")
def test_python_issue_20007_b(self):
    """
    Variant of the Python bug #20007 check using a wrapped response
    http://bugs.python.org/issue20007

    The ``HTTPResponse`` is wrapped in ``urllib.response.addinfourl``
    before it reaches ``HTMLInputStream``. The test returns immediately,
    checking nothing, when ``six.PY2`` is true.
    """
    if six.PY2:
        return

    canned_response = b"HTTP/1.1 200 Ok\r\n\r\nText"

    class FakeSocket(object):
        # Only makefile() is provided; it ignores both arguments and
        # serves the canned response from memory.
        def makefile(self, _mode, _bufsize=None):
            return BytesIO(canned_response)

    response = http_client.HTTPResponse(FakeSocket())
    response.begin()
    wrapped = urllib.response.addinfourl(
        response, response.msg, "http://example.com"
    )
    body = HTMLInputStream(wrapped).charsUntil(" ")
    self.assertEqual(body, "Text")
def test_char_null(self):
    """A NUL character in the input is expected to be read back as U+FFFD."""
    stream = HTMLInputStream("\x00")
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(stream.char(), u'\ufffd')
def test_position2(self):
    """Check ``position()`` after every single ``char()`` read.

    The second element of the position is expected to grow by one per
    character on the first line, and the position is expected to become
    ``(2, 0)`` once the newline has been consumed.
    """
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    stream = HTMLInputStream("abc\nd")
    self.assertEqual(stream.position(), (1, 0))
    self.assertEqual(stream.char(), u"a")
    self.assertEqual(stream.position(), (1, 1))
    self.assertEqual(stream.char(), u"b")
    self.assertEqual(stream.position(), (1, 2))
    self.assertEqual(stream.char(), u"c")
    self.assertEqual(stream.position(), (1, 3))
    self.assertEqual(stream.char(), u"\n")
    self.assertEqual(stream.position(), (2, 0))
    self.assertEqual(stream.char(), u"d")
    self.assertEqual(stream.position(), (2, 1))
def test_position(self):
    """Check ``position()`` across ``charsUntil``, ``char`` and ``unget``.

    Pushing a newline back with ``unget`` is expected to move the reported
    position from ``(3, 0)`` back to ``(2, 2)``; reading it again, via
    either ``charsUntil`` or ``char``, is expected to restore ``(3, 0)``.
    """
    # The BOM constant is bytes, so the payload must be a bytes literal too:
    # concatenating bytes with a text string raises TypeError on Python 3.
    stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
    self.assertEqual(stream.position(), (1, 0))
    self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
    self.assertEqual(stream.position(), (3, 0))

    # First push-back: re-read the newline with charsUntil().
    stream.unget(u"\n")
    self.assertEqual(stream.position(), (2, 2))
    self.assertEqual(stream.charsUntil('c'), u"\n")
    self.assertEqual(stream.position(), (3, 0))

    # Second push-back: re-read the newline with char().
    stream.unget(u"\n")
    self.assertEqual(stream.position(), (2, 2))
    self.assertEqual(stream.char(), u"\n")
    self.assertEqual(stream.position(), (3, 0))

    self.assertEqual(stream.charsUntil('e'), u"ccc\nddd")
    self.assertEqual(stream.position(), (4, 3))
    self.assertEqual(stream.charsUntil('h'), u"e\nf\ng")
    self.assertEqual(stream.position(), (6, 1))
def test_utf_16():
    """Check UTF-16 detection and a 1025-character read of spaces.

    The detected encoding name may be either byte order. The ``True``
    argument to ``charsUntil`` presumably inverts the match so that the
    run of spaces is consumed -- confirm against the stream implementation.
    """
    space_count = 1025
    encoded = (' ' * space_count).encode('utf-16')
    stream = HTMLInputStream(encoded)

    detected_name = stream.charEncoding[0].name
    assert detected_name in ['utf-16le', 'utf-16be']

    consumed = stream.charsUntil(' ', True)
    assert len(consumed) == space_count
def test_newlines2(self):
    """Check CR handling for input one character longer than a chunk.

    The input is ``_defaultChunkSize`` CRs followed by a single LF, and
    the expected output is ``_defaultChunkSize`` LFs, so the final CR LF
    pair must yield one newline. That pair presumably straddles a chunk
    boundary -- confirm against the stream implementation.
    """
    chunk_len = HTMLUnicodeInputStream._defaultChunkSize
    source = "\r" * chunk_len + "\n"
    expected = "\n" * chunk_len
    stream = HTMLInputStream(source)
    self.assertEqual(stream.charsUntil('x'), expected)
def test_bom(self):
    """Check that input starting with a UTF-8 BOM is reported as utf-8.

    The first character read is expected to be the apostrophe that follows
    the BOM, not the BOM itself.
    """
    payload = codecs.BOM_UTF8 + b"'"
    stream = HTMLInputStream(payload)
    reported = stream.charEncoding[0]
    self.assertEqual(reported, 'utf-8')
    first_char = stream.char()
    self.assertEqual(first_char, "'")
def test_char_win1252(self):
    """Check that windows-1252 bytes are detected and decoded correctly.

    No encoding is passed to the stream, so ``charEncoding[0]`` is whatever
    the stream determined on its own.
    """
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    stream = HTMLInputStream(u"\xa9\xf1\u2019".encode(u'windows-1252'))
    self.assertEqual(stream.charEncoding[0], u'windows-1252')
    self.assertEqual(stream.char(), u"\xa9")
    self.assertEqual(stream.char(), u"\xf1")
    self.assertEqual(stream.char(), u"\u2019")
def test_char_win1252():
    """Check that windows-1252 bytes are detected and decoded correctly.

    No encoding is passed to the stream, so ``charEncoding[0].name`` is
    whatever the stream determined on its own.
    """
    encoded = "\xa9\xf1\u2019".encode('windows-1252')
    stream = HTMLInputStream(encoded)

    detected_name = stream.charEncoding[0].name
    assert detected_name == 'windows-1252'

    # Characters must come back one at a time, in input order.
    for expected in ("\xa9", "\xf1", "\u2019"):
        assert stream.char() == expected
def test_char_utf8(self):
    """Check that an explicit utf-8 encoding is reported and applied."""
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    stream = HTMLInputStream(u'\u2018'.encode(u'utf-8'), encoding=u'utf-8')
    self.assertEqual(stream.charEncoding[0], u'utf-8')
    self.assertEqual(stream.char(), u'\u2018')
def test_char_ascii(self):
    """Check that an explicit ascii encoding is reported and applied."""
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    stream = HTMLInputStream("'", encoding=u'ascii')
    self.assertEqual(stream.charEncoding[0], u'ascii')
    self.assertEqual(stream.char(), u"'")
def test_char_ascii(self):
    """Check that an explicit ascii encoding is reported and applied.

    The input is a single apostrophe given as bytes.
    """
    apostrophe = b"'"
    stream = HTMLInputStream(apostrophe, encoding='ascii')
    reported = stream.charEncoding[0]
    self.assertEqual(reported, 'ascii')
    first_char = stream.char()
    self.assertEqual(first_char, "'")
def test_char_win1252(self):
    """Check that windows-1252 bytes are detected and decoded correctly.

    No encoding is passed to the stream, so ``charEncoding[0]`` is whatever
    the stream determined on its own.
    """
    encoded = '\xa9\xf1\u2019'.encode('windows-1252')
    stream = HTMLInputStream(encoded)
    self.assertEqual(stream.charEncoding[0], 'windows-1252')

    # Characters must come back one at a time, in input order.
    for expected in ('\xa9', '\xf1', '\u2019'):
        self.assertEqual(stream.char(), expected)
def test_newlines2(self):
    """Check CR handling for input one character longer than a chunk.

    ``_defaultChunkSize`` CRs followed by one LF are expected to read back
    as ``_defaultChunkSize`` LFs, so the trailing CR LF pair yields a
    single newline.
    """
    cr_count = HTMLUnicodeInputStream._defaultChunkSize
    raw_text = '\r' * cr_count + '\n'
    stream = HTMLInputStream(raw_text)
    actual = stream.charsUntil('x')
    self.assertEqual(actual, '\n' * cr_count)
def test_newlines2():
    """Check CR handling for input one character longer than a chunk.

    ``_defaultChunkSize`` CRs followed by one LF are expected to read back
    as ``_defaultChunkSize`` LFs, so the trailing CR LF pair yields a
    single newline.
    """
    cr_count = HTMLUnicodeInputStream._defaultChunkSize
    raw_text = "\r" * cr_count + "\n"
    expected = "\n" * cr_count
    stream = HTMLInputStream(raw_text)
    actual = stream.charsUntil('x')
    assert actual == expected
def test_char_ascii(self):
    """Check that requesting ascii yields an encoding named windows-1252.

    NOTE(review): presumably this is because the "ascii" label maps to
    windows-1252 in the WHATWG Encoding Standard -- confirm against the
    stream implementation.
    """
    apostrophe = b"'"
    stream = HTMLInputStream(apostrophe, encoding='ascii')
    reported_name = stream.charEncoding[0].name
    self.assertEqual(reported_name, 'windows-1252')
    first_char = stream.char()
    self.assertEqual(first_char, "'")
def test_char_utf8(self):
    """Check that an explicit utf-8 encoding is reported and applied.

    The input is U+2018 encoded as UTF-8 and must read back as one
    character.
    """
    encoded = '\u2018'.encode('utf-8')
    stream = HTMLInputStream(encoded, encoding='utf-8')
    reported = stream.charEncoding[0]
    self.assertEqual(reported, 'utf-8')
    first_char = stream.char()
    self.assertEqual(first_char, '\u2018')
def test_utf_16(self):
    """Check UTF-16 detection and a 1025-character read of spaces.

    The detected encoding may be either byte order. The ``True`` argument
    to ``charsUntil`` presumably inverts the match so that the run of
    spaces is consumed -- confirm against the stream implementation.
    """
    stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
    # assertIn reports both the value and the candidate list on failure;
    # the full charEncoding value is still passed as the extra message.
    self.assertIn(stream.charEncoding[0], ['utf-16-le', 'utf-16-be'],
                  stream.charEncoding)
    self.assertEqual(len(stream.charsUntil(' ', True)), 1025)
def test_char_utf8():
    """Check that an explicit utf-8 encoding is reported and applied.

    The input is U+2018 encoded as UTF-8 and must read back as one
    character.
    """
    encoded = '\u2018'.encode('utf-8')
    stream = HTMLInputStream(encoded, encoding='utf-8')
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'utf-8'
    first_char = stream.char()
    assert first_char == '\u2018'
def test_bom():
    """Check that input starting with a UTF-8 BOM is reported as utf-8.

    The first character read is expected to be the apostrophe that follows
    the BOM, not the BOM itself.
    """
    payload = codecs.BOM_UTF8 + b"'"
    stream = HTMLInputStream(payload)
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'utf-8'
    first_char = stream.char()
    assert first_char == "'"
def test_char_win1252(self):
    """Check that windows-1252 bytes are detected and decoded correctly.

    No encoding is passed to the stream, so ``charEncoding[0]`` is whatever
    the stream determined on its own.
    """
    encoded = "\xa9\xf1\u2019".encode('windows-1252')
    stream = HTMLInputStream(encoded)
    reported = stream.charEncoding[0]
    self.assertEqual(reported, 'windows-1252')

    # Characters must come back one at a time, in input order.
    for expected in ("\xa9", "\xf1", "\u2019"):
        self.assertEqual(stream.char(), expected)
def test_char_ascii():
    """Check that requesting ascii yields an encoding named windows-1252.

    NOTE(review): presumably this is because the "ascii" label maps to
    windows-1252 in the WHATWG Encoding Standard -- confirm against the
    stream implementation.
    """
    apostrophe = b"'"
    stream = HTMLInputStream(apostrophe, encoding='ascii')
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'windows-1252'
    first_char = stream.char()
    assert first_char == "'"