Ejemplo n.º 1
0
 def test_newlines(self):
     """Check charsUntil() and position() on input mixing LF, CRLF and bare CR.

     The expected strings contain only LF, i.e. the stream is expected to
     hand back each CRLF and each lone CR as a single LF.  The tuples
     compared against position() look like (line, column) pairs -- TODO
     confirm against the stream implementation.
     """
     # Bytes literal: concatenating the (bytes) BOM with a text literal raises
     # TypeError on Python 3; on Python 2 the b prefix is a no-op.
     stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
     self.assertEqual(stream.position(), (3, 0))
     self.assertEqual(stream.charsUntil('x'), u"ccc\ndddd")
     self.assertEqual(stream.position(), (4, 4))
     self.assertEqual(stream.charsUntil('e'), u"x")
     self.assertEqual(stream.position(), (4, 5))
Ejemplo n.º 2
0
    def test_python_issue_20007(self):
        """
        Feed an HTTPResponse object directly to HTMLInputStream.

        Guards the work-around for Python bug #20007
        (http://bugs.python.org/issue20007).
        """
        canned_reply = b"HTTP/1.1 200 Ok\r\n\r\nText"

        class FakeSocket(object):
            # Stand-in socket: makefile() hands back the canned reply as an
            # in-memory byte stream, ignoring its arguments.
            def makefile(self, _mode, _bufsize=None):
                return BytesIO(canned_reply)

        response = http_client.HTTPResponse(FakeSocket())
        response.begin()
        # After begin(), the stream is expected to yield the body text.
        self.assertEqual(HTMLInputStream(response).charsUntil(" "), "Text")
Ejemplo n.º 3
0
 def test_newlines(self):
     """Check charsUntil() and position() on input mixing LF, CRLF and bare CR.

     The expected strings contain only LF, i.e. the stream is expected to
     hand back each CRLF and each lone CR as a single LF.  The tuples
     compared against position() look like (line, column) pairs -- TODO
     confirm against the stream implementation.
     """
     # Bytes literal: concatenating the (bytes) BOM with a text literal raises
     # TypeError on Python 3; on Python 2 the b prefix is a no-op.
     stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
     self.assertEqual(stream.position(), (3, 0))
     self.assertEqual(stream.charsUntil('x'), u"ccc\ndddd")
     self.assertEqual(stream.position(), (4, 4))
     self.assertEqual(stream.charsUntil('e'), u"x")
     self.assertEqual(stream.position(), (4, 5))
Ejemplo n.º 4
0
    def test_python_issue_20007_b(self):
        """
        Feed an HTTPResponse wrapped in urllib.response.addinfourl to
        HTMLInputStream.

        Guards the work-around for Python bug #20007
        (http://bugs.python.org/issue20007).
        """
        # On Python 2 the test returns immediately without asserting anything.
        if six.PY2:
            return

        canned_reply = b"HTTP/1.1 200 Ok\r\n\r\nText"

        class FakeSocket(object):
            # Stand-in socket: makefile() hands back the canned reply as an
            # in-memory byte stream, ignoring its arguments.
            def makefile(self, _mode, _bufsize=None):
                return BytesIO(canned_reply)

        response = http_client.HTTPResponse(FakeSocket())
        response.begin()
        stream = HTMLInputStream(
            urllib.response.addinfourl(response, response.msg, "http://example.com")
        )
        self.assertEqual(stream.charsUntil(" "), "Text")
Ejemplo n.º 5
0
 def test_char_null(self):
     """A NUL character in the input is expected to be read back as U+FFFD."""
     stream = HTMLInputStream("\x00")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.char(), u'\ufffd')
Ejemplo n.º 6
0
 def test_position2(self):
     """Read a two-line input one character at a time, checking position().

     The tuples compared against look like (line, column) pairs: the first
     element goes from 1 to 2 once the newline has been consumed -- TODO
     confirm against the stream implementation.
     """
     stream = HTMLInputStream("abc\nd")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.char(), u"a")
     self.assertEqual(stream.position(), (1, 1))
     self.assertEqual(stream.char(), u"b")
     self.assertEqual(stream.position(), (1, 2))
     self.assertEqual(stream.char(), u"c")
     self.assertEqual(stream.position(), (1, 3))
     self.assertEqual(stream.char(), u"\n")
     self.assertEqual(stream.position(), (2, 0))
     self.assertEqual(stream.char(), u"d")
     self.assertEqual(stream.position(), (2, 1))
Ejemplo n.º 7
0
 def test_position(self):
     """Track position() through charsUntil(), unget() and char() calls.

     The tuples compared against look like (line, column) pairs -- TODO
     confirm against the stream implementation.
     """
     # Bytes literal: concatenating the (bytes) BOM with a text literal raises
     # TypeError on Python 3; on Python 2 the b prefix is a no-op.
     stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
     self.assertEqual(stream.position(), (3, 0))
     # Pushing the newline back is expected to move the reported position
     # back to the end of the previous line.
     stream.unget(u"\n")
     self.assertEqual(stream.position(), (2, 2))
     self.assertEqual(stream.charsUntil('c'), u"\n")
     self.assertEqual(stream.position(), (3, 0))
     # Same push-back again, this time re-read with char().
     stream.unget(u"\n")
     self.assertEqual(stream.position(), (2, 2))
     self.assertEqual(stream.char(), u"\n")
     self.assertEqual(stream.position(), (3, 0))
     self.assertEqual(stream.charsUntil('e'), u"ccc\nddd")
     self.assertEqual(stream.position(), (4, 3))
     self.assertEqual(stream.charsUntil('h'), u"e\nf\ng")
     self.assertEqual(stream.position(), (6, 1))
Ejemplo n.º 8
0
def test_utf_16():
    """Feed 1025 UTF-16 encoded spaces; check the detected encoding and read-back length."""
    space_count = 1025
    payload = (' ' * space_count).encode('utf-16')
    stream = HTMLInputStream(payload)
    detected_name = stream.charEncoding[0].name
    assert detected_name in ['utf-16le', 'utf-16be']
    # The second argument presumably inverts the match so that the run of
    # spaces itself is consumed -- verify against charsUntil().
    consumed = stream.charsUntil(' ', True)
    assert len(consumed) == space_count
Ejemplo n.º 9
0
 def test_newlines2(self):
     """A chunk-sized run of CRs followed by one LF should read back as that many LFs."""
     chunk_len = HTMLUnicodeInputStream._defaultChunkSize
     source = "\r" * chunk_len + "\n"
     expected = "\n" * chunk_len
     # 'x' never occurs in the input, so the whole input is expected back.
     self.assertEqual(HTMLInputStream(source).charsUntil('x'), expected)
Ejemplo n.º 10
0
 def test_bom(self):
     """Input starting with a UTF-8 BOM: expect 'utf-8' and the BOM not returned by char()."""
     payload = codecs.BOM_UTF8 + b"'"
     stream = HTMLInputStream(payload)
     detected_encoding = stream.charEncoding[0]
     self.assertEqual(detected_encoding, 'utf-8')
     # The first character read is the apostrophe that follows the BOM.
     first_char = stream.char()
     self.assertEqual(first_char, "'")
Ejemplo n.º 11
0
 def test_char_win1252(self):
     """Bytes encoded as windows-1252 are expected to be detected and decoded as such."""
     stream = HTMLInputStream(u"\xa9\xf1\u2019".encode(u'windows-1252'))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.charEncoding[0], u'windows-1252')
     self.assertEqual(stream.char(), u"\xa9")
     self.assertEqual(stream.char(), u"\xf1")
     self.assertEqual(stream.char(), u"\u2019")
Ejemplo n.º 12
0
def test_char_win1252():
    """Bytes encoded as windows-1252 are expected to be detected and decoded as such."""
    expected_text = "\xa9\xf1\u2019"
    stream = HTMLInputStream(expected_text.encode('windows-1252'))
    assert stream.charEncoding[0].name == 'windows-1252'
    # Each char() call should yield the next character of the original text.
    for expected_char in expected_text:
        assert stream.char() == expected_char
Ejemplo n.º 13
0
 def test_char_utf8(self):
     """With encoding='utf-8' given, UTF-8 bytes should decode to the original character."""
     stream = HTMLInputStream(u'\u2018'.encode(u'utf-8'), encoding=u'utf-8')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.charEncoding[0], u'utf-8')
     self.assertEqual(stream.char(), u'\u2018')
Ejemplo n.º 14
0
 def test_char_ascii(self):
     """With encoding='ascii' given, the stream should report 'ascii' and yield the character."""
     # Bytes literal: an explicit encoding only makes sense for byte input.
     # On Python 2 the b prefix is a no-op; on Python 3 the unprefixed
     # literal would be text rather than bytes.
     stream = HTMLInputStream(b"'", encoding=u'ascii')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.charEncoding[0], u'ascii')
     self.assertEqual(stream.char(), u"'")
Ejemplo n.º 15
0
 def test_char_ascii(self):
     """With encoding='ascii' given, the stream should report 'ascii' and yield the character."""
     stream = HTMLInputStream(b"'", encoding='ascii')
     reported_encoding = stream.charEncoding[0]
     self.assertEqual(reported_encoding, 'ascii')
     first_char = stream.char()
     self.assertEqual(first_char, "'")
Ejemplo n.º 16
0
 def test_char_win1252(self):
     """Bytes encoded as windows-1252 are expected to be detected and decoded as such."""
     expected_text = '\xa9\xf1\u2019'
     stream = HTMLInputStream(expected_text.encode('windows-1252'))
     self.assertEqual(stream.charEncoding[0], 'windows-1252')
     # Each char() call should yield the next character of the original text.
     for expected_char in expected_text:
         self.assertEqual(stream.char(), expected_char)
Ejemplo n.º 17
0
 def test_newlines2(self):
     """A chunk-sized run of CRs followed by one LF should read back as that many LFs."""
     chunk_len = HTMLUnicodeInputStream._defaultChunkSize
     carriage_returns = '\r' * chunk_len
     stream = HTMLInputStream(carriage_returns + '\n')
     # 'x' never occurs in the input, so the whole input is expected back.
     everything = stream.charsUntil('x')
     self.assertEqual(everything, '\n' * chunk_len)
Ejemplo n.º 18
0
def test_newlines2():
    """A chunk-sized run of CRs followed by one LF should read back as that many LFs."""
    chunk_len = HTMLUnicodeInputStream._defaultChunkSize
    source = "\r" * chunk_len + "\n"
    # 'x' never occurs in the input, so the whole input is expected back.
    everything = HTMLInputStream(source).charsUntil('x')
    expected = "\n" * chunk_len
    assert everything == expected
Ejemplo n.º 19
0
 def test_char_ascii(self):
     """With encoding='ascii' given, the stream should report 'ascii' and yield the character."""
     # Bytes literal: an explicit encoding only makes sense for byte input.
     # On Python 2 the b prefix is a no-op; on Python 3 the unprefixed
     # literal would be text rather than bytes.
     stream = HTMLInputStream(b"'", encoding=u'ascii')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.charEncoding[0], u'ascii')
     self.assertEqual(stream.char(), u"'")
Ejemplo n.º 20
0
 def test_char_utf8(self):
     """With encoding='utf-8' given, UTF-8 bytes should decode to the original character."""
     stream = HTMLInputStream(u'\u2018'.encode(u'utf-8'), encoding=u'utf-8')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.charEncoding[0], u'utf-8')
     self.assertEqual(stream.char(), u'\u2018')
Ejemplo n.º 21
0
 def test_position(self):
     """Track position() through charsUntil(), unget() and char() calls.

     The tuples compared against look like (line, column) pairs -- TODO
     confirm against the stream implementation.
     """
     # Bytes literal: concatenating the (bytes) BOM with a text literal raises
     # TypeError on Python 3; on Python 2 the b prefix is a no-op.
     stream = HTMLInputStream(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.charsUntil('c'), u"a\nbb\n")
     self.assertEqual(stream.position(), (3, 0))
     # Pushing the newline back is expected to move the reported position
     # back to the end of the previous line.
     stream.unget(u"\n")
     self.assertEqual(stream.position(), (2, 2))
     self.assertEqual(stream.charsUntil('c'), u"\n")
     self.assertEqual(stream.position(), (3, 0))
     # Same push-back again, this time re-read with char().
     stream.unget(u"\n")
     self.assertEqual(stream.position(), (2, 2))
     self.assertEqual(stream.char(), u"\n")
     self.assertEqual(stream.position(), (3, 0))
     self.assertEqual(stream.charsUntil('e'), u"ccc\nddd")
     self.assertEqual(stream.position(), (4, 3))
     self.assertEqual(stream.charsUntil('h'), u"e\nf\ng")
     self.assertEqual(stream.position(), (6, 1))
Ejemplo n.º 22
0
 def test_char_ascii(self):
     """With encoding='ascii' given, the reported encoding name is expected to be 'windows-1252'."""
     stream = HTMLInputStream(b"'", encoding='ascii')
     reported_name = stream.charEncoding[0].name
     self.assertEqual(reported_name, 'windows-1252')
     first_char = stream.char()
     self.assertEqual(first_char, "'")
Ejemplo n.º 23
0
 def test_char_utf8(self):
     """With encoding='utf-8' given, UTF-8 bytes should decode to the original character."""
     original_char = '\u2018'
     encoded = original_char.encode('utf-8')
     stream = HTMLInputStream(encoded, encoding='utf-8')
     self.assertEqual(stream.charEncoding[0], 'utf-8')
     self.assertEqual(stream.char(), original_char)
Ejemplo n.º 24
0
 def test_utf_16(self):
     """Feed 1025 UTF-16 encoded spaces; check the detected encoding and read-back length."""
     payload = (' ' * 1025).encode('utf-16')
     stream = HTMLInputStream(payload)
     accepted_encodings = ['utf-16-le', 'utf-16-be']
     # The full charEncoding value is passed as the failure message.
     self.assertTrue(stream.charEncoding[0] in accepted_encodings, stream.charEncoding)
     # The second argument presumably inverts the match so that the run of
     # spaces itself is consumed -- verify against charsUntil().
     consumed = stream.charsUntil(' ', True)
     self.assertEqual(len(consumed), 1025)
Ejemplo n.º 25
0
def test_char_utf8():
    """With encoding='utf-8' given, UTF-8 bytes should decode to the original character."""
    original_char = '\u2018'
    encoded = original_char.encode('utf-8')
    stream = HTMLInputStream(encoded, encoding='utf-8')
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'utf-8'
    assert stream.char() == original_char
Ejemplo n.º 26
0
 def test_position2(self):
     """Read a two-line input one character at a time, checking position().

     The tuples compared against look like (line, column) pairs: the first
     element goes from 1 to 2 once the newline has been consumed -- TODO
     confirm against the stream implementation.
     """
     stream = HTMLInputStream("abc\nd")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.position(), (1, 0))
     self.assertEqual(stream.char(), u"a")
     self.assertEqual(stream.position(), (1, 1))
     self.assertEqual(stream.char(), u"b")
     self.assertEqual(stream.position(), (1, 2))
     self.assertEqual(stream.char(), u"c")
     self.assertEqual(stream.position(), (1, 3))
     self.assertEqual(stream.char(), u"\n")
     self.assertEqual(stream.position(), (2, 0))
     self.assertEqual(stream.char(), u"d")
     self.assertEqual(stream.position(), (2, 1))
Ejemplo n.º 27
0
def test_bom():
    """Input starting with a UTF-8 BOM: expect 'utf-8' and the BOM not returned by char()."""
    payload = codecs.BOM_UTF8 + b"'"
    stream = HTMLInputStream(payload)
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'utf-8'
    # The first character read is the apostrophe that follows the BOM.
    first_char = stream.char()
    assert first_char == "'"
Ejemplo n.º 28
0
 def test_char_ascii(self):
     """With encoding='ascii' given, the stream should report 'ascii' and yield the character."""
     stream = HTMLInputStream(b"'", encoding='ascii')
     reported_encoding = stream.charEncoding[0]
     self.assertEqual(reported_encoding, 'ascii')
     first_char = stream.char()
     self.assertEqual(first_char, "'")
Ejemplo n.º 29
0
 def test_char_null(self):
     """A NUL character in the input is expected to be read back as U+FFFD."""
     stream = HTMLInputStream("\x00")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(stream.char(), u'\ufffd')
Ejemplo n.º 30
0
 def test_char_win1252(self):
     """Bytes encoded as windows-1252 are expected to be detected and decoded as such."""
     expected_text = "\xa9\xf1\u2019"
     stream = HTMLInputStream(expected_text.encode('windows-1252'))
     self.assertEqual(stream.charEncoding[0], 'windows-1252')
     # Each char() call should yield the next character of the original text.
     for expected_char in expected_text:
         self.assertEqual(stream.char(), expected_char)
Ejemplo n.º 31
0
def test_char_ascii():
    """With encoding='ascii' given, the reported encoding name is expected to be 'windows-1252'."""
    stream = HTMLInputStream(b"'", encoding='ascii')
    reported_name = stream.charEncoding[0].name
    assert reported_name == 'windows-1252'
    first_char = stream.char()
    assert first_char == "'"