Beispiel #1
0
def test_parse_next_decoder():
    """Parse the shift-jis fixture and check the text nodes of the root."""
    # the fixture is borrowed from the htmlstream tests
    fixture = resource_stream(
        __name__,
        'fixtures/htmlstream/shift-jis-next-decoder',
    )
    # the parse function is expected to try utf-8 first and then shift-jis
    tree = e.parse(Response.from_readable(fixture))
    text_nodes = list(tree.getroot().itertext())
    assert text_nodes == ['\n', '巨', '\n']
Beispiel #2
0
def test_parse_next_decoder():
    """Check the root's text nodes after parsing the shift-jis fixture."""
    # fixture borrowed from htmlstream
    fixture_path = 'fixtures/htmlstream/shift-jis-next-decoder'
    stream = resource_stream(__name__, fixture_path)
    wrapped = Response.from_readable(stream)
    # parse is expected to attempt utf-8 before falling back to shift-jis
    root = e.parse(wrapped).getroot()
    assert list(root.itertext()) == ['\n', '巨', '\n']
def test_phantomjs_using_proxies():
    """Fetch through an HttpProxy and check both the page and the proxy log."""
    headings = []
    with HttpProxy() as proxy:
        responses = map(
            Response.from_readable,
            url.get(proxies=proxy.proxies),
        )
        for response in responses:
            headings.extend(parse(response).xpath("//h1"))
    # exactly one <h1> is expected across all responses
    assert len(headings) == 1
    # the proxy object is still inspected after its context has exited
    expected_requests = [b"GET http://httpbin.org/html HTTP/1.1"]
    assert proxy.requests == expected_requests
def test_parse_ioerror():
    """A response whose read() raises IOError parses to e.UNPARSEABLE."""

    class ProblemResponse(object):
        """Stand-in response object whose read() always fails."""

        def __init__(self):
            self.url = None
            self.headers = parse_headers(BytesIO())

        def read(self, *_args):
            raise IOError

    tree = e.parse(ProblemResponse())
    root = tree.getroot()
    assert root is e.UNPARSEABLE
def test_phantomjs():
    """Fetch with a context and check the header and the <h1> attributes."""
    headings = []
    responses = map(Response.from_readable, url.get(context={"foo": "bar"}))
    for response in responses:
        headings.extend(parse(response).xpath("//h1"))
        # the context entry is expected back as a response header
        assert response.headers.get("X-wex-context-foo") == "bar"
    assert len(headings) == 1
    heading_attrs = headings[0].attrib
    # each of these attributes must be present on the single <h1>
    for attr_name in ("bcr-left", "bcr-top", "bcr-right", "bcr-bottom"):
        assert attr_name in heading_attrs
Beispiel #6
0
def test_parse_ioerror():
    """A response whose read() raises IOError parses to e.UNPARSEABLE."""

    class ProblemResponse(object):
        """Stand-in response: read() always fails, seek() does nothing."""

        def __init__(self):
            self.url = None
            self.headers = parse_headers(BytesIO())

        def seek(self, *_args):
            return None

        def read(self, *_args):
            raise IOError

    tree = e.parse(ProblemResponse())
    root = tree.getroot()
    assert root is e.UNPARSEABLE
Beispiel #7
0
def extract(response):
    """Yield (label, value) pairs taken from the parsed response.

    The "whoops" item evaluates ``1 / 0``, so ZeroDivisionError is raised
    once the generator is advanced past "name"; the "country" and "region"
    items are therefore never produced. NOTE(review): this looks like a
    deliberate failure for exercising error handling -- confirm.
    """
    doc = parse(response)

    heading = doc.xpath('//h1/text()')
    yield "name", text(heading)

    yield "whoops", 1 / 0

    country = doc.xpath('//dd[@id="country"]')
    yield "country", text(country)

    region = doc.xpath('//dd[@id="region"]')
    yield "region", text(region)
Beispiel #8
0
def test_parse_unreadable():
    """e.parse hands back the very same object when given a plain object()."""
    sentinel = object()
    result = e.parse(sentinel)
    assert result is sentinel
Beispiel #9
0
def test_bug_cp1252():
    """Check the <title> text extracted from the bug_cp1252 fixture."""
    fixture = resource_stream(__name__, 'fixtures/bug_cp1252')
    tree = e.parse(Response.from_readable(fixture))
    titles = tree.xpath('//title/text()')
    assert titles == ['Problem²']
Beispiel #10
0
def test_parse():
    """Parsing the example response exposes its <h1> text via xpath."""
    response = create_response(example)
    headings = e.parse(response).xpath('//h1/text()')
    assert headings == ['hi']
Beispiel #11
0
def test_get_base_url():
    """e.get_base_url returns the expected base URL for the example tree."""
    parsed = e.parse(create_response(example))
    assert e.get_base_url(parsed) == 'http://base.com/'
Beispiel #12
0
def test_parse_unreadable():
    """Passing a bare object() to e.parse returns that same object."""
    placeholder = object()
    parsed = e.parse(placeholder)
    assert parsed is placeholder
Beispiel #13
0
def test_parse():
    """The parsed example response yields 'hi' as its only <h1> text."""
    tree = e.parse(create_response(example))
    h1_texts = tree.xpath('//h1/text()')
    assert h1_texts == ['hi']
Beispiel #14
0
def test_get_base_url():
    """The base URL read from the parsed example tree is http://base.com/."""
    example_response = create_response(example)
    parsed = e.parse(example_response)
    found = e.get_base_url(parsed)
    expected = 'http://base.com/'
    assert found == expected
Beispiel #15
0
def test_bug_cp1252():
    """The bug_cp1252 fixture must parse to a title of 'Problem²'."""
    fixture_path = 'fixtures/bug_cp1252'
    stream = resource_stream(__name__, fixture_path)
    wrapped = Response.from_readable(stream)
    title_texts = e.parse(wrapped).xpath('//title/text()')
    assert title_texts == ['Problem²']