Exemple #1
0
def test_post():
    """POST form data to httpbin and expect it echoed back under 'form'.

    The request method and its form data are attached to the URL through
    ``update_fragment_dict(method=...)``.
    """
    method = dict(post=dict(data=dict(x="1")))
    target = URL('http://httpbin.org/post').update_fragment_dict(method=method)
    for readable in target.get():
        reader = utf8_reader(Response.from_readable(readable))
        echoed = json.load(reader)
        assert echoed['form'] == method['post']['data']
Exemple #2
0
def test_warc_response():
    """Parse the WARC fixture and check its URL, protocol, version and type."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/warc_response')
    )
    assert response.url == 'http://httpbin.org/get?this=that'
    assert response.warc_protocol == b'WARC'
    assert response.warc_version == (1, 0)
    warc_type = response.warc_headers.get('warc-type')
    assert warc_type == 'response'
Exemple #3
0
def test_post():
    """Check that form data sent with a POST comes back in the JSON body.

    The POST method description is stored on the URL with
    ``update_fragment_dict`` before the request is made.
    """
    method = {"post": {"data": {"x": "1"}}}
    post_url = URL('http://httpbin.org/post')
    post_url = post_url.update_fragment_dict(method=method)
    for readable in post_url.get():
        parsed = Response.from_readable(readable)
        payload = json.load(utf8_reader(parsed))
        # httpbin reports the submitted form fields under the 'form' key.
        assert payload['form'] == method['post']['data']
Exemple #4
0
def test_get_gzip():
    """Fetch httpbin's gzip endpoint undecoded and decode it locally.

    The request is made with ``decode_content=False`` and the body is passed
    through ``decode`` before being parsed as JSON.  Exactly one readable is
    expected from the request.
    """
    url = 'http://httpbin.org/gzip'
    # Count explicitly instead of relying on the loop variable: if the
    # request yields nothing, the final check fails with an AssertionError
    # rather than a NameError on an unbound name.
    count = 0
    for readable in URL(url).get(decode_content=False):
        count += 1
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(decode(response)))
        assert data.get('gzipped')
    assert count == 1
def test_warc_response():
    """Read the 'warc_response' fixture and verify the parsed WARC fields."""
    fixture = resource_stream(__name__, 'fixtures/warc_response')
    parsed = Response.from_readable(fixture)
    assert parsed.url == 'http://httpbin.org/get?this=that'
    assert parsed.warc_protocol == b'WARC'
    assert parsed.warc_version == (1, 0)
    headers = parsed.warc_headers
    assert headers.get('warc-type') == 'response'
Exemple #6
0
def test_get_gzip():
    """Request gzipped content without automatic decoding, then decode it.

    Each readable is wrapped in a Response, run through ``decode`` and parsed
    as JSON; the JSON must carry a truthy 'gzipped' value.  The request is
    expected to produce exactly one readable.
    """
    url = 'http://httpbin.org/gzip'
    # An explicit counter keeps the final assertion meaningful even when
    # the request yields no readables (the old loop variable would be
    # unbound in that case and raise NameError).
    seen = 0
    for readable in URL(url).get(decode_content=False):
        seen += 1
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(decode(response)))
        assert data.get('gzipped')
    assert seen == 1
def test_chained_does_seek_response():
    """Chaining the same extractor twice must yield the first line twice."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/robots_txt')
    )
    # the same extractor appears twice in the chain
    extract = chained(extract_first_line, extract_first_line)
    # both runs see the same first line because chained re-seeks to 0
    first_line = (b'# /robots.txt\n', )
    assert list(extract(response)) == [first_line, first_line]
Exemple #8
0
def test_parse_url_response():
    """parse_url on a Response built from raw HTTP bytes gives path '/a/b/c'."""
    raw = (
        b"HTTP/1.1 200 OK\r\n"
        b"X-wex-request-url: http://www.foo.com/a/b/c\r\n"
        b"Content-type: text/html\r\n"
        b"\r\n"
        b"<html><h1>hello</h1></html>"
    )
    response = Response.from_readable(BytesIO(raw))
    parsed = parse_url(response)
    assert parsed.path == '/a/b/c'
def test_extract_from_readable():
    """values_from_readable passes the body to the extractor and yields its values."""
    def read_everything(src):
        # Single-value extractor: everything readable from ``src``.
        yield (src.read(), )

    source = BytesIO(b'FTP/1.0 200 OK\r\n\r\nhello')
    extracted = Response.values_from_readable(read_everything, source)
    assert list(extracted) == [(b'hello', )]
Exemple #10
0
def test_chained_does_seek_response():
    """The same extractor chained twice returns the first line both times."""
    fixture = resource_stream(__name__, 'fixtures/robots_txt')
    response = Response.from_readable(fixture)
    # chain one extractor with itself
    twice = chained(extract_first_line, extract_first_line)
    values = list(twice(response))
    # identical results are expected because chained re-seeks to 0
    expected_line = (b'# /robots.txt\n',)
    assert values == [expected_line, expected_line]
Exemple #11
0
def stream_from_fixture(fixture):
    """Build an HTMLStream from a fixture under 'fixtures/htmlstream/'.

    The response is partially read before the stream is created.
    """
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/htmlstream/' + fixture)
    )
    # consume some bytes first, just to make sure that we seek(0)
    response.read(100)
    return HTMLStream(response)
Exemple #12
0
def test_parse_next_decoder():
    """Parse a shift-jis fixture and check the text nodes of the tree."""
    # the fixture is borrowed from the htmlstream tests
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/htmlstream/shift-jis-next-decoder')
    )
    # the parse function is expected to try utf-8 and then shift-jis
    root = e.parse(response).getroot()
    texts = list(root.itertext())
    assert texts == ['\n', '巨', '\n']
Exemple #13
0
def test_parse_next_decoder():
    """Check the text extracted from the 'shift-jis-next-decoder' fixture."""
    # reuse a fixture that belongs to htmlstream
    fixture = resource_stream(
        __name__, 'fixtures/htmlstream/shift-jis-next-decoder'
    )
    response = Response.from_readable(fixture)
    # parsing should fall through from utf-8 to shift-jis here
    tree = e.parse(response)
    found = list(tree.getroot().itertext())
    assert found == ['\n', '巨', '\n']
Exemple #14
0
def extract_with_monkeypatched_logging(monkeypatch, excluded=()):
    """Run the entry-point extractor on the robots.txt fixture with fake logging.

    ``logging.getLogger`` is replaced, through ``monkeypatch``, with the
    ``getLogger`` method of a ``FakeLogger('wex.entrypoints')``.  The
    extractor is then run to exhaustion over the fixture.

    Args:
        monkeypatch: object providing ``setattr(target, value)``.
        excluded: accepted for compatibility; not used by this function.
            The default is an immutable empty tuple so that no mutable
            object is shared between calls.

    Returns:
        The FakeLogger that was installed.
    """
    logger = FakeLogger('wex.entrypoints')
    monkeypatch.setattr('logging.getLogger', logger.getLogger)
    extractor = extractor_from_entry_points()
    readable = resource_stream(__name__, 'fixtures/robots_txt')
    # Drain the generator; only the logging side effects matter here.
    for _ in Response.values_from_readable(extractor, readable):
        pass
    return logger
def test_get_gzip():
    """Fetch gzipped content undecoded, check the gzip marker header, decode.

    Each response must carry the header ``X-wex-has-gzip-magic`` with value
    "1", and its decoded JSON body must have a truthy 'gzipped' value.  The
    request is expected to produce exactly one readable.
    """
    url = "http://httpbin.org/gzip"
    # Use an explicit counter: when the request yields nothing the final
    # assertion fails cleanly instead of raising NameError on an unbound
    # loop variable.
    count = 0
    for readable in URL(url).get(decode_content=False):
        count += 1
        response = Response.from_readable(readable)
        assert response.headers.get("X-wex-has-gzip-magic") == "1"
        data = json.load(utf8_reader(decode(response)))
        assert data.get("gzipped")
    assert count == 1
Exemple #16
0
def get(url, **kw):
    """GET ``url`` and return the response code of every response received.

    Keyword arguments are forwarded to ``URL(url).get``.  For each response
    with code 200 the decoded body is parsed as JSON and must contain a
    'headers' key.
    """
    status_codes = []
    for readable in URL(url).get(**kw):
        response = Response.from_readable(readable)
        status_codes.append(response.code)
        if response.code != 200:
            continue
        body = json.load(utf8_reader(decode(response)))
        assert 'headers' in body
    return status_codes
Exemple #17
0
def get(url, **kw):
    """Collect the response codes produced by fetching ``url``.

    Extra keyword arguments go straight to ``URL(url).get``.  Responses
    with code 200 are also decoded, parsed as JSON and checked for a
    'headers' key.
    """
    seen_codes = []
    readables = URL(url).get(**kw)
    for readable in readables:
        resp = Response.from_readable(readable)
        seen_codes.append(resp.code)
        if resp.code == 200:
            decoded = utf8_reader(decode(resp))
            parsed = json.load(decoded)
            assert 'headers' in parsed
    return seen_codes
Exemple #18
0
def test_extractor_from_entry_points_hostname_suffix_excluded():
    """For the robots.txt fixture the hostname-suffix example is not registered.

    After the fixture has been run through the extractor, only
    'www.foo.com' is keyed in ``extractor.extractors``; ``testme.example``
    is present there and ``testme.example_with_hostname_suffix`` is not.
    """
    import testme
    extractor = extractor_from_entry_points()
    fixture = resource_stream(__name__, 'fixtures/robots_txt')
    # run the extractor to exhaustion; the yielded values are not inspected
    for _ in Response.values_from_readable(extractor, fixture):
        pass
    hostname = 'www.foo.com'
    assert list(extractor.extractors.keys()) == [hostname]
    registered = set(extractor.extractors[hostname].extractors)
    assert testme.example_with_hostname_suffix not in registered
    assert testme.example in registered
Exemple #19
0
def test_parse_url_response():
    """A Response built from raw HTTP bytes parses to a URL with path '/a/b/c'."""
    http_bytes = (
        b"HTTP/1.1 200 OK\r\n"
        b"X-wex-request-url: http://www.foo.com/a/b/c\r\n"
        b"Content-type: text/html\r\n"
        b"\r\n"
        b"<html><h1>hello</h1></html>"
    )
    readable = BytesIO(http_bytes)
    response = Response.from_readable(readable)
    assert parse_url(response).path == '/a/b/c'
Exemple #20
0
def test_extractor_from_entry_points():
    """Both testme examples are registered for the 'get_this_that' fixture.

    After the fixture has been run through the extractor, only
    'httpbin.org' is keyed in ``extract.extractors`` and its extractors
    include ``testme.example`` and ``testme.example_with_hostname_suffix``.
    """
    import testme
    extract = extractor_from_entry_points()
    readable = resource_stream(__name__, 'fixtures/get_this_that')
    # run the extractor to exhaustion; the yielded values are not inspected
    for _ in Response.values_from_readable(extract, readable):
        pass
    hostname = 'httpbin.org'
    assert list(extract.extractors.keys()) == [hostname]
    extractors = set(extract.extractors[hostname].extractors)
    # set literal instead of set([...])
    expected = {testme.example, testme.example_with_hostname_suffix}
    assert expected.issubset(extractors)
Exemple #21
0
def test_get_with_params():
    """Params given at ``.get`` time are sent but not recorded in the response URL."""
    url = 'http://httpbin.org/get'
    # The use case for adding params at '.get' time is for handling
    # authentication tokens to URLs.  The net effect is that the
    # tokens are not saved in the Wex response which is a way of
    # avoiding sharing your access tokens.
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        # httpbin echoes the query arguments it received under 'args'
        assert data.get('args') == params
        assert response.request_url == url
        assert 'secret' not in response.url
Exemple #22
0
def test_get_with_params():
    """Check that ``.get(params=...)`` sends the params without storing them.

    The server must see the params (echoed under 'args'), while neither
    ``response.request_url`` nor ``response.url`` may contain the token.
    """
    url = 'http://httpbin.org/get'
    # The use case for adding params at '.get' time is for handling
    # authentication tokens to URLs.  The net effect is that the
    # tokens are not saved in the Wex response which is a way of
    # avoiding sharing your access tokens.
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        assert data.get('args') == params
        assert response.request_url == url
        assert 'secret' not in response.url
Exemple #23
0
def build_response(content, response=with_content_length, **kw):
    """Format a response template and parse the result into a Response.

    The template ``response`` is filled with 'content', 'content_length',
    'protocol_version', 'code' and 'reason'.  Any of those names passed in
    ``kw`` overrides the default and is removed from ``kw``; the remaining
    keyword arguments are forwarded to ``Response.from_readable``.
    """
    defaults = {
        'content': content,
        'content_length': len(content),
        'protocol_version': 'HTTP/1.1',
        'code': 200,
        'reason': 'OK',
    }

    # Take each template field from kw when supplied, else use the default.
    fmt = dict((name, kw.pop(name, value)) for name, value in defaults.items())

    encoded = response.format(**fmt).encode('utf-8')
    return Response.from_readable(BytesIO(encoded), **kw)
Exemple #24
0
def build_response(content, response=with_content_length, **kw):
    """Build a Response from the template ``response`` filled with ``content``.

    Template fields ('content', 'content_length', 'protocol_version',
    'code', 'reason') may be overridden through ``kw``; overrides are
    consumed here and everything left in ``kw`` is passed on to
    ``Response.from_readable``.
    """
    fmt = dict(
        content=content,
        content_length=len(content),
        protocol_version='HTTP/1.1',
        code=200,
        reason='OK',
    )

    # Move caller-supplied overrides for template fields out of kw.
    overridden = [field for field in fmt if field in kw]
    for field in overridden:
        fmt[field] = kw.pop(field)

    text = response.format(**fmt)
    readable = BytesIO(text.encode('utf-8'))
    return Response.from_readable(readable, **kw)
Exemple #25
0
def create_response(data):
    """Wrap ``data`` in a BytesIO and parse it into a Response."""
    readable = BytesIO(data)
    return Response.from_readable(readable)
Exemple #26
0
def test_parse_obj():
    """parse_url handles an object that only provides a ``url`` attribute."""
    class Response(object):
        # Minimal stand-in exposing just the ``url`` attribute.
        url = url1

    stand_in = Response()
    assert parse_url(stand_in) == url1_parsed
Exemple #27
0
def test_get_with_context():
    """Context passed to ``.get`` shows up as an 'X-wex-context-*' header."""
    context = {'foo': 'bar'}
    for readable in URL('http://httpbin.org/headers').get(context=context):
        headers = Response.from_readable(readable).headers
        assert headers.get('X-wex-context-foo') == 'bar'
def stream_from_fixture(fixture):
    """Return an HTMLStream over the named fixture in 'fixtures/htmlstream/'."""
    fixture_stream = resource_stream(__name__, "fixtures/htmlstream/" + fixture)
    return HTMLStream(Response.from_readable(fixture_stream))
Exemple #29
0
def test_undecodable_url():
    """Check ``url`` and ``request_url`` parsed from the undecodable-URL fixture."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/undecodable_url.wexin_')
    )
    url = response.url
    assert url == 'https://www.example.net/Ã'
    request_url = response.request_url
    assert request_url == 'https://www.example.net/Ã#{"method":"get"}'
Exemple #30
0
def test_extract_from_readable():
    """An extractor run via values_from_readable receives the body 'hello'."""
    def body_extractor(src):
        # Yield everything readable from ``src`` as a one-element tuple.
        yield (src.read(),)

    raw = BytesIO(b'FTP/1.0 200 OK\r\n\r\nhello')
    results = list(Response.values_from_readable(body_extractor, raw))
    assert results == [(b'hello',)]
Exemple #31
0
def test_undecodable_url():
    """Verify both URLs of the response read from 'undecodable_url.wexin_'."""
    fixture = resource_stream(__name__, 'fixtures/undecodable_url.wexin_')
    parsed = Response.from_readable(fixture)
    assert parsed.url == 'https://www.example.net/Ã'
    # the request URL additionally carries the method fragment
    assert parsed.request_url == 'https://www.example.net/Ã#{"method":"get"}'
def test_warc_response():
    """The WARC fixture must parse to the expected response URL."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/warc_response')
    )
    assert response.url == 'http://httpbin.org/get?this=that'
Exemple #33
0
def test_bug_cp1252():
    """The title text of the 'bug_cp1252' fixture must parse as 'Problem²'."""
    response = Response.from_readable(
        resource_stream(__name__, 'fixtures/bug_cp1252')
    )
    titles = e.parse(response).xpath('//title/text()')
    assert titles == ['Problem²']
Exemple #34
0
def test_get_with_context():
    """A context entry given to ``.get`` is exposed as a response header."""
    request = URL('http://httpbin.org/headers')
    for readable in request.get(context={'foo': 'bar'}):
        parsed = Response.from_readable(readable)
        header_value = parsed.headers.get('X-wex-context-foo')
        assert header_value == 'bar'
Exemple #35
0
def test_bug_cp1252():
    """Parse the 'bug_cp1252' fixture and check the extracted title text."""
    fixture = resource_stream(__name__, 'fixtures/bug_cp1252')
    tree = e.parse(Response.from_readable(fixture))
    title_text = tree.xpath('//title/text()')
    assert title_text == ['Problem²']
def create_response(data):
    """Parse ``data``, wrapped in a BytesIO, into a Response."""
    buffered = BytesIO(data)
    parsed = Response.from_readable(buffered)
    return parsed
Exemple #37
0
def response(resource):
    """Open the packaged ``resource`` and parse it into a Response."""
    readable = resource_stream(__name__, resource)
    return Response.from_readable(readable)
def test_get_with_context():
    """The 'foo' context value must come back in 'X-wex-context-foo'."""
    readables = URL("http://httpbin.org/headers").get(context={"foo": "bar"})
    for readable in readables:
        response_headers = Response.from_readable(readable).headers
        assert response_headers.get("X-wex-context-foo") == "bar"