def test_post():
    """POST form data encoded in the URL fragment round-trips to httpbin."""
    method = {"post": {"data": {"x": "1"}}}
    target = URL('http://httpbin.org/post').update_fragment_dict(method=method)
    for stream in target.get():
        resp = Response.from_readable(stream)
        payload = json.load(utf8_reader(resp))
        assert payload['form'] == method['post']['data']
def test_warc_response():
    """A WARC fixture exposes url, WARC protocol, version and headers."""
    stream = resource_stream(__name__, 'fixtures/warc_response')
    resp = Response.from_readable(stream)
    assert resp.url == 'http://httpbin.org/get?this=that'
    assert resp.warc_protocol == b'WARC'
    assert resp.warc_version == (1, 0)
    assert resp.warc_headers.get('warc-type') == 'response'
def test_get_gzip():
    """Fetching with decode_content=False still decodes via decode()."""
    url = 'http://httpbin.org/gzip'
    for index, stream in enumerate(URL(url).get(decode_content=False)):
        resp = Response.from_readable(stream)
        payload = json.load(utf8_reader(decode(resp)))
        assert payload.get('gzipped')
        # exactly one readable is expected from the iterator
        assert index == 0
def test_chained_does_seek_response():
    """chained() re-seeks the response to 0 between extractors."""
    stream = resource_stream(__name__, 'fixtures/robots_txt')
    resp = Response.from_readable(stream)
    # run the identical extractor twice in one chain
    extractor = chained(extract_first_line, extract_first_line)
    results = list(extractor(resp))
    # both passes see the same first line because of the re-seek
    assert results == [(b'# /robots.txt\n', ), (b'# /robots.txt\n', )]
def test_parse_url_response():
    """parse_url reads the URL from the X-wex-request-url header."""
    raw = (b"HTTP/1.1 200 OK\r\n"
           b"X-wex-request-url: http://www.foo.com/a/b/c\r\n"
           b"Content-type: text/html\r\n"
           b"\r\n"
           b"<html><h1>hello</h1></html>")
    resp = Response.from_readable(BytesIO(raw))
    assert parse_url(resp).path == '/a/b/c'
def test_extract_from_readable():
    """values_from_readable passes the body through to the extractor."""
    source = BytesIO(b'FTP/1.0 200 OK\r\n\r\nhello')

    def read_all(src):
        yield (src.read(), )

    results = list(Response.values_from_readable(read_all, source))
    assert results == [(b'hello', )]
def test_chained_does_seek_response():
    """Chaining the same extractor twice yields identical first lines."""
    resp = Response.from_readable(
        resource_stream(__name__, 'fixtures/robots_txt'))
    # use the same extractor twice
    extractor = chained(extract_first_line, extract_first_line)
    first_line = (b'# /robots.txt\n',)
    # identical results prove chained re-seeks to 0 between passes
    assert list(extractor(resp)) == [first_line, first_line]
def stream_from_fixture(fixture):
    """Build an HTMLStream from an htmlstream fixture, after first
    advancing the read position to prove HTMLStream seeks back to 0."""
    path = 'fixtures/htmlstream/' + fixture
    resp = Response.from_readable(resource_stream(__name__, path))
    # advance the position so HTMLStream must seek(0) itself
    resp.read(100)
    return HTMLStream(resp)
def test_parse_next_decoder():
    """Parsing falls back from utf-8 to shift-jis on decode failure."""
    # borrow a fixture from htmlstream
    fixture = 'fixtures/htmlstream/shift-jis-next-decoder'
    resp = Response.from_readable(resource_stream(__name__, fixture))
    # the parse function tries utf-8 first, then shift-jis
    tree = e.parse(resp)
    assert list(tree.getroot().itertext()) == ['\n', '巨', '\n']
def extract_with_monkeypatched_logging(monkeypatch, excluded=None):
    """Run the entry-point extractor over the robots_txt fixture with
    ``logging.getLogger`` monkeypatched, returning the FakeLogger so the
    caller can inspect what was logged.

    ``excluded`` previously defaulted to a mutable list literal — the
    classic shared-mutable-default pitfall — so it now defaults to None
    and is normalized here.  NOTE(review): ``excluded`` is not used in
    this body; presumably consumed by callers/fixtures — verify before
    removing it from the signature.
    """
    if excluded is None:
        excluded = []
    logger = FakeLogger('wex.entrypoints')
    monkeypatch.setattr('logging.getLogger', logger.getLogger)
    extractor = extractor_from_entry_points()
    readable = resource_stream(__name__, 'fixtures/robots_txt')
    # drain the generator purely for its logging side effects
    for value in Response.values_from_readable(extractor, readable):
        pass
    return logger
def test_get_gzip(): url = "http://httpbin.org/gzip" for i, readable in enumerate(URL(url).get(decode_content=False)): response = Response.from_readable(readable) assert response.headers.get("X-wex-has-gzip-magic") == "1" data = json.load(utf8_reader(decode(response))) assert data.get("gzipped") assert i == 0
def get(url, **kw):
    """GET ``url`` and return the list of response codes seen; any 200
    response must carry a JSON body containing a 'headers' key."""
    seen_codes = []
    for stream in URL(url).get(**kw):
        resp = Response.from_readable(stream)
        seen_codes.append(resp.code)
        if resp.code == 200:
            body = json.load(utf8_reader(decode(resp)))
            assert 'headers' in body
    return seen_codes
def test_extractor_from_entry_points_hostname_suffix_excluded():
    """Suffix-excluded entry points must not register for the hostname."""
    import testme
    extractor = extractor_from_entry_points()
    stream = resource_stream(__name__, 'fixtures/robots_txt')
    # drain the generator to trigger extractor registration
    for _ in Response.values_from_readable(extractor, stream):
        pass
    hostname = 'www.foo.com'
    assert list(extractor.extractors.keys()) == [hostname]
    registered = set(extractor.extractors[hostname].extractors)
    assert testme.example_with_hostname_suffix not in registered
    assert testme.example in registered
def test_parse_url_response():
    """The parsed path comes from the X-wex-request-url header."""
    message = b"".join([
        b"HTTP/1.1 200 OK\r\n",
        b"X-wex-request-url: http://www.foo.com/a/b/c\r\n",
        b"Content-type: text/html\r\n",
        b"\r\n",
        b"<html><h1>hello</h1></html>",
    ])
    resp = Response.from_readable(BytesIO(message))
    assert parse_url(resp).path == '/a/b/c'
def test_extractor_from_entry_points():
    """Entry-point extractors register under the response hostname."""
    import testme
    extract = extractor_from_entry_points()
    stream = resource_stream(__name__, 'fixtures/get_this_that')
    # consume the generator to trigger registration
    for _ in Response.values_from_readable(extract, stream):
        pass
    hostname = 'httpbin.org'
    assert list(extract.extractors.keys()) == [hostname]
    registered = set(extract.extractors[hostname].extractors)
    assert {testme.example, testme.example_with_hostname_suffix} <= registered
def test_get_with_params():
    """Params added at .get() time reach the server but stay out of the
    stored response URL."""
    url = 'http://httpbin.org/get'
    # The use case for adding params at '.get' time is for handling
    # authentication tokens to URLs.  The net effect is that the
    # tokens are not saved in the Wex response which is a way of
    # avoiding sharing your access tokens.
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        assert data.get('args') == params
        assert response.request_url == url
        # idiom fix: `x not in y` instead of `not x in y` (PEP 8)
        assert 'secret' not in response.url
def build_response(content, response=with_content_length, **kw):
    """Fill the ``response`` template with content/status fields — each
    overridable via **kw — and wrap the encoded bytes in a Response.
    Any leftover kw entries are forwarded to Response.from_readable."""
    fields = {
        'content': content,
        'content_length': len(content),
        'protocol_version': 'HTTP/1.1',
        'code': 200,
        'reason': 'OK',
    }
    # pop overrides out of kw so only the remainder reaches from_readable
    for name in fields:
        if name in kw:
            fields[name] = kw.pop(name)
    raw = response.format(**fields).encode('utf-8')
    return Response.from_readable(BytesIO(raw), **kw)
def create_response(data):
    """Wrap raw response bytes in a Response object."""
    readable = BytesIO(data)
    return Response.from_readable(readable)
def test_parse_obj():
    """parse_url accepts any object that exposes a ``url`` attribute."""
    class FakeResponse(object):
        url = url1
    assert parse_url(FakeResponse()) == url1_parsed
def test_get_with_context():
    """Context items surface as X-wex-context-* response headers."""
    target = URL('http://httpbin.org/headers')
    for stream in target.get(context={'foo': 'bar'}):
        resp = Response.from_readable(stream)
        assert resp.headers.get('X-wex-context-foo') == 'bar'
def stream_from_fixture(fixture): resource = "fixtures/htmlstream/" + fixture readable = resource_stream(__name__, resource) response = Response.from_readable(readable) stream = HTMLStream(response) return stream
def test_undecodable_url():
    """URLs that do not decode cleanly are still exposed as text."""
    resp = Response.from_readable(
        resource_stream(__name__, 'fixtures/undecodable_url.wexin_'))
    assert resp.url == 'https://www.example.net/Ã'
    assert resp.request_url == 'https://www.example.net/Ã#{"method":"get"}'
def test_extract_from_readable():
    """An extractor generator receives the readable's body content."""
    source = BytesIO(b'FTP/1.0 200 OK\r\n\r\nhello')

    def read_body(src):
        yield (src.read(),)

    assert list(Response.values_from_readable(read_body, source)) == \
        [(b'hello',)]
def test_warc_response():
    """A WARC fixture yields the original request URL."""
    resp = Response.from_readable(
        resource_stream(__name__, 'fixtures/warc_response'))
    assert resp.url == 'http://httpbin.org/get?this=that'
def test_bug_cp1252():
    """cp1252-encoded content parses with its title characters intact."""
    stream = resource_stream(__name__, 'fixtures/bug_cp1252')
    tree = e.parse(Response.from_readable(stream))
    assert tree.xpath('//title/text()') == ['Problem²']
def response(resource):
    """Load the named fixture resource as a Response."""
    stream = resource_stream(__name__, resource)
    return Response.from_readable(stream)
def test_get_with_context(): url = "http://httpbin.org/headers" for readable in URL(url).get(context={"foo": "bar"}): response = Response.from_readable(readable) assert response.headers.get("X-wex-context-foo") == "bar"