def test_dirnames():
    """dirnames() splits a URL into scheme, host, path, quoted query, hash."""
    target = URL('http://example.net/foo?q=v')
    expected = [
        'http',
        'example.net',
        'foo',
        'q%3Dv',
        '97449b4c8efcdabdfeed82f5cffb09c2',
    ]
    assert target.dirnames() == expected
def test_post():
    """POST data supplied via the method fragment is echoed back by httpbin."""
    method = {"post": {"data": {"x": "1"}}}
    target = URL('http://httpbin.org/post').update_fragment_dict(method=method)
    for readable in target.get():
        resp = Response.from_readable(readable)
        payload = json.load(utf8_reader(resp))
        assert payload['form'] == method['post']['data']
def test_dirnames_unicode():
    """Non-ASCII path segments are UTF-8 percent-encoded in dirnames()."""
    target = URL(u'http://example.net/m\xf3vil')
    expected = [
        'http',
        'example.net',
        'm%C3%B3vil',
        '8ca739c73e33e28ec0764a1d987b6a71',
    ]
    assert target.dirnames() == expected
def urls_from_urlset_or_sitemapindex(response):
    """
    Yields URLs from ``<urlset>`` or ``<sitemapindex>`` elements as per
    `sitemaps.org <http://www.sitemaps.org/protocol.html>`_.

    Each item yielded is a ``("url", URL)`` pair.  Responses that are
    neither flagged ``sitemap=True`` in their URL fragment nor served
    with an XML content subtype are skipped entirely.
    """
    sitemap = URL(response.url).fragment_dict.get('sitemap')
    content_subtypes = response.headers.get_content_subtype().split('+')
    # idiomatic membership test ('x not in y' rather than 'not x in y')
    if not sitemap and 'xml' not in content_subtypes:
        return
    root = None
    for _, elem in iterparse(decode(response)):
        if root is None:
            root = elem.getroottree().getroot()
            if not (root.tag.endswith('}sitemapindex') or
                    root.tag.endswith('}urlset')):
                # root element has wrong tag - give up
                break
        if elem.tag.endswith('}loc') and elem.text is not None:
            text = elem.text.strip()
            if text:
                # http://www.sitemaps.org/protocol.html#locdef
                url = URL(urljoin(response.url, text))
                if elem.getparent().tag.endswith('}sitemap'):
                    # set sitemap=True to help downstream processing
                    url = url.update_fragment_dict(sitemap=True)
                yield "url", url
        if elem.getparent() is root:
            # release memory for previous elements
            while elem.getprevious() is not None:
                del root[0]
def test_get_gzip():
    """Gzipped httpbin content decodes correctly with decode_content=False."""
    endpoint = 'http://httpbin.org/gzip'
    for index, readable in enumerate(URL(endpoint).get(decode_content=False)):
        resp = Response.from_readable(readable)
        payload = json.load(utf8_reader(decode(resp)))
        assert payload.get('gzipped')
    # exactly one readable should have been produced
    assert index == 0
def test_ftp_readline():
    """readline() on the single FTP readable yields the expected lines."""
    readables = list(URL(url).get())
    assert len(readables) == 1
    reader = readables[0]
    count = 3
    head = [reader.readline() for _ in range(count)]
    assert head == expected_lines[:count]
def test_urllib3_issue_709_deflate():
    """Partial reads expose magic bytes on deflate-encoded responses.

    Regression test for https://github.com/shazow/urllib3/issues/709.
    """
    endpoint = 'http://httpbin.org/deflate'
    for resp in map(Response.from_readable, URL(endpoint).get()):
        # the partial read should let us see some not-so-magic bytes
        assert resp.magic_bytes == b'{\n "def'
        payload = json.load(utf8_reader(resp))
        assert 'deflated' in payload
def get(url, **kw):
    """Fetch *url*, sanity-check any 200 body, and return all response codes."""
    codes = []
    for readable in URL(url).get(**kw):
        resp = Response.from_readable(readable)
        codes.append(resp.code)
        if resp.code == 200:
            payload = json.load(utf8_reader(decode(resp)))
            assert 'headers' in payload
    return codes
def test_ftp_read():
    """read() drains the single FTP readable; its content matches expectations."""
    readables = list(URL(url).get())
    assert len(readables) == 1
    reader = readables[0]
    content = reader.read(2 ** 16)
    # a second read past the end must return nothing
    chunk = reader.read(2 ** 16)
    assert not chunk
    assert content.startswith(expected_content)
def urls_from_robots_txt(response):
    """
    Yields ``("url", URL)`` pairs for each Sitemap directive found in a
    "/robots.txt" response; other paths yield nothing.
    """
    current = URL(response.request_url or response.url or '')
    if current.parsed.path != '/robots.txt':
        return
    charset = response.headers.get_content_charset()
    reader = getreader(charset or 'ISO-8859-1')(response)
    for raw_line in reader:
        directive, _, _comment = raw_line.partition('#')
        field, _, value = directive.partition(':')
        if field.strip().lower() != 'sitemap':
            continue
        # we shouldn't need to urljoin but we do just in case
        joined = URL(urljoin(response.url, value.strip()))
        # set sitemap=True in fragment to help downstream processing
        yield "url", joined.update_fragment_dict(sitemap=True)
def test_get_with_params():
    """Params passed at .get() time reach the server but stay out of the URL.

    The use case for adding params at '.get' time is for handling
    authentication tokens to URLs.  The net effect is that the tokens are
    not saved in the Wex response, which is a way of avoiding sharing
    your access tokens.
    """
    url = 'http://httpbin.org/get'
    params = {'token': 'secret'}
    for readable in URL(url).get(params=params):
        response = Response.from_readable(readable)
        data = json.load(utf8_reader(response))
        assert data.get('args') == params
        assert response.request_url == url
        # idiomatic membership test: the token must not leak into the URL
        assert 'secret' not in response.url
def urls_from_urlset_or_sitemapindex(response):
    """
    Yields URLs from ``<urlset>`` or ``<sitemapindex>`` elements as per
    `sitemaps.org <http://www.sitemaps.org/protocol.html>`_.
    """
    # A response qualifies if its URL fragment carries sitemap=True or the
    # Content-Type subtype contains 'xml' (e.g. 'xml' or 'rss+xml').
    sitemap = URL(response.url).fragment_dict.get('sitemap')
    content_subtypes = response.headers.get_content_subtype().split('+')
    if not sitemap and 'xml' not in content_subtypes:
        return
    root = None
    try:
        # Incremental parse keeps memory bounded even for large sitemaps.
        for _, elem in iterparse(decode(response)):
            if root is None:
                root = elem.getroottree().getroot()
                if not (root.tag.endswith('}sitemapindex') or
                        root.tag.endswith('}urlset')):
                    # root element has wrong tag - give up
                    break
            if elem.tag.endswith('}loc') and elem.text is not None:
                text = elem.text.strip()
                if text:
                    # http://www.sitemaps.org/protocol.html#locdef
                    url = URL(urljoin(response.url, text))
                    if elem.getparent().tag.endswith('}sitemap'):
                        # set sitemap=True to help downstream processing
                        url = url.update_fragment_dict(sitemap=True)
                    yield "url", url
            if elem.getparent() is root:
                # release memory for previous elements
                while elem.getprevious() is not None:
                    del root[0]
    except XMLSyntaxError:
        # Malformed XML ends iteration quietly; log for diagnosis.
        log.debug("XMLSyntaxError in '%s' (%d)", response.url, response.code)
def run(**kw):
    """Submit the httpbin demo form and verify the echoed form fields."""
    form_url = URL('http://httpbin.org/forms/post')
    custname = 'Giles'
    toppings = ('bacon', 'onion')
    comments = 'Using CSS selector'
    method = {
        'form': {
            'form': [
                ('custname', custname),
                ('topping', toppings),
                ('textarea', comments),
            ]
        }
    }
    form_url = form_url.update_fragment_dict(method=method)
    responses = [Response.from_readable(r) for r in form_url.get(**kw)]
    # we should have GET and then POST
    assert len(responses) == 2
    data = json.loads(responses[1].read().decode('utf-8'))
    assert set(data['form']) == {'comments', 'custname', 'topping'}
    assert data['form']['custname'] == custname
    assert data['form']['topping'] == list(toppings)
    assert data['form']['comments'] == comments
def get_monkeypatched(monkeypatch, url, **kw):
    """Patch Session.request, drive a GET, and return the recorded call args."""
    recorded = []

    def fake_request(session, *args, **request_kwargs):
        # capture the call and hand back an empty canned response
        recorded.append((args, request_kwargs))
        fake = requests.models.Response()
        fake.raw = Raw()
        return fake

    monkeypatch.setattr('requests.sessions.Session.request', fake_request)
    for _ in URL(url).get(**kw):
        pass
    return recorded
def test_method_without_scheme():
    """Accessing .method on a scheme-less URL raises ValueError."""
    with pytest.raises(ValueError):
        URL('/foo/bar').method
def test_fragment_quoted():
    """A percent-encoded JSON fragment is unquoted before decoding."""
    # requests quotes the fragment - but we do know how to unquote
    quoted = URL('http://foo.com/path#%7B%22a%22%3A1%7D')
    assert quoted.fragment_dict == {'a': 1}
def test_dirnames():
    """dirnames() yields scheme, host, path, quoted query and a hash suffix."""
    expected = [
        'http',
        'example.net',
        'foo',
        'q%3Dv',
        '97449b4c8efcdabdfeed82f5cffb09c2',
    ]
    assert URL('http://example.net/foo?q=v').dirnames() == expected
def test_get_with_context():
    """Context entries surface as X-wex-context-* response headers."""
    endpoint = 'http://httpbin.org/headers'
    for readable in URL(endpoint).get(context={'foo': 'bar'}):
        resp = Response.from_readable(readable)
        assert resp.headers.get('X-wex-context-foo') == 'bar'
def test_update_fragment_dict():
    """update_fragment_dict merges new keys into the JSON fragment."""
    before = 'http://foo.com/path#{"method":"get"}'
    after = 'http://foo.com/path#{"cheeky":true,"method":"get"}'
    assert URL(before).update_fragment_dict(cheeky=True) == after
def test_method_no_fragment():
    """A URL without any fragment falls back to the default method."""
    target = URL('http://foo.com/path')
    assert target.method.name == DEFAULT_METHOD
def test_method_fragment_no_method_key():
    """A fragment dict lacking a "method" key yields the default method."""
    target = URL('http://foo.com/path#{"foo":1}')
    assert target.method.name == u.DEFAULT_METHOD
def test_method_fragment_not_dict():
    """A non-dict fragment falls back to the default method."""
    target = URL('http://foo.com/path#3')
    assert target.method.name == u.DEFAULT_METHOD
from subprocess import check_output, CalledProcessError

from six.moves import map
import pytest

from wex.response import Response
from wex.etree import parse
from wex.url import URL
from httpproxy import HttpProxy, skipif_travis_ci

url = URL("http://httpbin.org/html")
method = {"phantomjs": {"requires": [["wex", "js/bcr.js"]]}}
url = url.update_fragment_dict(method=method)

try:
    version = check_output(["phantomjs", "--version"])
except (OSError, CalledProcessError):
    # OSError covers a missing phantomjs binary (FileNotFoundError);
    # CalledProcessError covers a binary that exits non-zero.  Catching
    # only CalledProcessError would crash test collection when phantomjs
    # is not installed at all.
    version_info = (0, 0, 0)
else:
    version_info = tuple(map(int, version.split(b".")))

old_phantomjs_version = pytest.mark.skipif(
    version_info < (2, 0, 0),
    reason="phantomjs version too old",
)


@old_phantomjs_version
def test_phantomjs():
    """Rendering via phantomjs yields the page's <h1> and echoes context."""
    elements = []
    context = {"foo": "bar"}
    for response in map(Response.from_readable, url.get(context=context)):
        tree = parse(response)
        elements.extend(tree.xpath("//h1"))
        assert response.headers.get("X-wex-context-foo") == "bar"
    assert len(elements) == 1
def test_method_fragment_method_incorrect_type():
    """A "method" value that is neither string nor dict raises ValueError."""
    with pytest.raises(ValueError):
        URL('http://foo.com/path#{"method":1}').method
def test_dirnames_unicode():
    """dirnames() percent-encodes non-ASCII path segments as UTF-8."""
    expected = [
        'http',
        'example.net',
        'm%C3%B3vil',
        '8ca739c73e33e28ec0764a1d987b6a71',
    ]
    assert URL(u'http://example.net/m\xf3vil').dirnames() == expected
def test_method_fragment_method_wrong_number_of_keys():
    """A "method" dict with more than one key raises ValueError."""
    with pytest.raises(ValueError):
        URL('http://foo.com/path#{"method":{"foo":1,"bar":2}}').method
def test_url_get_missing_recipe():
    """Requesting an unknown method name raises ValueError."""
    with pytest.raises(ValueError):
        URL('http://httpbin.org/robots.txt#{"method":"whoops"}').get()
def test_method_fragment_method_value_is_string():
    """A string "method" value is used directly as the method name."""
    target = 'http://foo.com/path#{"method":"foo"}'
    assert URL(target).method.name == "foo"
def test_method_fragment_method_value_is_dict():
    """A one-key "method" dict uses its key as the method name."""
    target = 'http://foo.com/path#{"method":{"foo":1}}'
    assert URL(target).method.name == 'foo'
def test_url_get():
    """A plain GET yields one readable starting with the HTTP status line."""
    readables = list(URL('http://httpbin.org/robots.txt').get())
    assert len(readables) == 1
    assert readables[0].readline(2 ** 16) == b'HTTP/1.1 200 OK\r\n'
import io import errno import sys import time import subprocess from itertools import tee from six import BytesIO from six.moves import zip from pkg_resources import working_set, resource_filename import pytest from wex.readable import EXT_WEXIN from wex.output import EXT_WEXOUT, TeeStdOut from wex.url import URL from wex import command url = URL('http://httpbin.org/get?this=that') def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = tee(iterable) next(b, None) return zip(a, b) def setup_module(): entry = resource_filename(__name__, 'fixtures/TestMe.egg') working_set.add_entry(entry)