def test_parse(self):
    """Check URL splitting done by the factory's ``_parse_url_args``.

    Every case pairs an input URL with the tuple the parser is expected
    to return: scheme, netloc, host, port and request path. Fragments
    are expected to be dropped, an empty path becomes ``/`` and the
    port defaults to 80 (http) or 443 (https).
    """
    loopback = '127.0.0.1'
    loopback_100 = loopback + ':100'
    loopback_12345 = loopback + ':12345'
    cases = [
        # Query string plus fragment, default port.
        ('http://127.0.0.1?c=v&c2=v2#fragment',
         ('http', loopback, loopback, 80, '/?c=v&c2=v2')),
        ('http://127.0.0.1/?c=v&c2=v2#fragment',
         ('http', loopback, loopback, 80, '/?c=v&c2=v2')),
        ('http://127.0.0.1/foo?c=v&c2=v2#frag',
         ('http', loopback, loopback, 80, '/foo?c=v&c2=v2')),
        # Query string plus fragment, explicit port.
        ('http://127.0.0.1:100?c=v&c2=v2#fragment',
         ('http', loopback_100, loopback, 100, '/?c=v&c2=v2')),
        ('http://127.0.0.1:100/?c=v&c2=v2#frag',
         ('http', loopback_100, loopback, 100, '/?c=v&c2=v2')),
        ('http://127.0.0.1:100/foo?c=v&c2=v2#frag',
         ('http', loopback_100, loopback, 100, '/foo?c=v&c2=v2')),
        # Plain http URLs.
        ('http://127.0.0.1',
         ('http', loopback, loopback, 80, '/')),
        ('http://127.0.0.1/',
         ('http', loopback, loopback, 80, '/')),
        ('http://127.0.0.1/foo',
         ('http', loopback, loopback, 80, '/foo')),
        ('http://127.0.0.1?param=value',
         ('http', loopback, loopback, 80, '/?param=value')),
        ('http://127.0.0.1/?param=value',
         ('http', loopback, loopback, 80, '/?param=value')),
        ('http://127.0.0.1:12345/foo',
         ('http', loopback_12345, loopback, 12345, '/foo')),
        # Host names instead of IP addresses.
        ('http://spam:12345/foo',
         ('http', 'spam:12345', 'spam', 12345, '/foo')),
        ('http://spam.test.org/foo',
         ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
        # https scheme.
        ('https://127.0.0.1/foo',
         ('https', loopback, loopback, 443, '/foo')),
        ('https://127.0.0.1/?param=value',
         ('https', loopback, loopback, 443, '/?param=value')),
        ('https://127.0.0.1:12345/',
         ('https', loopback_12345, loopback, 12345, '/')),
        # Trailing whitespace in the URL is expected to be ignored.
        ('http://crawlmitest.org/foo ',
         ('http', 'crawlmitest.org', 'crawlmitest.org', 80, '/foo')),
        ('http://egg:7890 ',
         ('http', 'egg:7890', 'egg', 7890, '/')),
    ]

    factory = CrawlmiHTPPClientFactory(Request(url='http://github.com/'))
    for url, expected in cases:
        parsed = factory._parse_url_args(url)
        # The URL doubles as the failure message to identify the case.
        self.assertEqual(parsed, expected, url)
def test_factory_info(self):
    """Check the response metadata recorded on the factory after a download.

    Connects the factory to the URL returned by ``self.get_url('file')``
    and, once its deferred fires, verifies the status, HTTP version,
    reason message and ``content-length`` response header.

    Returns the factory's deferred so the test runner can wait on it.
    """
    def _cbFactoryInfo(ignored_result, factory):
        # assertEqual/assertTrue replace the deprecated assertEquals/assert_
        # aliases, which are no longer available in recent unittest versions.
        self.assertEqual(factory.status, 200)
        self.assertTrue(factory.version.startswith('HTTP/'))
        self.assertEqual(factory.message, 'OK')
        self.assertEqual(factory.response_headers['content-length'], '10')

    url = self.get_url('file')
    factory = CrawlmiHTPPClientFactory(Request(url))
    # Only host and port are needed to open the connection.
    _scheme, _netloc, host, port, _path = factory._parse_url_args(url)
    reactor.connectTCP(host, port, factory)
    return factory.deferred.addCallback(_cbFactoryInfo, factory)
def _clientfactory(*args, **kwargs):
    """Create a client factory whose deferred fires with the response body.

    ``timeout`` and ``download_size`` are removed from ``kwargs`` (both
    default to 0) and handed to the factory; all remaining positional and
    keyword arguments are used to build the ``Request``.
    """
    download_timeout = kwargs.pop('timeout', 0)
    size_limit = kwargs.pop('download_size', 0)
    request = Request(*args, **kwargs)
    factory = CrawlmiHTPPClientFactory(
        request,
        timeout=download_timeout,
        download_size=size_limit,
    )

    def _extract_body(response):
        return response.body

    # Callers get the body rather than the whole response object.
    factory.deferred.addCallback(_extract_body)
    return factory
def download_request(self, request):
    '''Start the HTTP download of ``request`` and return its deferred.

    The timeout and size limit are looked up in the settings
    (``DOWNLOAD_TIMEOUT``, default 180; ``DOWNLOAD_SIZE_LIMIT``, default 0)
    with the request passed along to the lookup. The optional
    ``bind_address`` entry of ``request.meta`` is forwarded to the reactor.
    '''
    timeout = self.settings.get_float('DOWNLOAD_TIMEOUT', 180, request)
    size_limit = self.settings.get_int('DOWNLOAD_SIZE_LIMIT', 0, request)
    factory = CrawlmiHTPPClientFactory(request, timeout, size_limit)

    reactor.connectTCP(
        factory.host,
        factory.port,
        factory,
        bindAddress=request.meta.get('bind_address'),
    )
    return factory.deferred
def test_factory_info(self):
    """Check the response metadata recorded on the factory after a download.

    Connects the factory to the URL returned by ``self.get_url('file')``
    and, once its deferred fires, verifies the status, HTTP version,
    reason message and ``content-length`` response header.

    Returns the factory's deferred so the test runner can wait on it.
    """
    def _cbFactoryInfo(ignored_result, factory):
        # assertEqual/assertTrue replace the deprecated assertEquals/assert_
        # aliases, which are no longer available in recent unittest versions.
        self.assertEqual(factory.status, 200)
        self.assertTrue(factory.version.startswith('HTTP/'))
        self.assertEqual(factory.message, 'OK')
        self.assertEqual(factory.response_headers['content-length'], '10')

    url = self.get_url('file')
    factory = CrawlmiHTPPClientFactory(Request(url))
    # Only host and port are needed to open the connection.
    _scheme, _netloc, host, port, _path = _parse_url_args(url)
    reactor.connectTCP(host, port, factory)
    return factory.deferred.addCallback(_cbFactoryInfo, factory)
def test_invalid_status(self):
    """A non-numeric status code must fail the factory's deferred.

    Feeds the protocol a response whose status line carries ``BUG``
    instead of a number and expects ``BadHttpHeaderError``.
    """
    factory = CrawlmiHTPPClientFactory(Request(url='http://foo/bar'))

    client = CrawlmiHTTPClient()
    client.factory = factory
    client.makeConnection(StringTransport())
    client.headers = Headers()

    raw_response = (
        'HTTP/1.0 BUG OK\r\n',
        'Hello: World\r\n',
        'Foo: Bar\r\n',
        '\r\n',
    )
    # Deliver the response one line at a time, as separate chunks.
    for chunk in raw_response:
        client.dataReceived(chunk)
    client.handleResponse('')

    return self.assertFailure(factory.deferred, BadHttpHeaderError)
def test_non_standard_line_endings(self):
    """Headers terminated by a bare ``\\n`` must still be parsed.

    The response lines are delivered with ``\\n`` instead of ``\\r\\n``
    and the protocol's collected headers are compared to the expected set.
    """
    client = CrawlmiHTTPClient()
    client.factory = CrawlmiHTPPClientFactory(Request(url='http://foo/bar'))
    client.headers = Headers()

    for chunk in ('HTTP/1.0 200 OK\n', 'Hello: World\n', 'Foo: Bar\n', '\n'):
        client.dataReceived(chunk)

    expected = Headers({'Hello': ['World'], 'Foo': ['Bar']})
    self.assertEqual(client.headers, expected)
def test_parse(self):
    """Check URL splitting done by ``_parse_url_args``.

    Every case pairs an input URL with the tuple the parser is expected
    to return: scheme, netloc, host, port and request path. Fragments
    are expected to be dropped, an empty path becomes ``/`` and the
    port defaults to 80 (http) or 443 (https).
    """
    lip = '127.0.0.1'
    tests = [
        ('http://127.0.0.1?c=v&c2=v2#fragment',
         ('http', lip, lip, 80, '/?c=v&c2=v2')),
        ('http://127.0.0.1/?c=v&c2=v2#fragment',
         ('http', lip, lip, 80, '/?c=v&c2=v2')),
        ('http://127.0.0.1/foo?c=v&c2=v2#frag',
         ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
        ('http://127.0.0.1:100?c=v&c2=v2#fragment',
         ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
        ('http://127.0.0.1:100/?c=v&c2=v2#frag',
         ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
        ('http://127.0.0.1:100/foo?c=v&c2=v2#frag',
         ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
        ('http://127.0.0.1',
         ('http', lip, lip, 80, '/')),
        ('http://127.0.0.1/',
         ('http', lip, lip, 80, '/')),
        ('http://127.0.0.1/foo',
         ('http', lip, lip, 80, '/foo')),
        ('http://127.0.0.1?param=value',
         ('http', lip, lip, 80, '/?param=value')),
        ('http://127.0.0.1/?param=value',
         ('http', lip, lip, 80, '/?param=value')),
        ('http://127.0.0.1:12345/foo',
         ('http', lip + ':12345', lip, 12345, '/foo')),
        ('http://spam:12345/foo',
         ('http', 'spam:12345', 'spam', 12345, '/foo')),
        ('http://spam.test.org/foo',
         ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
        ('https://127.0.0.1/foo',
         ('https', lip, lip, 443, '/foo')),
        ('https://127.0.0.1/?param=value',
         ('https', lip, lip, 443, '/?param=value')),
        ('https://127.0.0.1:12345/',
         ('https', lip + ':12345', lip, 12345, '/')),
        # Trailing whitespace in the URL is expected to be ignored.
        ('http://crawlmitest.org/foo ',
         ('http', 'crawlmitest.org', 'crawlmitest.org', 80, '/foo')),
        ('http://egg:7890 ',
         ('http', 'egg:7890', 'egg', 7890, '/')),
    ]

    # _parse_url_args is called as a plain function here, so no factory
    # instance is needed; the unused one was dropped.
    for url, test in tests:
        # The URL doubles as the failure message to identify the case.
        self.assertEqual(_parse_url_args(url), test, url)
def test_early_headers(self):
    """Verify the raw request text produced for various header layouts.

    Each scenario builds a ``Request``, wraps it in a client factory and
    passes the factory plus the expected wire text to ``self._test``.
    """

    def check(request, expected_wire):
        self._test(CrawlmiHTPPClientFactory(request), expected_wire)

    # Baseline scenario borrowed from twisted's HTTPageGetter tests: the
    # expected output carries a Content-Length matching the 9-byte body
    # rather than the caller-supplied '12981'.
    check(
        Request(
            url='http://foo/bar',
            body='some data',
            headers={
                'Host': 'example.net',
                'User-Agent': 'fooble',
                'Cookie': 'blah blah',
                'Content-Length': '12981',
                'Useful': 'value',
            },
        ),
        'GET /bar HTTP/1.0\r\n'
        'Content-Length: 9\r\n'
        'Useful: value\r\n'
        'Connection: close\r\n'
        'User-Agent: fooble\r\n'
        'Host: example.net\r\n'
        'Cookie: blah blah\r\n'
        '\r\n'
        'some data',
    )

    # Smallest possible request: only the Host header is expected.
    check(
        Request('http://foo/bar'),
        'GET /bar HTTP/1.0\r\n'
        'Host: foo\r\n'
        '\r\n',
    )

    # Plain POST carrying a body and a content type.
    check(
        Request(
            method='POST',
            url='http://foo/bar',
            body='name=value',
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
        ),
        'POST /bar HTTP/1.0\r\n'
        'Host: foo\r\n'
        'Connection: close\r\n'
        'Content-Type: application/x-www-form-urlencoded\r\n'
        'Content-Length: 10\r\n'
        '\r\n'
        'name=value',
    )

    # A mix of single-valued and multi-valued headers, given as a dict.
    check(
        Request(
            url='http://foo/bar',
            headers={
                'X-Meta-Single': 'single',
                'X-Meta-Multivalued': ['value1', 'value2'],
            },
        ),
        'GET /bar HTTP/1.0\r\n'
        'Host: foo\r\n'
        'X-Meta-Multivalued: value1\r\n'
        'X-Meta-Multivalued: value2\r\n'
        'X-Meta-Single: single\r\n'
        '\r\n',
    )

    # The same mix again, this time supplied through a Headers instance.
    check(
        Request(
            url='http://foo/bar',
            headers=Headers({
                'X-Meta-Single': 'single',
                'X-Meta-Multivalued': ['value1', 'value2'],
            }),
        ),
        'GET /bar HTTP/1.0\r\n'
        'Host: foo\r\n'
        'X-Meta-Multivalued: value1\r\n'
        'X-Meta-Multivalued: value2\r\n'
        'X-Meta-Single: single\r\n'
        '\r\n',
    )
def _parse(self, url):
    """Return the URL parts stored on a factory built for ``url``.

    The result is the tuple ``(scheme, netloc, host, port, path)`` read
    from the attributes of a freshly created client factory.
    """
    factory = CrawlmiHTPPClientFactory(Request(url=url))
    parts = ('scheme', 'netloc', 'host', 'port', 'path')
    return tuple(getattr(factory, part) for part in parts)