Ejemplo n.º 1
0
    def _get_agent(self, request, timeout):
        """Choose the agent that will download ``request``.

        ``request.meta`` is consulted for ``bindaddress`` (falling back to
        ``self._bindAddress``) and for ``proxy``:

        * no proxy: a direct ``self._Agent``;
        * proxy, ``https`` request and no ``noconnect`` in the proxy
          parameters: a ``self._TunnelingAgent`` that also receives the
          request's ``Proxy-Authorization`` header;
        * any other proxied request: a ``self._ProxyAgent`` over a TCP
          endpoint to the proxy host and port.

        ``timeout`` is used as the connection timeout in every case.
        """
        local_bind = request.meta.get("bindaddress") or self._bindAddress
        proxy_url = request.meta.get("proxy")

        # Direct connection: nothing proxy-related to work out.
        if not proxy_url:
            return self._Agent(
                reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        target_scheme = _parse(request.url)[0]
        tunnel_disabled = "noconnect" in proxy_params

        # Plain proxying is used for non-https targets and whenever the
        # proxy parameters opt out of the CONNECT tunnel.
        if tunnel_disabled or target_scheme != "https":
            proxy_endpoint = TCP4ClientEndpoint(
                reactor,
                proxy_host,
                proxy_port,
                timeout=timeout,
                bindAddress=local_bind,
            )
            return self._ProxyAgent(proxy_endpoint)

        proxy_auth = request.headers.get("Proxy-Authorization", None)
        tunnel_conf = (proxy_host, proxy_port, proxy_auth)
        return self._TunnelingAgent(
            reactor,
            tunnel_conf,
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 2
0
 def _get_agent(self, request, timeout):
     """Return the agent used to download ``request``.

     When ``request.meta['proxy']`` is set, a TCP endpoint to the proxy's
     host and port is built and wrapped in a ``SOCKS5Agent``. Otherwise a
     regular ``self._Agent`` is returned.

     ``request.meta['bindaddress']`` (or ``self._bindAddress``) is the local
     bind address and ``timeout`` is the connection timeout.
     """
     bindAddress = request.meta.get('bindaddress') or self._bindAddress
     proxy = request.meta.get('proxy')
     if proxy:
         # Only the proxy's host and port are needed. The request URL used to
         # be parsed here as well, but none of the parsed parts were used.
         _, _, proxyHost, proxyPort, _ = _parse(proxy)
         proxyEndpoint = TCP4ClientEndpoint(
             reactor, proxyHost, proxyPort,
             timeout=timeout, bindAddress=bindAddress)
         return SOCKS5Agent(reactor, proxyEndpoint=proxyEndpoint)
     return self._Agent(
         reactor, contextFactory=self._contextFactory,
         connectTimeout=timeout, bindAddress=bindAddress, pool=self._pool)
Ejemplo n.º 3
0
 def _get_agent(self, request, timeout):
     """Build the download agent for ``request``.

     A request carrying ``meta['proxy']`` is sent through a ``SOCKS5Agent``
     connected to the proxy's host and port; any other request uses a plain
     ``self._Agent``. The local bind address comes from
     ``meta['bindaddress']`` or ``self._bindAddress``; ``timeout`` is the
     connection timeout.
     """
     bindAddress = request.meta.get('bindaddress') or self._bindAddress
     proxy = request.meta.get('proxy')
     if not proxy:
         return self._Agent(
             reactor, contextFactory=self._contextFactory,
             connectTimeout=timeout, bindAddress=bindAddress,
             pool=self._pool)

     # The request URL was previously parsed here too, but its host, port and
     # parameters were never read, so only the proxy URL is parsed now.
     _, _, proxyHost, proxyPort, _ = _parse(proxy)
     proxyEndpoint = TCP4ClientEndpoint(
         reactor, proxyHost, proxyPort,
         timeout=timeout, bindAddress=bindAddress)
     return SOCKS5Agent(reactor, proxyEndpoint=proxyEndpoint)
Ejemplo n.º 4
0
    def _get_agent(self, request, timeout):
        """Build the Twisted agent that will perform ``request``.

        Without ``request.meta['proxy']`` a direct ``self._Agent`` is
        returned. With a proxy:

        * a deprecation warning is emitted when the proxy parameters contain
          ``noconnect``;
        * ``https`` requests (without ``noconnect``) get a
          ``self._TunnelingAgent`` configured with the proxy host, port and
          the request's ``Proxy-Authorization`` header;
        * everything else gets a ``self._ProxyAgent`` whose URI is rebuilt
          from the proxy scheme (``http`` when missing), netloc and
          parameters.
        """
        from twisted.internet import reactor

        local_bind = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')
        if not proxy_url:
            return self._Agent(
                reactor=reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        parsed_proxy = _parse(proxy_url)
        proxy_scheme, proxy_netloc, proxy_host, proxy_port, proxy_params = parsed_proxy
        request_scheme = _parse(request.url)[0]
        proxy_host = to_unicode(proxy_host)

        no_connect = b'noconnect' in proxy_params
        if no_connect:
            deprecation_text = (
                "Using HTTPS proxies in the noconnect mode is deprecated. "
                "If you use Zyte Smart Proxy Manager, it doesn't require "
                "this mode anymore, so you should update scrapy-crawlera "
                "to scrapy-zyte-smartproxy and remove '?noconnect' "
                "from the Zyte Smart Proxy Manager URL."
            )
            warnings.warn(deprecation_text, ScrapyDeprecationWarning)

        use_tunnel = request_scheme == b'https' and not no_connect
        if use_tunnel:
            proxy_auth = request.headers.get(b'Proxy-Authorization', None)
            return self._TunnelingAgent(
                reactor=reactor,
                proxyConf=(proxy_host, proxy_port, proxy_auth),
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        proxy_scheme = proxy_scheme or b'http'
        # NOTE(review): the two converted values below are not read again;
        # the conversions are kept so any encoding error still surfaces here.
        proxy_host = to_bytes(proxy_host, encoding='ascii')
        proxy_port = to_bytes(str(proxy_port), encoding='ascii')
        rebuilt_uri = urlunparse(
            (proxy_scheme, proxy_netloc, proxy_params, '', '', ''))
        return self._ProxyAgent(
            reactor=reactor,
            proxyURI=to_bytes(rebuilt_uri, encoding='ascii'),
            connectTimeout=timeout,
            bindAddress=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 5
0
    def testParse(self):
        """Check ``client._parse`` against a table of URLs.

        Each entry pairs a URL with the expected
        ``(scheme, netloc, host, port, path)`` tuple. The table covers
        default and explicit ports, query strings, fragments (which are
        expected to be dropped) and URLs with trailing whitespace.
        """
        lip = "127.0.0.1"
        tests = (
            ("http://127.0.0.1?c=v&c2=v2#fragment", ("http", lip, lip, 80, "/?c=v&c2=v2")),
            ("http://127.0.0.1/?c=v&c2=v2#fragment", ("http", lip, lip, 80, "/?c=v&c2=v2")),
            ("http://127.0.0.1/foo?c=v&c2=v2#frag", ("http", lip, lip, 80, "/foo?c=v&c2=v2")),
            ("http://127.0.0.1:100?c=v&c2=v2#fragment", ("http", lip + ":100", lip, 100, "/?c=v&c2=v2")),
            ("http://127.0.0.1:100/?c=v&c2=v2#frag", ("http", lip + ":100", lip, 100, "/?c=v&c2=v2")),
            ("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ("http", lip + ":100", lip, 100, "/foo?c=v&c2=v2")),
            ("http://127.0.0.1", ("http", lip, lip, 80, "/")),
            ("http://127.0.0.1/", ("http", lip, lip, 80, "/")),
            ("http://127.0.0.1/foo", ("http", lip, lip, 80, "/foo")),
            ("http://127.0.0.1?param=value", ("http", lip, lip, 80, "/?param=value")),
            ("http://127.0.0.1/?param=value", ("http", lip, lip, 80, "/?param=value")),
            ("http://127.0.0.1:12345/foo", ("http", lip + ":12345", lip, 12345, "/foo")),
            ("http://spam:12345/foo", ("http", "spam:12345", "spam", 12345, "/foo")),
            ("http://spam.test.org/foo", ("http", "spam.test.org", "spam.test.org", 80, "/foo")),
            ("https://127.0.0.1/foo", ("https", lip, lip, 443, "/foo")),
            ("https://127.0.0.1/?param=value", ("https", lip, lip, 443, "/?param=value")),
            ("https://127.0.0.1:12345/", ("https", lip + ":12345", lip, 12345, "/")),
            ("http://scrapytest.org/foo ", ("http", "scrapytest.org", "scrapytest.org", 80, "/foo")),
            ("http://egg:7890 ", ("http", "egg:7890", "egg", 7890, "/")),
        )

        for url, test in tests:
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in Python 3.12+.
            self.assertEqual(client._parse(url), test, url)
Ejemplo n.º 6
0
    def testParse(self):
        """Verify ``client._parse`` for a table of URLs.

        Expected tuples are written with text strings and converted with
        ``to_bytes`` (integers are left untouched) before being compared to
        the parser's output.
        """
        loopback = '127.0.0.1'
        cases = (
            # Query strings and fragments, with and without an explicit port.
            ("http://127.0.0.1?c=v&c2=v2#fragment", ('http', loopback, loopback, 80, '/?c=v&c2=v2')),
            ("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', loopback, loopback, 80, '/?c=v&c2=v2')),
            ("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', loopback, loopback, 80, '/foo?c=v&c2=v2')),
            ("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', loopback + ':100', loopback, 100, '/?c=v&c2=v2')),
            ("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', loopback + ':100', loopback, 100, '/?c=v&c2=v2')),
            ("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', loopback + ':100', loopback, 100, '/foo?c=v&c2=v2')),
            # Plain http URLs.
            ("http://127.0.0.1", ('http', loopback, loopback, 80, '/')),
            ("http://127.0.0.1/", ('http', loopback, loopback, 80, '/')),
            ("http://127.0.0.1/foo", ('http', loopback, loopback, 80, '/foo')),
            ("http://127.0.0.1?param=value", ('http', loopback, loopback, 80, '/?param=value')),
            ("http://127.0.0.1/?param=value", ('http', loopback, loopback, 80, '/?param=value')),
            ("http://127.0.0.1:12345/foo", ('http', loopback + ':12345', loopback, 12345, '/foo')),
            ("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
            ("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
            # https URLs.
            ("https://127.0.0.1/foo", ('https', loopback, loopback, 443, '/foo')),
            ("https://127.0.0.1/?param=value", ('https', loopback, loopback, 443, '/?param=value')),
            ("https://127.0.0.1:12345/", ('https', loopback + ':12345', loopback, 12345, '/')),
            # URLs with trailing whitespace.
            ("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
            ("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
        )

        for url, expected in cases:
            expected_parts = tuple(
                part if isinstance(part, int) else to_bytes(part)
                for part in expected
            )
            self.assertEqual(client._parse(url), expected_parts, url)
Ejemplo n.º 7
0
    def testParse(self):
        """Check ``client._parse`` against a table of URLs.

        Every entry maps a URL to the expected
        ``(scheme, netloc, host, port, path)`` tuple.
        """
        lip = '127.0.0.1'
        tests = (
            ("http://127.0.0.1?c=v&c2=v2#fragment",     ('http', lip, lip, 80, '/?c=v&c2=v2')),
            ("http://127.0.0.1/?c=v&c2=v2#fragment",    ('http', lip, lip, 80, '/?c=v&c2=v2')),
            ("http://127.0.0.1/foo?c=v&c2=v2#frag",     ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
            ("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip+':100', lip, 100, '/?c=v&c2=v2')),
            ("http://127.0.0.1:100/?c=v&c2=v2#frag",    ('http', lip+':100', lip, 100, '/?c=v&c2=v2')),
            ("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip+':100', lip, 100, '/foo?c=v&c2=v2')),

            ("http://127.0.0.1",              ('http', lip, lip, 80, '/')),
            ("http://127.0.0.1/",             ('http', lip, lip, 80, '/')),
            ("http://127.0.0.1/foo",          ('http', lip, lip, 80, '/foo')),
            ("http://127.0.0.1?param=value",  ('http', lip, lip, 80, '/?param=value')),
            ("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
            ("http://127.0.0.1:12345/foo",    ('http', lip+':12345', lip, 12345, '/foo')),
            ("http://spam:12345/foo",         ('http', 'spam:12345', 'spam', 12345, '/foo')),
            ("http://spam.test.org/foo",      ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),

            ("https://127.0.0.1/foo",         ('https', lip, lip, 443, '/foo')),
            ("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
            ("https://127.0.0.1:12345/",      ('https', lip+':12345', lip, 12345, '/')),

            ("http://scrapytest.org/foo ",    ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
            ("http://egg:7890 ",              ('http', 'egg:7890', 'egg', 7890, '/')),
        )

        for url, test in tests:
            # assertEquals is a deprecated alias that was removed in
            # Python 3.12; assertEqual is the supported spelling.
            self.assertEqual(client._parse(url), test, url)
Ejemplo n.º 8
0
    def _get_agent(self, request, timeout):
        """Select the Twisted agent for ``request``.

        * No ``proxy`` in ``request.meta``: a direct ``self._Agent``.
        * Proxy whose parameters contain ``noconnect``: a deprecation warning
          is emitted and a ``self._ProxyAgent`` is used.
        * Proxy and an ``https`` request: a ``self._TunnelingAgent`` built
          from the proxy host, port and ``Proxy-Authorization`` header.
        * Any other proxied request: a ``self._ProxyAgent`` pointed at the
          proxy URL.
        """
        from twisted.internet import reactor

        local_bind = request.meta.get("bindaddress") or self._bindAddress
        proxy_url = request.meta.get("proxy")
        if not proxy_url:
            return self._Agent(
                reactor=reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        request_scheme = _parse(request.url)[0]
        proxy_host = to_unicode(proxy_host)

        no_connect = b"noconnect" in proxy_params
        if no_connect:
            deprecation_text = (
                "Using HTTPS proxies in the noconnect mode is deprecated. "
                "If you use Zyte Smart Proxy Manager (formerly Crawlera), "
                "it doesn't require this mode anymore, so you should "
                "update scrapy-crawlera to 1.3.0+ and remove '?noconnect' "
                "from the Zyte Smart Proxy Manager URL."
            )
            warnings.warn(deprecation_text, ScrapyDeprecationWarning)

        use_tunnel = request_scheme == b"https" and not no_connect
        if not use_tunnel:
            return self._ProxyAgent(
                reactor=reactor,
                proxyURI=to_bytes(proxy_url, encoding="ascii"),
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        proxy_auth = request.headers.get(b"Proxy-Authorization", None)
        return self._TunnelingAgent(
            reactor=reactor,
            proxyConf=(proxy_host, proxy_port, proxy_auth),
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 9
0
    def _get_agent(self, request, timeout):
        """Return the agent that should handle ``request``.

        A request without ``meta['proxy']`` gets a direct ``self._Agent``.
        A proxied ``https`` request gets a ``self._TunnelingAgent`` unless
        the proxy parameters contain ``noconnect``, in which case a
        deprecation warning is issued; all remaining proxied requests get a
        ``self._ProxyAgent`` pointed at the proxy URL.
        """
        from twisted.internet import reactor

        local_bind = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')
        if not proxy_url:
            return self._Agent(
                reactor=reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        request_scheme = _parse(request.url)[0]
        proxy_host = to_unicode(proxy_host)

        no_connect = b'noconnect' in proxy_params
        if no_connect:
            deprecation_text = (
                "Using HTTPS proxies in the noconnect mode is deprecated. "
                "If you use Crawlera, it doesn't require this mode anymore, "
                "so you should update scrapy-crawlera to 1.3.0+ "
                "and remove '?noconnect' from the Crawlera URL."
            )
            warnings.warn(deprecation_text, ScrapyDeprecationWarning)

        if no_connect or request_scheme != b'https':
            return self._ProxyAgent(
                reactor=reactor,
                proxyURI=to_bytes(proxy_url, encoding='ascii'),
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        proxy_auth = request.headers.get(b'Proxy-Authorization', None)
        return self._TunnelingAgent(
            reactor=reactor,
            proxyConf=(proxy_host, proxy_port, proxy_auth),
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 10
0
    def _get_agent(self, request, timeout):
        """Return a proxy agent when the request names a proxy, else a plain agent.

        The proxy URL is read from ``request.meta['proxy']``; only its host
        and port are used, to open a TCP endpoint that is handed to
        ``self._ProxyAgent``. The bind address comes from
        ``request.meta['bindaddress']`` or ``self._bindAddress``.
        """
        local_bind = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')

        if not proxy_url:
            return self._Agent(
                reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, _ = _parse(proxy_url)
        proxy_endpoint = TCP4ClientEndpoint(
            reactor,
            proxy_host,
            proxy_port,
            timeout=timeout,
            bindAddress=local_bind,
        )
        return self._ProxyAgent(proxy_endpoint)
Ejemplo n.º 11
0
    def _get_agent(self, request, timeout):
        """Pick the agent for ``request`` based on its proxy settings.

        * No ``meta['proxy']``: direct ``self._Agent``.
        * Proxy, ``https`` request, and no ``noconnect`` in the proxy
          parameters: ``self._TunnelingAgent`` with the proxy host, port and
          the request's ``Proxy-Authorization`` header.
        * Otherwise: ``self._ProxyAgent`` pointed at the proxy URL.
        """
        local_bind = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')

        if not proxy_url:
            return self._Agent(
                reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        request_scheme = _parse(request.url)[0]
        proxy_host = to_unicode(proxy_host)
        no_connect = b'noconnect' in proxy_params

        if no_connect or request_scheme != b'https':
            return self._ProxyAgent(
                reactor,
                proxyURI=to_bytes(proxy_url, encoding='ascii'),
                connectTimeout=timeout,
                bindAddress=local_bind,
                pool=self._pool,
            )

        proxy_auth = request.headers.get(b'Proxy-Authorization', None)
        tunnel_conf = (proxy_host, proxy_port, proxy_auth)
        return self._TunnelingAgent(
            reactor,
            tunnel_conf,
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 12
0
    def _get_agent(self, request, timeout):
        """Return the direct, tunneling or proxy agent for ``request``.

        The choice depends on ``request.meta['proxy']``, on the request
        URL's scheme and on whether the proxy parameters contain
        ``noconnect``. ``timeout`` is the connection timeout and the bind
        address is ``request.meta['bindaddress']`` or ``self._bindAddress``.
        """
        bind_to = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')

        if proxy_url:
            _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
            request_scheme = _parse(request.url)[0]
            proxy_host = to_unicode(proxy_host)
            wants_tunnel = (
                request_scheme == b'https'
                and b'noconnect' not in proxy_params
            )

            if wants_tunnel:
                # The CONNECT tunnel needs the proxy address and credentials.
                credentials = request.headers.get(b'Proxy-Authorization', None)
                return self._TunnelingAgent(
                    reactor,
                    (proxy_host, proxy_port, credentials),
                    contextFactory=self._contextFactory,
                    connectTimeout=timeout,
                    bindAddress=bind_to,
                    pool=self._pool,
                )

            return self._ProxyAgent(
                reactor,
                proxyURI=to_bytes(proxy_url, encoding='ascii'),
                connectTimeout=timeout,
                bindAddress=bind_to,
                pool=self._pool,
            )

        return self._Agent(
            reactor,
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=bind_to,
            pool=self._pool,
        )
Ejemplo n.º 13
0
    def _get_agent(self, request, timeout):
        """Build the agent for ``request``.

        With ``request.meta['proxy']`` set, a TCP endpoint to the proxy's
        host and port is wrapped in ``self._ProxyAgent``; otherwise a direct
        ``self._Agent`` is created.
        """
        bind_to = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')

        if proxy_url:
            _, _, proxy_host, proxy_port, _ = _parse(proxy_url)
            endpoint_options = {'timeout': timeout, 'bindAddress': bind_to}
            proxy_endpoint = TCP4ClientEndpoint(
                reactor, proxy_host, proxy_port, **endpoint_options)
            return self._ProxyAgent(proxy_endpoint)

        return self._Agent(
            reactor,
            contextFactory=self._contextFactory,
            connectTimeout=timeout,
            bindAddress=bind_to,
            pool=self._pool,
        )
Ejemplo n.º 14
0
    def _get_agent(self, request, timeout):
        """Return the agent matching the request's proxy configuration.

        * No ``meta['proxy']``: direct ``self._Agent``.
        * Proxy, ``https`` request, and no ``noconnect`` in the proxy
          parameters: ``self._TunnelingAgent``.
        * Otherwise: ``self._ProxyAgent`` over a TCP endpoint to the proxy.
        """
        bind_to = request.meta.get('bindaddress') or self._bindAddress
        proxy_url = request.meta.get('proxy')

        if not proxy_url:
            return self._Agent(
                reactor,
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=bind_to,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        request_scheme = _parse(request.url)[0]
        tunnel_disabled = 'noconnect' in proxy_params

        if request_scheme == 'https' and not tunnel_disabled:
            credentials = request.headers.get('Proxy-Authorization', None)
            return self._TunnelingAgent(
                reactor,
                (proxy_host, proxy_port, credentials),
                contextFactory=self._contextFactory,
                connectTimeout=timeout,
                bindAddress=bind_to,
                pool=self._pool,
            )

        proxy_endpoint = TCP4ClientEndpoint(
            reactor,
            proxy_host,
            proxy_port,
            timeout=timeout,
            bindAddress=bind_to,
        )
        return self._ProxyAgent(proxy_endpoint)
Ejemplo n.º 15
0
    def _get_agent(self, request: Request,
                   timeout: Optional[float]) -> H2Agent:
        """Return the HTTP/2 agent to use for ``request``.

        Without ``request.meta['proxy']`` a direct ``self._Agent`` is built.
        With a proxy, a ``self._ProxyAgent`` is built, except that:

        * a warning is emitted when the proxy parameters contain
          ``noconnect``;
        * an ``https`` request without ``noconnect`` is rejected, because
          CONNECT tunneling is not implemented here.

        Raises:
            NotImplementedError: for an ``https`` request through a proxy
                whose parameters do not contain ``noconnect``.
        """
        from twisted.internet import reactor

        local_bind = request.meta.get('bindaddress') or self._bind_address
        proxy_url = request.meta.get('proxy')
        if not proxy_url:
            return self._Agent(
                reactor=reactor,
                context_factory=self._context_factory,
                connect_timeout=timeout,
                bind_address=local_bind,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy_url)
        request_scheme = _parse(request.url)[0]
        # NOTE(review): the decoded host is not read again below; the call is
        # kept so a decoding failure still surfaces at this point.
        proxy_host = proxy_host.decode()

        if b'noconnect' in proxy_params:
            warnings.warn(
                "Using HTTPS proxies in the noconnect mode is not "
                "supported by the downloader handler. If you use Zyte "
                "Smart Proxy Manager, it doesn't require this mode "
                "anymore, so you should update scrapy-crawlera to "
                "scrapy-zyte-smartproxy and remove '?noconnect' from the "
                "Zyte Smart Proxy Manager URL.")
        elif request_scheme == b'https':
            # TODO: support tunneling through the proxy.
            raise NotImplementedError(
                'Tunneling via CONNECT method using HTTP/2.0 is not yet supported'
            )

        proxy_uri = URI.fromBytes(to_bytes(proxy_url, encoding='ascii'))
        return self._ProxyAgent(
            reactor=reactor,
            context_factory=self._context_factory,
            proxy_uri=proxy_uri,
            connect_timeout=timeout,
            bind_address=local_bind,
            pool=self._pool,
        )
Ejemplo n.º 16
0
    def _get_agent(self, request, timeout):
        """Return the agent for ``request``.

        * No ``meta['proxy']``: direct ``self._Agent``.
        * Proxy, ``https`` request, and no ``noconnect`` in the proxy
          parameters: ``self._TunnelingAgent`` with the proxy host, port and
          the request's ``Proxy-Authorization`` header.
        * Any other proxied request: a ``SOCKS5Agent`` over a TCP endpoint
          to the proxy host and port.
        """
        bindaddress = request.meta.get('bindaddress') or self._bindAddress
        proxy = request.meta.get('proxy')
        if proxy:
            _, _, proxyHost, proxyPort, proxyParams = _parse(proxy)
            scheme = _parse(request.url)[0]
            omitConnectTunnel = 'noconnect' in proxyParams
            if scheme == 'https' and not omitConnectTunnel:
                proxyConf = (proxyHost, proxyPort,
                             request.headers.get('Proxy-Authorization', None))
                return self._TunnelingAgent(
                    reactor, proxyConf,
                    contextFactory=self._contextFactory,
                    connectTimeout=timeout,
                    bindAddress=bindaddress, pool=self._pool)

            # The request URL used to be parsed a second time here, but the
            # resulting host, port and parameters were never read.
            proxyEndpoint = TCP4ClientEndpoint(
                reactor, proxyHost, proxyPort,
                timeout=timeout, bindAddress=bindaddress)
            return SOCKS5Agent(reactor, proxyEndpoint=proxyEndpoint)

        return self._Agent(
            reactor, contextFactory=self._contextFactory,
            connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)
Ejemplo n.º 17
0
    def _get_agent(self, request, timeout):
        """Return a SOCKS5 agent for ``request``, or a plain agent if disabled.

        The proxy address is taken from the ``SOCKSPROXY`` setting (default
        ``127.0.0.1:9050``), not from ``request.meta['proxy']``. When the
        ``NEWCIRC`` setting is truthy, a random username/password pair is
        passed as SOCKS login credentials for every agent created.
        If the ``SOCKSPROXY`` setting is empty, a direct ``self._Agent`` is
        returned instead.
        """
        bindAddress = request.meta.get('bindaddress') or self._bindAddress
        # Per the original author's note, request.meta['proxy'] is only set
        # when the http_proxy environment variable or a proxy middleware is
        # in use, so the proxy is read from the settings instead.
        proxy = settings.get('SOCKSPROXY', '127.0.0.1:9050')
        # Lazy %-style arguments: the message is only formatted when debug
        # logging is enabled.
        logger.debug('downloader agent proxy: %s', proxy)
        if proxy:
            # Only the proxy host and port are needed; a second parse of the
            # request URL was removed because its results were never used.
            _, _, proxyHost, proxyPort, _ = _parse(proxy)
            proxyEndpoint = TCP4ClientEndpoint(
                reactor, proxyHost, proxyPort,
                timeout=timeout, bindAddress=bindAddress)
            agentKwargs = {'proxyEndpoint': proxyEndpoint}
            if settings.get('NEWCIRC', False):
                # Fresh random credentials per agent. Presumably this makes
                # the SOCKS server (e.g. Tor) use a new circuit -- TODO
                # confirm against the proxy's behaviour.
                username = hex(random.randint(0, 2**32))
                password = hex(random.randint(0, 2**32))
                agentKwargs['endpointArgs'] = dict(
                    methods=dict(login=(username, password)))
            return SOCKS5Agent(reactor, **agentKwargs)
        return self._Agent(
            reactor, contextFactory=self._contextFactory,
            connectTimeout=timeout, bindAddress=bindAddress, pool=self._pool)
Ejemplo n.º 18
0
    def _get_agent(self, request, timeout):
        """Return a txtorcon agent that talks through a SOCKS endpoint.

        The proxy URL is the ``SOCKS_PROXY`` environment variable when it is
        set, otherwise ``request.meta['proxy']``. Its host and port are used
        to open a TCP endpoint, which is handed to
        ``txtorcon_web.tor_agent``.
        """
        local_bind = request.meta.get('bindaddress') or self._bindAddress
        meta_proxy = request.meta.get('proxy')
        proxy_url = os.environ.get("SOCKS_PROXY", meta_proxy)

        # Only the host and port of the proxy URL are needed.
        _, _, socks_host, socks_port, _ = _parse(proxy_url)
        socks_endpoint = TCP4ClientEndpoint(
            reactor,
            socks_host,
            socks_port,
            timeout=timeout,
            bindAddress=local_bind,
        )
        return txtorcon_web.tor_agent(reactor, socks_endpoint=socks_endpoint)
Ejemplo n.º 19
0
    def _get_agent(self, request, timeout):
        """Return a SOCKS5-capable agent, or defer to the parent class.

        When ``request.meta['proxy']`` has the ``socks5`` scheme, the TLS
        context factory on ``self`` is rebuilt from
        ``DOWNLOADER_CLIENT_TLS_METHOD`` / ``DOWNLOADER_CLIENTCONTEXTFACTORY``
        and a ``self._Agent`` is returned that connects through a TCP
        endpoint to the proxy. Requests without a proxy, or with a proxy of
        any other scheme, are handled by the parent implementation.
        """
        # .get() rather than indexing: a request with no 'proxy' key must
        # fall through to the parent implementation instead of raising
        # KeyError.
        proxy = request.meta.get('proxy')
        if proxy:
            proxy_scheme, _, proxy_host, proxy_port, _ = _parse(proxy)
            proxy_scheme = str(proxy_scheme, 'utf-8')
            if proxy_scheme == 'socks5':
                endpoint = TCP4ClientEndpoint(reactor, proxy_host, proxy_port)
                # NOTE(review): these assignments replace the context factory
                # stored on self each time a socks5 request is handled.
                self._sslMethod = openssl_methods[DOWNLOADER_CLIENT_TLS_METHOD]
                self._contextFactoryClass = load_object(
                    DOWNLOADER_CLIENTCONTEXTFACTORY)
                self._contextFactory = create_instance(
                    objcls=self._contextFactoryClass,
                    settings=settings,
                    crawler=None,
                    method=self._sslMethod,
                )
                return self._Agent(reactor,
                                   proxyEndpoint=endpoint,
                                   contextFactory=self._contextFactory)

        return super(TorScrapyAgent, self)._get_agent(request, timeout)
Ejemplo n.º 20
0
 def testFactoryInfo(self):
     """Connect a ``ScrapyHTTPClientFactory`` and check it via ``_cbFactoryInfo``.

     Returns the factory's deferred with ``self._cbFactoryInfo`` attached;
     the callback also receives the factory.
     """
     target_url = self.getURL('file')
     # Only the host and port are needed to open the TCP connection.
     _, _, host, port, _ = client._parse(target_url)
     http_factory = client.ScrapyHTTPClientFactory(Request(target_url))
     reactor.connectTCP(host, port, http_factory)
     pending = http_factory.deferred
     return pending.addCallback(self._cbFactoryInfo, http_factory)
Ejemplo n.º 21
0
 def testFactoryInfo(self):
     """Connect a ``ScrapyHTTPClientFactory`` and check it via ``_cbFactoryInfo``.

     The host returned by ``client._parse`` is converted with
     ``to_unicode`` before connecting. The factory's deferred is returned
     with ``self._cbFactoryInfo`` attached.
     """
     target_url = self.getURL('file')
     parsed = client._parse(target_url)
     _, _, raw_host, port, _ = parsed
     http_factory = client.ScrapyHTTPClientFactory(Request(target_url))
     host_text = to_unicode(raw_host)
     reactor.connectTCP(host_text, port, http_factory)
     pending = http_factory.deferred
     return pending.addCallback(self._cbFactoryInfo, http_factory)
Ejemplo n.º 22
0
 def testFactoryInfo(self):
     """Open a TCP connection with a client factory and inspect the factory.

     The URL from ``self.getURL('file')`` is parsed for its host and port,
     a ``ScrapyHTTPClientFactory`` is connected to them, and the factory's
     deferred is returned with ``self._cbFactoryInfo`` as its callback.
     """
     file_url = self.getURL('file')
     parts = client._parse(file_url)
     _, _, host, port, _ = parts
     client_factory = client.ScrapyHTTPClientFactory(Request(file_url))
     reactor.connectTCP(host, port, client_factory)
     result = client_factory.deferred
     return result.addCallback(self._cbFactoryInfo, client_factory)
Ejemplo n.º 23
0
 def testFactoryInfo(self):
     """Open a TCP connection with a client factory and inspect the factory.

     The parsed host is passed through ``to_unicode`` before
     ``reactor.connectTCP`` is called. The factory's deferred is returned
     with ``self._cbFactoryInfo`` as its callback.
     """
     file_url = self.getURL('file')
     _, _, host_value, port, _ = client._parse(file_url)
     client_factory = client.ScrapyHTTPClientFactory(Request(file_url))
     reactor.connectTCP(to_unicode(host_value), port, client_factory)
     result = client_factory.deferred
     return result.addCallback(self._cbFactoryInfo, client_factory)
Ejemplo n.º 24
0
    def _get_agent(self, request, timeout):
        """Return the agent for ``request``, supporting HTTP and SOCKS proxies.

        The scheme of ``request.meta['proxy']`` selects the agent:

        * scheme starting with ``http``: ``self._TunnelingAgent`` for
          ``https`` requests (unless the proxy parameters contain
          ``noconnect``), otherwise ``self._ProxyAgent``;
        * ``socks4``: ``SOCKS4Agent`` over a TCP endpoint to the proxy;
        * ``socks5``: ``SOCKS5Agent`` over a TCP endpoint to the proxy, with
          username/password login when the request carries a
          ``Proxy-Authorization`` header.

        Without a proxy, or with any other proxy scheme, a direct
        ``self._Agent`` is returned.
        """
        bindaddress = request.meta.get('bindaddress') or self._bindAddress
        proxy = request.meta.get('proxy')
        if proxy:
            proxyScheme, _, proxyHost, proxyPort, proxyParams = _parse(proxy)
            if proxyScheme.startswith(b'http'):
                scheme = _parse(request.url)[0]
                proxyHost = to_unicode(proxyHost)
                omitConnectTunnel = b'noconnect' in proxyParams
                if scheme == b'https' and not omitConnectTunnel:
                    proxyConf = (proxyHost, proxyPort,
                                 request.headers.get(b'Proxy-Authorization',
                                                     None))
                    return self._TunnelingAgent(
                        reactor,
                        proxyConf,
                        contextFactory=self._contextFactory,
                        connectTimeout=timeout,
                        bindAddress=bindaddress,
                        pool=self._pool)
                return self._ProxyAgent(
                    reactor,
                    proxyURI=to_bytes(proxy, encoding='ascii'),
                    connectTimeout=timeout,
                    bindAddress=bindaddress,
                    pool=self._pool)

            if proxyScheme in (b'socks4', b'socks5'):
                proxyEndpoint = TCP4ClientEndpoint(reactor,
                                                   proxyHost,
                                                   proxyPort,
                                                   timeout=timeout,
                                                   bindAddress=bindaddress)
                # Both SOCKS agents get the same keyword arguments. The
                # endpoint keyword is spelled ``proxyEndpoint`` for both; the
                # SOCKS4 call previously used ``proxyEndPoint``.
                agentKwargs = dict(proxyEndpoint=proxyEndpoint,
                                   contextFactory=self._contextFactory,
                                   connectTimeout=timeout,
                                   bindAddress=bindaddress,
                                   pool=self._pool)
                if proxyScheme == b'socks4':
                    return SOCKS4Agent(reactor, **agentKwargs)

                proxyAuth = request.headers.get(b'Proxy-Authorization', None)
                if proxyAuth:
                    # The last token of the header is base64 "user:password".
                    # Split on the first colon only, because the password
                    # itself may contain colons.
                    proxyUser, proxyPassword = b64decode(
                        proxyAuth.split()[-1]).split(b':', 1)
                    agentKwargs['endpointArgs'] = dict(
                        methods={'login': (proxyUser, proxyPassword)})
                return SOCKS5Agent(reactor, **agentKwargs)

        return self._Agent(reactor,
                           contextFactory=self._contextFactory,
                           connectTimeout=timeout,
                           bindAddress=bindaddress,
                           pool=self._pool)