コード例 #1
0
def _makeGetterFactory(url, factoryFactory, contextFactory=None,
                       *args, **kwargs):
    """
    Create and connect an HTTP page getting factory.

    Any additional positional or keyword arguments are used when calling
    C{factoryFactory}.

    @param factoryFactory: Factory factory that is called with C{url}, C{args}
        and C{kwargs} to produce the getter

    @param contextFactory: Context factory to use when creating a secure
        connection, defaulting to L{None}

    @return: The factory created by C{factoryFactory}
    """
    uri = URI.fromBytes(url)
    factory = factoryFactory(url, *args, **kwargs)
    if uri.scheme == b'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(
            nativeString(uri.host), uri.port, factory, contextFactory)
    else:
        reactor.connectTCP(nativeString(uri.host), uri.port, factory)
    return factory
コード例 #2
0
    def setUp(self):
        # Initialize resource tree
        root = self._init_resource()
        self.site = Site(root, timeout=None)

        # Start server for testing
        self.hostname = 'localhost'
        context_factory = ssl_context_factory(self.key_file, self.certificate_file)

        server_endpoint = SSL4ServerEndpoint(reactor, 0, context_factory, interface=self.hostname)
        self.server = yield server_endpoint.listen(self.site)
        self.port_number = self.server.getHost().port

        # Connect H2 client with server
        self.client_certificate = get_client_certificate(self.key_file, self.certificate_file)
        client_options = optionsForClientTLS(
            hostname=self.hostname,
            trustRoot=self.client_certificate,
            acceptableProtocols=[b'h2']
        )
        uri = URI.fromBytes(bytes(self.get_url('/'), 'utf-8'))

        self.conn_closed_deferred = Deferred()
        from scrapy.core.http2.protocol import H2ClientFactory
        h2_client_factory = H2ClientFactory(uri, Settings(), self.conn_closed_deferred)
        client_endpoint = SSL4ClientEndpoint(reactor, self.hostname, self.port_number, client_options)
        self.client = yield client_endpoint.connect(h2_client_factory)
コード例 #3
0
    def from_uri(cls, reactor, uri):
        """Return an AMQEndpoint instance configured with the given AMQP uri.

        @see: https://www.rabbitmq.com/uri-spec.html
        """
        uri = URI.fromBytes(uri.encode(), defaultPort=5672)
        kwargs = {}
        host = uri.host.decode()
        if "@" in host:
            auth, host = uri.netloc.decode().split("@")
            username, password = auth.split(":")
            kwargs.update({"username": username, "password": password})

        vhost = uri.path.decode()
        if len(vhost) > 1:
            vhost = vhost[1:]  # Strip leading "/"
        kwargs["vhost"] = vhost

        params = parse_qs(uri.query)
        kwargs.update({
            name.decode(): value[0].decode()
            for name, value in params.items()
        })

        if "heartbeat" in kwargs:
            kwargs["heartbeat"] = int(kwargs["heartbeat"])
        return cls(reactor, host, uri.port, **kwargs)
コード例 #4
0
ファイル: agents.py プロジェクト: LongJohnCoder/scrapex
def build_agent(req):
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        if uri.scheme == 'https':

            agent = TunnelingAgent(
                reactor=reactor,
                proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(reactor,
                                          host=proxy.host,
                                          port=proxy.port,
                                          timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)
            if proxy.auth_header:
                req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    return agent
コード例 #5
0
ファイル: http11.py プロジェクト: Jz52710/pythonPChong
 def __init__(self, reactor, proxyURI,
              connectTimeout=None, bindAddress=None, pool=None):
     super(ScrapyProxyAgent, self).__init__(reactor,
                                            connectTimeout=connectTimeout,
                                            bindAddress=bindAddress,
                                            pool=pool)
     self._proxyURI = URI.fromBytes(proxyURI)
コード例 #6
0
 def makeURL(self):
     url_data = URI.fromBytes(self._url)
     if url_data.scheme:
         args = {
             "scheme": url_data.scheme,
             "hostname": url_data.host,
             "port": url_data.port,
             "path": url_data.path,
         }
     else:
         args = {
             "scheme": "https" if self._ssl else "http",
             "hostname": self._hostname,
             "port": self._port,
             "path": self._url,
         }
     hasHost = bool(url_data.host)
     hostMatch = url_data.host.endswith(self._hostname)
     ipMatch = self._ipAddr in self._hostnameIp
     if hasHost and xor(hostMatch, ipMatch) or not ipMatch:
         self._proxyIp = self._ipAddr
     # Remove port if default (see RFC 2616, 14.23)
     if int(args['port']) in (80, 443) or \
             self._proxyIp and not url_data.scheme:
         self._reqURL = "{scheme}://{hostname}{path}".format(**args)
     else:
         self._reqURL = "{scheme}://{hostname}:{port}{path}".format(**args)
     log.debug("HTTP request URL: %s, Proxy: %s", self._reqURL,
               self._proxyIp)
コード例 #7
0
ファイル: cassette.py プロジェクト: kxz/stenographer
 def from_dict(cls, dct):
     """Create a new cassette from *dct*, as deserialized from JSON
     or YAML format."""
     cassette = cls()
     for interaction in dct['http_interactions']:
         rq = interaction['request']
         # Overwrite the scheme and netloc, leaving just the part of
         # the URI that would be sent in a real request.
         relative_uri = urlunparse(('', '') + urlparse(rq['uri'])[2:])
         request = Request._construct(
             rq['method'], relative_uri, Headers(rq['headers']),
             SavedBodyProducer(body_from_dict(rq)),
             False, URI.fromBytes(rq['uri'].encode('utf-8')))
         rp = interaction['response']
         response = Response._construct(
             ('HTTP', 1, 1), rp['status']['code'], rp['status']['message'],
             Headers(rp['headers']), AbortableStringTransport(), request)
         content_length = response.headers.getRawHeaders('Content-Length')
         if content_length:
             try:
                 response.length = int(content_length[0])
             except ValueError:
                 pass
         cassette.responses.append(
             SavedResponse(response, body_from_dict(rp)))
     return cassette
コード例 #8
0
 def makeURL(self):
     url_data = URI.fromBytes(self._url)
     if url_data.scheme:
         args = {
             "scheme": url_data.scheme,
             "hostname": url_data.host,
             "port": url_data.port,
             "path": url_data.path,
         }
     else:
         args = {
             "scheme": "https" if self._ssl else "http",
             "hostname": self._hostname,
             "port": self._port,
             "path": self._url,
         }
     hasHost = bool(url_data.host)
     hostMatch = url_data.host.endswith(self._hostname)
     ipMatch = self._ipAddr in self._hostnameIp
     portProtoMatch = self._ssl and int(args['port']) == 443 or \
                         not self._ssl and int(args['port']) == 80
     if hasHost and xor(hostMatch, ipMatch) or not ipMatch:
         self._proxyIp = self._ipAddr
     # Remove port if default (see RFC 2616, 14.23)
     if (int(args['port']) in (80, 443) and portProtoMatch) or \
             bool(self._proxyIp) and not url_data.scheme:
         self._reqURL = "{scheme}://{hostname}{path}".format(**args)
     else:
         self._reqURL = "{scheme}://{hostname}:{port}{path}".format(**args)
     log.debug(
         "HTTP request URL: %s, Proxy: %s", self._reqURL, self._proxyIp
     )
コード例 #9
0
ファイル: http11.py プロジェクト: elacuesta/scrapy
 def __init__(self, reactor, proxyURI,
              connectTimeout=None, bindAddress=None, pool=None):
     super(ScrapyProxyAgent, self).__init__(reactor,
                                            connectTimeout=connectTimeout,
                                            bindAddress=bindAddress,
                                            pool=pool)
     self._proxyURI = URI.fromBytes(proxyURI)
コード例 #10
0
ファイル: client.py プロジェクト: LongJohnCoder/scrapex
	def _create_agent(self, req):

		""" create right agent for specific request """

		agent = None

		uri = URI.fromBytes(req.url)
		proxy = req.get('proxy')
		if req.get('use_proxy') is False:
			proxy = None
		
		if proxy:	
			if uri.scheme == 'https':
				
				agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)

				if not agent:
					
					agent = TunnelingAgent(reactor=reactor, proxy=proxy, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool)

					self._agents[agent_key] = agent

			else:
				#http
				agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)

				if not agent:
					endpoint = TCP4ClientEndpoint(reactor, host=proxy.host, port=proxy.port , timeout=req.get('timeout'))
					agent = ProxyAgent(endpoint, pool=self._pool)
					self._agents[agent_key] = agent


				if proxy.auth_header:
					req.get('headers')['Proxy-Authorization'] = proxy.auth_header

		else:
			
			agent = self._direct_agent #use single agent when no proxies used


		redirectLimit = self.scraper.config.get('max_redirects')
		if redirectLimit is None:
			redirectLimit = 3
	
		if redirectLimit>0:
			agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

		
		agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])

		if self.cj is not None:
			agent = CookieAgent(agent, self.cj)
		
		return agent	
コード例 #11
0
    def request(self, request: Request, spider: Spider) -> Deferred:
        uri = URI.fromBytes(bytes(request.url, encoding='utf-8'))
        try:
            endpoint = self.get_endpoint(uri)
        except SchemeNotSupported:
            return defer.fail(Failure())

        key = self.get_key(uri)
        d = self._pool.get_connection(key, uri, endpoint)
        d.addCallback(lambda conn: conn.request(request, spider))
        return d
コード例 #12
0
ファイル: proxy_agent.py プロジェクト: lalacat/SimpleScrapy
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a new request via the configured proxy.
        """
        # Cache *all* connections under the same key, since we are only
        # connecting to a single destination, the proxy:

        proxyEndpoint = self._getEndpoint(self._proxyURI)

        key = ("http-proxy", self._proxyURI.host, self._proxyURI.port)
        return self._requestWithEndpoint(key, proxyEndpoint, method,
                                         URI.fromBytes(uri), headers,
                                         bodyProducer, uri)
コード例 #13
0
ファイル: forward.py プロジェクト: ajxchapman/ReServ
    def request(self,
                method,
                uri,
                headers=None,
                bodyProducer=None,
                address=None,
                path=None):
        """
        Adapted from Agent.request but to allow arbitrary text in the request path, e.g. 'GET http://www.example.com HTTP/1.1'

        Path argument can be used to specifically override the path in the uri for non-conforming paths
        Address argument can be used to override the address the url connection is made to
        """
        method = method.encode("UTF-8")
        parsedURI = URI.fromBytes(uri.encode("UTF-8"))

        try:
            if address is not None:
                modifiedURI = URI.fromBytes(uri.encode("UTF-8"))
                modifiedURI.host = address.encode("UTF-8")
                endpoint = self._getEndpoint(modifiedURI)
            else:
                endpoint = self._getEndpoint(parsedURI)
        except SchemeNotSupported:
            return defer.fail("Scheme not supported")

        parsedURI.path = path.encode(
            "UTF-8") if path is not None else parsedURI.path
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
        d = self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                      headers, bodyProducer,
                                      parsedURI.originForm)

        # Add a timeout to the deferred to prevent hangs on requests that connect but don't send any data
        d.addTimeout(self.timeout, reactor)
        d.addCallback(self.read_response, parsedURI)
        return d
コード例 #14
0
ファイル: http11.py プロジェクト: elacuesta/scrapy
 def request(self, method, uri, headers=None, bodyProducer=None):
     """
     Issue a new request via the configured proxy.
     """
     # Cache *all* connections under the same key, since we are only
     # connecting to a single destination, the proxy:
     if twisted_version >= (15, 0, 0):
         proxyEndpoint = self._getEndpoint(self._proxyURI)
     else:
         proxyEndpoint = self._getEndpoint(self._proxyURI.scheme,
                                           self._proxyURI.host,
                                           self._proxyURI.port)
     key = ("http-proxy", self._proxyURI.host, self._proxyURI.port)
     return self._requestWithEndpoint(key, proxyEndpoint, method,
                                      URI.fromBytes(uri), headers,
                                      bodyProducer, uri)
コード例 #15
0
    def _command_download(self, data):
        reactor = self._context["reactor"]
        session_files = self._context["session_files"]
        audio_id = data["audio_id"]
        partial_url = data["partial_url"]

        ip_address = str(self.transport.getPeer().host)
        url = "http://" + ip_address + partial_url

        file_path = session_files.session_dir / f"{audio_id}.opus"

        log.info(f"Downloading file from {url} to {file_path}")

        url_bytes = url.encode("utf-8")
        url_parsed = URI.fromBytes(url_bytes)
        factory = HTTPDownloader(url_bytes, str(file_path))
        reactor.connectTCP(url_parsed.host, url_parsed.port, factory)
        d = factory.deferred

        def on_success(data):
            # File downloaded succesfully, tell the server
            result = {
                "command": "update_downloaded",
                "audio_id": audio_id,
                "result": "success"
            }
            result_json = json.dumps(result)
            self._tcp_packetizer.write(result_json)

        def on_error(error):
            # File failed to downloaded succesfully, tell the server
            log.error(f"Failed to download file at '{url}': {error}")
            result = {
                "command": "update_downloaded",
                "audio_id": audio_id,
                "result": "failure",
                "error": str(error)
            }
            result_json = json.dumps(result)
            self._tcp_packetizer.write(result_json)

        d.addCallback(on_success)
        d.addErrback(on_error)

        return d
コード例 #16
0
    def request(self, method, uri, headers, bodyProducer):

        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        contentType = headers.getRawHeaders('content-type', [""])[0]
        date = headers.getRawHeaders('date',
                                     [""])[0] or self._generateRequestDate(uri)
        headers.setRawHeaders('date', [date])

        uri_origin_form = URI.fromBytes(uri).originForm
        contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

        if not contentMD5 and bodyProducer is not None:

            r = getattr(self.agent, '_reactor') or reactor
            bodyConsumer = StringConsumer(callLater=r.callLater)

            yield bodyProducer.startProducing(bodyConsumer)
            body = bodyConsumer.value()
            bodyProducer = StringBodyProducer(body)

            if body:
                contentMD5 = binascii.b2a_base64(
                    hashlib.md5(body).digest()).strip()
                headers.addRawHeader('content-md5', contentMD5)

        sts = "\n".join([
            method, contentType or "", contentMD5, date or "", uri_origin_form
        ])
        mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
        encodedMAC = binascii.b2a_base64(mac).strip()

        auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
        headers.addRawHeader('authorization', auth_header)

        d = yield self.agent.request(method, uri, headers, bodyProducer)
        self._handleResponseDate(uri, d)
        defer.returnValue(d)
コード例 #17
0
ファイル: Downloader.py プロジェクト: hd75hd/enigma2
	def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
		if hasattr(client, '_parse'):
			scheme, host, port, path = client._parse(url)
		else:
			from twisted.web.client import URI
			uri = URI.fromBytes(url)
			scheme = uri.scheme
			host = uri.host
			port = uri.port
			path = uri.path
# ======= another twisted fix possibility
#		parsed = urlparse(url)
#		scheme = parsed.scheme
#		host = parsed.hostname
#		port = parsed.port or (443 if scheme == 'https' else 80)

		self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
		if scheme == "https":
			self.connection = reactor.connectSSL(host, port, self.factory, ssl.ClientContextFactory())
		else:
			self.connection = reactor.connectTCP(host, port, self.factory)
コード例 #18
0
	def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
		if hasattr(client, '_parse'):
			scheme, host, port, path = client._parse(url)
		else:
			from twisted.web.client import URI
			uri = URI.fromBytes(url)
			scheme = uri.scheme
			host = uri.host
			port = uri.port
			path = uri.path
# ======= another twisted fix possibility
#		parsed = urlparse(url)
#		scheme = parsed.scheme
#		host = parsed.hostname
#		port = parsed.port or (443 if scheme == 'https' else 80)

		self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
		if scheme == "https":
			self.connection = reactor.connectSSL(host, port, self.factory, ssl.ClientContextFactory())
		else:
			self.connection = reactor.connectTCP(host, port, self.factory)
コード例 #19
0
ファイル: http2.py プロジェクト: wwjiang007/scrapy
    def _get_agent(self, request: Request,
                   timeout: Optional[float]) -> H2Agent:
        from twisted.internet import reactor
        bind_address = request.meta.get('bindaddress') or self._bind_address
        proxy = request.meta.get('proxy')
        if proxy:
            _, _, proxy_host, proxy_port, proxy_params = _parse(proxy)
            scheme = _parse(request.url)[0]
            proxy_host = proxy_host.decode()
            omit_connect_tunnel = b'noconnect' in proxy_params
            if omit_connect_tunnel:
                warnings.warn(
                    "Using HTTPS proxies in the noconnect mode is not "
                    "supported by the downloader handler. If you use Zyte "
                    "Smart Proxy Manager, it doesn't require this mode "
                    "anymore, so you should update scrapy-crawlera to "
                    "scrapy-zyte-smartproxy and remove '?noconnect' from the "
                    "Zyte Smart Proxy Manager URL.")

            if scheme == b'https' and not omit_connect_tunnel:
                # ToDo
                raise NotImplementedError(
                    'Tunneling via CONNECT method using HTTP/2.0 is not yet supported'
                )
            return self._ProxyAgent(
                reactor=reactor,
                context_factory=self._context_factory,
                proxy_uri=URI.fromBytes(to_bytes(proxy, encoding='ascii')),
                connect_timeout=timeout,
                bind_address=bind_address,
                pool=self._pool,
            )

        return self._Agent(
            reactor=reactor,
            context_factory=self._context_factory,
            connect_timeout=timeout,
            bind_address=bind_address,
            pool=self._pool,
        )
コード例 #20
0
ファイル: agent.py プロジェクト: david415/txtorhttpproxy
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a request to the server indicated by the given C{uri}.

        An existing connection from the connection pool may be used or a new one may be created.
        Without additional modifications this connection pool may not be very useful because
        each connection in the pool will use the same Tor circuit.

        Currently only the I{HTTP} scheme is supported in C{uri}.

        @see: L{twisted.web.iweb.IAgent.request}
        """
        parsedURI = URI.fromBytes(uri)
        endpoint = self._getEndpoint(parsedURI.scheme, parsedURI.host,
                                         parsedURI.port)

        # XXX
        # perhaps the request method should take a key?
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)

        return self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                         headers, bodyProducer, parsedURI.originForm)
コード例 #21
0
def getPageCached(url, contextFactory=None, *args, **kwargs):
    """download a web page as a string, keep a cache of already downloaded pages

    Download a page. Return a deferred, which will callback with a
    page (as a string) or errback with a description of the error.

    See HTTPClientCacheFactory to see what extra args can be passed.
    """
    uri = URI.fromBytes(url)
    scheme = uri.scheme
    host = uri.host
    port = uri.port

    factory = HTTPClientCacheFactory(url, *args, **kwargs)

    if scheme == 'https':
        if contextFactory is None:
            contextFactory = HTTPSVerifyingContextFactory(host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred
コード例 #22
0
ファイル: api_helpers.py プロジェクト: hitsl/bouser
def get_json(url, *args, **kwargs):
    """
    :param json: JSON data
    :param url:
    :param args:
    :param kwargs:
    :return:
    """
    j = kwargs.pop('json', None)
    if j:
        kwargs['postdata'] = as_json(j)
    kwargs.setdefault('agent', 'Twisted JSON Adapter')
    uri = URI.fromBytes(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = 0
    if uri.scheme == b'https':
        from twisted.internet import ssl
        contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(
            nativeString(uri.host), uri.port, factory, contextFactory)
    else:
        reactor.connectTCP(nativeString(uri.host), uri.port, factory)
    return factory.deferred.addCallback(_json_loads)
コード例 #23
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a request to the server indicated by the given C{uri}.

        An existing connection from the connection pool may be used or a new one may be created.
        Without additional modifications this connection pool may not be very useful because
        each connection in the pool will use the same Tor circuit.

        Currently only the I{HTTP} scheme is supported in C{uri}.

        @see: L{twisted.web.iweb.IAgent.request}
        """
        parsedURI = URI.fromBytes(uri)
        endpoint = self._getEndpoint(parsedURI.scheme, parsedURI.host,
                                     parsedURI.port)

        # XXX
        # perhaps the request method should take a key?
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)

        return self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                         headers, bodyProducer,
                                         parsedURI.originForm)
コード例 #24
0
ファイル: lists.py プロジェクト: chojar/Tor2web
def getPageCached(url, contextFactory=None, *args, **kwargs):
    """download a web page as a string, keep a cache of already downloaded pages

    Download a page. Return a deferred, which will callback with a
    page (as a string) or errback with a description of the error.

    See HTTPClientCacheFactory to see what extra args can be passed.
    """
    uri = URI.fromBytes(url)
    scheme = uri.scheme
    host = uri.host
    port = uri.port

    factory = HTTPClientCacheFactory(url, *args, **kwargs)

    if scheme == 'https':
        if contextFactory is None:
            contextFactory = HTTPSVerifyingContextFactory(host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred
コード例 #25
0
def get_json(url, *args, **kwargs):
    """
    :param json: JSON data
    :param url:
    :param args:
    :param kwargs:
    :return:
    """
    j = kwargs.pop('json', None)
    if j:
        kwargs['postdata'] = as_json(j)
    kwargs.setdefault('agent', 'Twisted JSON Adapter')
    uri = URI.fromBytes(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = 0
    if uri.scheme == b'https':
        from twisted.internet import ssl
        contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(nativeString(uri.host), uri.port, factory,
                           contextFactory)
    else:
        reactor.connectTCP(nativeString(uri.host), uri.port, factory)
    return factory.deferred.addCallback(_json_loads)
コード例 #26
0
def getPage(url, bindAddress=None, *arg, **kw):
    # reimplemented here to insert bindAddress

    uri = URI.fromBytes(url)
    scheme = uri.scheme
    host = uri.host
    port = uri.port
    path = uri.path

    factory = HTTPClientFactory(url, *arg, **kw)
    factory.noisy = False

    if scheme == 'https':
        from twisted.internet import ssl
        context = ssl.ClientContextFactory()
        reactor.connectSSL(host,
                           port,
                           factory,
                           context,
                           bindAddress=bindAddress)
    else:
        reactor.connectTCP(host, port, factory, bindAddress=bindAddress)
    return factory.deferred
コード例 #27
0
ファイル: authhmac.py プロジェクト: wgnet/twoost
    def request(self, method, uri, headers, bodyProducer):

        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        contentType = headers.getRawHeaders('content-type', [""])[0]
        date = headers.getRawHeaders('date', [""])[0] or self._generateRequestDate(uri)
        headers.setRawHeaders('date', [date])

        uri_origin_form = URI.fromBytes(uri).originForm
        contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

        if not contentMD5 and bodyProducer is not None:

            r = getattr(self.agent, '_reactor') or reactor
            bodyConsumer = StringConsumer(callLater=r.callLater)

            yield bodyProducer.startProducing(bodyConsumer)
            body = bodyConsumer.value()
            bodyProducer = StringBodyProducer(body)

            if body:
                contentMD5 = binascii.b2a_base64(hashlib.md5(body).digest()).strip()
                headers.addRawHeader('content-md5', contentMD5)

        sts = "\n".join([method, contentType or "", contentMD5, date or "", uri_origin_form])
        mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
        encodedMAC = binascii.b2a_base64(mac).strip()

        auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
        headers.addRawHeader('authorization', auth_header)

        d = yield self.agent.request(method, uri, headers, bodyProducer)
        self._handleResponseDate(uri, d)
        defer.returnValue(d)
コード例 #28
0
    def _route_matrix_uri(self, parsed_uri, lookup_well_known=True):
        """Helper for `request`: determine the routing for a Matrix URI

        Args:
            parsed_uri (twisted.web.client.URI): uri to route. Note that it should be
                parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1
                if there is no explicit port given.

            lookup_well_known (bool): True if we should look up the .well-known file if
                there is no SRV record.

        Returns:
            Deferred[_RoutingResult]
        """
        # check for an IP literal
        try:
            ip_address = IPAddress(parsed_uri.host.decode("ascii"))
        except Exception:
            # not an IP address
            ip_address = None

        if ip_address:
            port = parsed_uri.port
            if port == -1:
                port = 8448
            return _RoutingResult(
                host_header=parsed_uri.netloc,
                tls_server_name=parsed_uri.host,
                target_host=parsed_uri.host,
                target_port=port,
            )

        if parsed_uri.port != -1:
            # there is an explicit port
            return _RoutingResult(
                host_header=parsed_uri.netloc,
                tls_server_name=parsed_uri.host,
                target_host=parsed_uri.host,
                target_port=parsed_uri.port,
            )

        if lookup_well_known:
            # try a .well-known lookup
            well_known_result = yield self._well_known_resolver.get_well_known(
                parsed_uri.host)
            well_known_server = well_known_result.delegated_server

            if well_known_server:
                # if we found a .well-known, start again, but don't do another
                # .well-known lookup.

                # parse the server name in the .well-known response into host/port.
                # (This code is lifted from twisted.web.client.URI.fromBytes).
                if b":" in well_known_server:
                    well_known_host, well_known_port = well_known_server.rsplit(
                        b":", 1)
                    try:
                        well_known_port = int(well_known_port)
                    except ValueError:
                        # the part after the colon could not be parsed as an int
                        # - we assume it is an IPv6 literal with no port (the closing
                        # ']' stops it being parsed as an int)
                        well_known_host, well_known_port = well_known_server, -1
                else:
                    well_known_host, well_known_port = well_known_server, -1

                new_uri = URI(
                    scheme=parsed_uri.scheme,
                    netloc=well_known_server,
                    host=well_known_host,
                    port=well_known_port,
                    path=parsed_uri.path,
                    params=parsed_uri.params,
                    query=parsed_uri.query,
                    fragment=parsed_uri.fragment,
                )

                res = yield self._route_matrix_uri(new_uri,
                                                   lookup_well_known=False)
                return res

        # try a SRV lookup
        service_name = b"_matrix._tcp.%s" % (parsed_uri.host, )
        server_list = yield self._srv_resolver.resolve_service(service_name)

        if not server_list:
            target_host = parsed_uri.host
            port = 8448
            logger.debug(
                "No SRV record for %s, using %s:%i",
                parsed_uri.host.decode("ascii"),
                target_host.decode("ascii"),
                port,
            )
        else:
            target_host, port = pick_server_from_list(server_list)
            logger.debug(
                "Picked %s:%i from SRV records for %s",
                target_host.decode("ascii"),
                port,
                parsed_uri.host.decode("ascii"),
            )

        return _RoutingResult(
            host_header=parsed_uri.netloc,
            tls_server_name=parsed_uri.host,
            target_host=target_host,
            target_port=port,
        )
コード例 #29
0
    def request(
        self,
        method: bytes,
        uri: bytes,
        headers: Optional["Headers"] = None,
        bodyProducer: Optional["IBodyProducer"] = None,
    ) -> Generator["defer.Deferred[Any]", Any, IResponse]:
        """
        :param method: HTTP method (GET/POST/etc).

        :param uri: Absolute URI to be retrieved.

        :param headers: HTTP headers to send with the request, or None to
            send no extra headers.

        :param bodyProducer: An object which can generate bytes to make up the
            body of this request (for example, the properly encoded contents of
            a file for a file upload).  Or None if the request is to have
            no body.

        :returns a deferred that fires when the header of the response has
            been received (regardless of the response status code). Fails if
            there is any problem which prevents that response from being received
            (including problems that prevent the request from being sent).
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        routing: _RoutingResult
        routing = yield defer.ensureDeferred(
            self._route_matrix_uri(parsed_uri))

        # set up the TLS connection params
        #
        # XXX disabling TLS is really only supported here for the benefit of the
        # unit tests. We should make the UTs cope with TLS rather than having to make
        # the code support the unit tests.
        if self._tls_client_options_factory is None:
            tls_options = None
        else:
            tls_options = self._tls_client_options_factory.get_options(
                routing.tls_server_name.decode("ascii"))

        # make sure that the Host header is set correctly
        if headers is None:
            headers = Headers()
        else:
            # Type safety: Headers.copy doesn't have a return type annotated,
            # and I don't want to stub web.http_headers. Could use stubgen? It's
            # a pretty simple file.
            headers = headers.copy()  # type: ignore[no-untyped-call]
            assert headers is not None

        if not headers.hasHeader(b"host"):
            headers.addRawHeader(b"host", routing.host_header)

        @implementer(IAgentEndpointFactory)
        class EndpointFactory:
            @staticmethod
            def endpointForURI(_uri: URI) -> IStreamClientEndpoint:
                ep: IStreamClientEndpoint = LoggingHostnameEndpoint(
                    self._reactor,
                    routing.target_host,
                    routing.target_port,
                )
                if tls_options is not None:
                    ep = wrapClientTLS(tls_options, ep)
                return ep

        agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(),
                                           self._pool)
        res: IResponse
        res = yield agent.request(method, uri, headers, bodyProducer)
        return res
コード例 #30
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a request to the server indicated by the given uri.

        Supports `http` and `https` schemes.

        An existing connection from the connection pool may be used or a new one may be
        created.

        See also: twisted.web.iweb.IAgent.request

        Args:
            method (bytes): The request method to use, such as `GET`, `POST`, etc

            uri (bytes): The location of the resource to request.

            headers (Headers|None): Extra headers to send with the request

            bodyProducer (IBodyProducer|None): An object which can generate bytes to
                make up the body of this request (for example, the properly encoded
                contents of a file for a file upload). Or, None if the request is to
                have no body.

        Returns:
            Deferred[IResponse]: completes when the header of the response has
                 been received (regardless of the response status code).

                 Can fail with:
                    SchemeNotSupported: if the uri is not http or https

                    twisted.internet.error.TimeoutError if the server we are connecting
                        to (proxy or destination) does not accept a connection before
                        connectTimeout.

                    ... other things too.
        """
        uri = uri.strip()
        if not _VALID_URI.match(uri):
            raise ValueError("Invalid URI {!r}".format(uri))

        parsed_uri = URI.fromBytes(uri)
        pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
        request_path = parsed_uri.originForm

        should_skip_proxy = False
        if self.no_proxy is not None:
            should_skip_proxy = proxy_bypass_environment(
                parsed_uri.host.decode(),
                proxies={"no": self.no_proxy},
            )

        if (parsed_uri.scheme == b"http" and self.http_proxy_endpoint
                and not should_skip_proxy):
            # Cache *all* connections under the same key, since we are only
            # connecting to a single destination, the proxy:
            pool_key = ("http-proxy", self.http_proxy_endpoint)
            endpoint = self.http_proxy_endpoint
            request_path = uri
        elif (parsed_uri.scheme == b"https" and self.https_proxy_endpoint
              and not should_skip_proxy):
            connect_headers = Headers()

            # Determine whether we need to set Proxy-Authorization headers
            if self.https_proxy_creds:
                # Set a Proxy-Authorization header
                connect_headers.addRawHeader(
                    b"Proxy-Authorization",
                    self.https_proxy_creds.as_proxy_authorization_value(),
                )

            endpoint = HTTPConnectProxyEndpoint(
                self.proxy_reactor,
                self.https_proxy_endpoint,
                parsed_uri.host,
                parsed_uri.port,
                headers=connect_headers,
            )
        else:
            # not using a proxy
            endpoint = HostnameEndpoint(self._reactor, parsed_uri.host,
                                        parsed_uri.port,
                                        **self._endpoint_kwargs)

        logger.debug("Requesting %s via %s", uri, endpoint)

        if parsed_uri.scheme == b"https":
            tls_connection_creator = self._policy_for_https.creatorForNetloc(
                parsed_uri.host, parsed_uri.port)
            endpoint = wrapClientTLS(tls_connection_creator, endpoint)
        elif parsed_uri.scheme == b"http":
            pass
        else:
            return defer.fail(
                Failure(
                    SchemeNotSupported("Unsupported scheme: %r" %
                                       (parsed_uri.scheme, ))))

        return self._requestWithEndpoint(pool_key, endpoint, method,
                                         parsed_uri, headers, bodyProducer,
                                         request_path)
コード例 #31
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        :param method: HTTP method (GET/POST/etc).
        :type method: bytes

        :param uri: Absolute URI to be retrieved.
        :type uri: bytes

        :param headers: HTTP headers to send with the request, or None to
            send no extra headers.
        :type headers: twisted.web.http_headers.Headers, None

        :param bodyProducer: An object which can generate bytes to make up the
            body of this request (for example, the properly encoded contents of
            a file for a file upload).  Or None if the request is to have
            no body.
        :type bodyProducer: twisted.web.iweb.IBodyProducer, None

        :returns a deferred that fires when the header of the response has
            been received (regardless of the response status code). Fails if
            there is any problem which prevents that response from being received
            (including problems that prevent the request from being sent).
        :rtype: Deferred[twisted.web.iweb.IResponse]
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        res = yield self._route_matrix_uri(parsed_uri)

        # set up the TLS connection params
        #
        # XXX disabling TLS is really only supported here for the benefit of the
        # unit tests. We should make the UTs cope with TLS rather than having to make
        # the code support the unit tests.
        if self._tls_client_options_factory is None:
            tls_options = None
        else:
            tls_options = self._tls_client_options_factory.get_options(
                res.tls_server_name.decode("ascii")
            )

        # make sure that the Host header is set correctly
        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        if not headers.hasHeader(b'host'):
            headers.addRawHeader(b'host', res.host_header)

        class EndpointFactory(object):
            @staticmethod
            def endpointForURI(_uri):
                ep = LoggingHostnameEndpoint(
                    self._reactor, res.target_host, res.target_port,
                )
                if tls_options is not None:
                    ep = wrapClientTLS(tls_options, ep)
                return ep

        agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(), self._pool)
        res = yield agent.request(method, uri, headers, bodyProducer)
        defer.returnValue(res)
コード例 #32
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Args:
            method (bytes): HTTP method: GET/POST/etc

            uri (bytes): Absolute URI to be retrieved

            headers (twisted.web.http_headers.Headers|None):
                HTTP headers to send with the request, or None to
                send no extra headers.

            bodyProducer (twisted.web.iweb.IBodyProducer|None):
                An object which can generate bytes to make up the
                body of this request (for example, the properly encoded contents of
                a file for a file upload).  Or None if the request is to have
                no body.

        Returns:
            Deferred[twisted.web.iweb.IResponse]:
                fires when the header of the response has been received (regardless of the
                response status code). Fails if there is any problem which prevents that
                response from being received (including problems that prevent the request
                from being sent).
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        res = yield self._route_matrix_uri(parsed_uri)

        # set up the TLS connection params
        #
        # XXX disabling TLS is really only supported here for the benefit of the
        # unit tests. We should make the UTs cope with TLS rather than having to make
        # the code support the unit tests.
        if self._tls_client_options_factory is None:
            tls_options = None
        else:
            tls_options = self._tls_client_options_factory.get_options(
                res.tls_server_name.decode("ascii"))

        # make sure that the Host header is set correctly
        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        if not headers.hasHeader(b"host"):
            headers.addRawHeader(b"host", res.host_header)

        class EndpointFactory(object):
            @staticmethod
            def endpointForURI(_uri):
                ep = LoggingHostnameEndpoint(self._reactor, res.target_host,
                                             res.target_port)
                if tls_options is not None:
                    ep = wrapClientTLS(tls_options, ep)
                return ep

        agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(),
                                           self._pool)
        res = yield make_deferred_yieldable(
            agent.request(method, uri, headers, bodyProducer))
        return res
コード例 #33
0
ファイル: util.py プロジェクト: fusionapp/fusion-index
    def prePathURL(self):
        from twisted.web.client import URI

        location = URI.fromBytes(self.uri)
        location.path = b"/".join(self.prepath)
        return location.toBytes()
コード例 #34
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a request to the server indicated by the given uri.

        Supports `http` and `https` schemes.

        An existing connection from the connection pool may be used or a new one may be
        created.

        See also: twisted.web.iweb.IAgent.request

        Args:
            method (bytes): The request method to use, such as `GET`, `POST`, etc

            uri (bytes): The location of the resource to request.

            headers (Headers|None): Extra headers to send with the request

            bodyProducer (IBodyProducer|None): An object which can generate bytes to
                make up the body of this request (for example, the properly encoded
                contents of a file for a file upload). Or, None if the request is to
                have no body.

        Returns:
            Deferred[IResponse]: completes when the header of the response has
                 been received (regardless of the response status code).
        """
        uri = uri.strip()
        if not _VALID_URI.match(uri):
            raise ValueError("Invalid URI {!r}".format(uri))

        parsed_uri = URI.fromBytes(uri)
        pool_key: tuple = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
        request_path = parsed_uri.originForm

        if parsed_uri.scheme == b"http" and self.proxy_endpoint:
            # Cache *all* connections under the same key, since we are only
            # connecting to a single destination, the proxy:
            pool_key = ("http-proxy", self.proxy_endpoint)
            endpoint = self.proxy_endpoint
            request_path = uri
        elif parsed_uri.scheme == b"https" and self.proxy_endpoint:
            endpoint = HTTPConnectProxyEndpoint(
                self._reactor,
                self.proxy_endpoint,
                parsed_uri.host,
                parsed_uri.port,
                self._proxy_auth,
            )
        else:
            # not using a proxy
            endpoint = HostnameEndpoint(self._reactor, parsed_uri.host,
                                        parsed_uri.port,
                                        **self._endpoint_kwargs)

        logger.debug("Requesting %s via %s", uri, endpoint)

        if parsed_uri.scheme == b"https":
            tls_connection_creator = self._policy_for_https.creatorForNetloc(
                parsed_uri.host, parsed_uri.port)
            endpoint = wrapClientTLS(tls_connection_creator, endpoint)
        elif parsed_uri.scheme == b"http":
            pass
        else:
            return defer.fail(
                Failure(
                    SchemeNotSupported("Unsupported scheme: %r" %
                                       (parsed_uri.scheme, ))))

        return self._requestWithEndpoint(pool_key, endpoint, method,
                                         parsed_uri, headers, bodyProducer,
                                         request_path)
コード例 #35
0
ファイル: util.py プロジェクト: shiradz/fusion-index
 def prePathURL(self):
     from twisted.web.client import URI
     location = URI.fromBytes(self.uri)
     location.path = b'/'.join(self.prepath)
     return location.toBytes()