def _RetrieveURL(self, url, payload, method, headers, request, response,
                 follow_redirects=True, deadline=_API_CALL_DEADLINE,
                 validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
  """Retrieves a URL.

  Args:
    url: String containing the URL to access.
    payload: Request payload to send, if any; None if no payload.
      If the payload is unicode, we assume it is utf-8.
    method: HTTP method to use (e.g., 'GET')
    headers: List of additional header objects to use for the request.
    request: Request object from original request.
    response: Response object to populate with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or not
      we should transparently follow redirects (up to MAX_REDIRECTS)
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, do not send request to server unless the
      certificate is valid, signed by a trusted CA and the hostname matches
      the certificate.

  Raises:
    Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR
    in cases where:
      - MAX_REDIRECTS is exceeded
      - The protocol of the redirected URL is bad or missing.
      - The port is not in the allowable range of ports.
  """
  # Remembered so a redirect target with a bare path ("/foo") can reuse the
  # previous request's scheme and host.
  last_protocol = ''
  last_host = ''

  if isinstance(payload, unicode):
    payload = payload.encode('utf-8')

  # NOTE(review): the loop is bounded by MAX_REDIRECTS per the docstring, but
  # no code in this span updates `url` after a response — confirm the redirect
  # handling against the full file.
  for redirect_number in xrange(MAX_REDIRECTS + 1):
    parsed = urlparse.urlsplit(url)
    protocol, host, path, query, fragment = parsed

    # Strip userinfo, then split off an explicit :port (None if absent).
    port = urllib.splitport(urllib.splituser(host)[1])[1]

    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!'
          % (url, port))
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s' % url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

    # Relative redirect target: inherit scheme/host from the prior request.
    if not host and not protocol:
      host = last_host
      protocol = last_protocol

    adjusted_headers = {
        'User-Agent':
        'AppEngine-Google; (+http://code.google.com/appengine)',
        'Host': host,
        'Accept-Encoding': 'gzip',
    }
    if payload is not None:
      # Always announce an explicit length so the server need not guess.
      adjusted_headers['Content-Length'] = str(len(payload))
    if method == 'POST' and payload:
      adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

    passthrough_content_encoding = False
    for header in headers:
      if header.key().title().lower() == 'user-agent':
        # Caller's UA is prepended, keeping the AppEngine identifier visible.
        adjusted_headers['User-Agent'] = (
            '%s %s' % (header.value(), adjusted_headers['User-Agent']))
      else:
        if header.key().lower() == 'accept-encoding':
          # Caller chose its own encoding; skip transparent gunzip later.
          passthrough_content_encoding = True
        adjusted_headers[header.key().title()] = header.value()

    if payload is not None:
      # Escaped copy is for logging only; the raw payload is what gets sent.
      escaped_payload = payload.encode('string_escape')
    else:
      escaped_payload = ''
    logging.debug('Making HTTP request: host = %r, '
                  'url = %r, payload = %.1000r, headers = %r',
                  host, url, escaped_payload, adjusted_headers)
    try:
      if protocol == 'http':
        connection_class = httplib.HTTPConnection
      elif protocol == 'https':
        if (validate_certificate and _CanValidateCerts() and
            CERT_PATH):
          # Certificate-validating connection backed by the bundled CA file.
          connection_class = fancy_urllib.create_fancy_connection(
              ca_certs=CERT_PATH)
        else:
          connection_class = httplib.HTTPSConnection
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)

      if _CONNECTION_SUPPORTS_TIMEOUT:
        connection = connection_class(host, timeout=deadline)
      else:
        connection = connection_class(host)

      last_protocol = protocol
      last_host = host

      if query != '':
        full_path = path + '?' + query
      else:
        full_path = path

      # Older httplib has no per-connection timeout; emulate the deadline by
      # temporarily overriding the process-wide socket default.
      if not _CONNECTION_SUPPORTS_TIMEOUT:
        orig_timeout = socket.getdefaulttimeout()
      try:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(deadline)
        connection.request(method, full_path, payload, adjusted_headers)
        http_response = connection.getresponse()
        if method == 'HEAD':
          # HEAD responses carry no body by definition.
          http_response_data = ''
        else:
          http_response_data = http_response.read()
      finally:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(orig_timeout)
        connection.close()
    except (_fancy_urllib_InvalidCertException,
            _fancy_urllib_SSLError), e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
    except socket.timeout, e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
def _RetrieveURL(
    url,
    payload,
    method,
    headers,
    request,
    response,
    follow_redirects=True,
    deadline=_API_CALL_DEADLINE,
    validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
):
    """Retrieves a URL over network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload. If the
        payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
        original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
        populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or
        not we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL_ERROR in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
    """
    # Remembered across iterations so relative redirect targets can inherit
    # the previous request's scheme and host.
    last_protocol = ""
    last_host = ""

    if isinstance(payload, unicode):
        payload = payload.encode("utf-8")

    for redirect_number in xrange(MAX_REDIRECTS + 1):
        parsed = urlparse.urlsplit(url)
        protocol, host, path, query, fragment = parsed

        # Strip userinfo, then split off an explicit :port (None if absent).
        port = urllib.splitport(urllib.splituser(host)[1])[1]
        if not _IsAllowedPort(port):
            logging.error("urlfetch received %s ; port %s is not allowed in production!" % (url, port))
            raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

        if protocol and not host:
            logging.error("Missing host on redirect; target url is %s" % url)
            raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

        # Relative redirect: reuse scheme/host of the previous request.
        if not host and not protocol:
            host = last_host
            protocol = last_protocol

        adjusted_headers = {
            "User-Agent": (
                "AppEngine-Google; (+http://code.google.com/appengine; appid: %s)"
                % os.getenv("APPLICATION_ID")
            ),
            "Host": host,
            "Accept-Encoding": "gzip",
        }
        if payload is not None:
            adjusted_headers["Content-Length"] = str(len(payload))
        if method == "POST" and payload:
            adjusted_headers["Content-Type"] = "application/x-www-form-urlencoded"

        passthrough_content_encoding = False
        for header in headers:
            if header.key().title().lower() == "user-agent":
                # Prepend the caller's UA while keeping the AppEngine marker.
                adjusted_headers["User-Agent"] = "%s %s" % (header.value(), adjusted_headers["User-Agent"])
            else:
                if header.key().lower() == "accept-encoding":
                    # Caller manages encodings itself; skip transparent gunzip.
                    passthrough_content_encoding = True
                adjusted_headers[header.key().title()] = header.value()

        # Escaped copy is only for the debug log below.
        if payload is not None:
            escaped_payload = payload.encode("string_escape")
        else:
            escaped_payload = ""
        logging.debug(
            "Making HTTP request: host = %r, "
            "url = %r, payload = %.1000r, headers = %r",
            host,
            url,
            escaped_payload,
            adjusted_headers,
        )
        try:
            proxy_host = None

            if protocol == "http":
                connection_class = httplib.HTTPConnection
                default_port = 80

                if os.environ.get("HTTP_PROXY") and not _IsLocalhost(host):
                    _, proxy_host, _, _, _ = urlparse.urlsplit(os.environ.get("HTTP_PROXY"))

                # Through a plain HTTP proxy the request line carries the
                # absolute URL.
                full_path = urlparse.urlunsplit((protocol, host, path, query, ""))
            elif protocol == "https":
                if validate_certificate and _CanValidateCerts() and CERT_PATH:
                    connection_class = fancy_urllib.create_fancy_connection(ca_certs=CERT_PATH)
                else:
                    connection_class = httplib.HTTPSConnection

                default_port = 443

                if _CONNECTION_SUPPORTS_SSL_TUNNEL and os.environ.get("HTTPS_PROXY") and not _IsLocalhost(host):
                    _, proxy_host, _, _, _ = urlparse.urlsplit(os.environ.get("HTTPS_PROXY"))

                # HTTPS tunnels via CONNECT, so only the path goes on the
                # request line.
                full_path = urlparse.urlunsplit(("", "", path, query, ""))
            else:
                error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
                logging.error(error_msg)
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg
                )

            connection_kwargs = {"timeout": deadline} if _CONNECTION_SUPPORTS_TIMEOUT else {}

            if proxy_host:
                proxy_address, _, proxy_port = proxy_host.partition(":")
                connection = connection_class(
                    proxy_address, proxy_port if proxy_port else default_port, **connection_kwargs
                )
                if protocol == "https":
                    connection.set_tunnel(host)
            else:
                connection = connection_class(host, **connection_kwargs)

            last_protocol = protocol
            last_host = host

            # Older httplib has no per-connection timeout; emulate the
            # deadline via the process-wide socket default.
            if not _CONNECTION_SUPPORTS_TIMEOUT:
                orig_timeout = socket.getdefaulttimeout()
            try:
                if not _CONNECTION_SUPPORTS_TIMEOUT:
                    socket.setdefaulttimeout(deadline)
                connection.request(method, full_path, payload, adjusted_headers)
                http_response = connection.getresponse()
                if method == "HEAD":
                    http_response_data = ""
                else:
                    http_response_data = http_response.read()
            finally:
                if not _CONNECTION_SUPPORTS_TIMEOUT:
                    socket.setdefaulttimeout(orig_timeout)
                connection.close()
        except _fancy_urllib_InvalidCertException as e:
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e)
            )
        except _fancy_urllib_SSLError as e:
            # SSL-layer timeouts surface as SSLError with "timed out" text.
            app_error = (
                urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
                if "timed out" in e.message
                else urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR
            )
            raise apiproxy_errors.ApplicationError(app_error, str(e))
        except socket.timeout as e:
            # BUGFIX: plain-HTTP timeouts previously escaped as raw
            # socket.timeout; map them to DEADLINE_EXCEEDED like the other
            # versions of this function do. Must precede the socket.error
            # catch-all below (socket.timeout subclasses socket.error).
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e)
            )
        except (httplib.error, socket.error, IOError) as e:
            # BUGFIX: map remaining transport failures to FETCH_ERROR instead
            # of leaking raw exceptions, consistent with sibling versions.
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e)
            )
def _RetrieveURL(url, payload, method, headers, request, response,
                 follow_redirects=True, deadline=_API_CALL_DEADLINE,
                 validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
  """Fetches `url` over the network and fills `response` in place.

  Args:
    url: String containing the URL to access.
    payload: Request body to send, or None. Unicode payloads are assumed to
      be utf-8 and are encoded before sending.
    method: HTTP method to use (e.g., 'GET').
    headers: List of additional header objects to use for the request.
    request: A urlfetch_service_pb.URLFetchRequest proto object from the
      original request.
    response: A urlfetch_service_pb.URLFetchResponse proto object to populate
      with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or not
      we should transparently follow redirects (up to MAX_REDIRECTS).
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, only talk to servers presenting a valid
      certificate, signed by a trusted CA, whose hostname matches.

  Raises:
    apiproxy_errors.ApplicationError with INVALID_URL_ERROR when the
    (redirected) URL has a bad/missing protocol or a disallowed port, and
    with TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
  """
  # Scheme/host of the previous hop, inherited by relative redirect targets.
  last_protocol = ''
  last_host = ''

  if isinstance(payload, unicode):
    payload = payload.encode('utf-8')

  for redirect_number in xrange(MAX_REDIRECTS + 1):
    protocol, host, path, query, fragment = urlparse.urlsplit(url)

    # Drop any userinfo, then pull out the explicit :port if present.
    port = urllib.splitport(urllib.splituser(host)[1])[1]
    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!'
          % (url, port))
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s' % url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not (host or protocol):
      host, protocol = last_host, last_protocol

    adjusted_headers = {
        'User-Agent': 'AppEngine-Google; (+http://code.google.com/appengine)',
        'Host': host,
        'Accept-Encoding': 'gzip',
    }
    if not follow_redirects:
      # Identify the calling app to the target when redirects are disabled.
      adjusted_headers['X-Appengine-Inbound-Appid'] = (
          app_identity.get_application_id())
    if payload is not None:
      adjusted_headers['Content-Length'] = str(len(payload))
    if method == 'POST' and payload:
      adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

    passthrough_content_encoding = False
    for header in headers:
      key_lower = header.key().lower()
      if key_lower == 'user-agent':
        # Keep the AppEngine marker; prepend the caller's UA string.
        adjusted_headers['User-Agent'] = (
            '%s %s' % (header.value(), adjusted_headers['User-Agent']))
      else:
        if key_lower == 'accept-encoding':
          # Caller controls encodings; disable transparent gunzip later.
          passthrough_content_encoding = True
        adjusted_headers[header.key().title()] = header.value()

    # Escaped copy is used only for the debug log line.
    escaped_payload = (
        payload.encode('string_escape') if payload is not None else '')
    logging.debug('Making HTTP request: host = %r, '
                  'url = %r, payload = %.1000r, headers = %r',
                  host, url, escaped_payload, adjusted_headers)
    try:
      if protocol == 'http':
        connection_class = httplib.HTTPConnection
      elif protocol == 'https':
        can_validate = (
            validate_certificate and _CanValidateCerts() and CERT_PATH)
        if can_validate:
          connection_class = fancy_urllib.create_fancy_connection(
              ca_certs=CERT_PATH)
        else:
          connection_class = httplib.HTTPSConnection
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

      if _CONNECTION_SUPPORTS_TIMEOUT:
        connection = connection_class(host, timeout=deadline)
      else:
        connection = connection_class(host)

      last_protocol, last_host = protocol, host

      full_path = path + '?' + query if query != '' else path

      # Without per-connection timeouts, emulate the deadline through the
      # process-wide socket default, restoring it afterwards.
      if not _CONNECTION_SUPPORTS_TIMEOUT:
        orig_timeout = socket.getdefaulttimeout()
      try:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(deadline)
        connection.request(method, full_path, payload, adjusted_headers)
        http_response = connection.getresponse()
        http_response_data = '' if method == 'HEAD' else http_response.read()
      finally:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(orig_timeout)
        connection.close()
    except (_fancy_urllib_InvalidCertException,
            _fancy_urllib_SSLError) as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
    except socket.timeout as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
def _RetrieveURL(url, payload, method, headers, request, response,
                 follow_redirects=True, deadline=_API_CALL_DEADLINE,
                 validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
  """Retrieves a URL over network.

  Args:
    url: String containing the URL to access.
    payload: Request payload to send, if any; None if no payload. If the
      payload is unicode, we assume it is utf-8.
    method: HTTP method to use (e.g., 'GET')
    headers: List of additional header objects to use for the request.
    request: A urlfetch_service_pb.URLFetchRequest proto object from
      original request.
    response: A urlfetch_service_pb.URLFetchResponse proto object to
      populate with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or
      not we should transparently follow redirects (up to MAX_REDIRECTS)
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, do not send request to server unless the
      certificate is valid, signed by a trusted CA and the hostname matches
      the certificate.

  Raises:
    Raises an apiproxy_errors.ApplicationError exception with
    INVALID_URL_ERROR in cases where:
      - The protocol of the redirected URL is bad or missing.
      - The port is not in the allowable range of ports.
    Raises an apiproxy_errors.ApplicationError exception with
    TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
  """
  # Scheme/host of the previous hop; relative redirects inherit them.
  last_protocol = ''
  last_host = ''

  if isinstance(payload, str):
    payload = payload.encode('utf-8')

  for redirect_number in range(MAX_REDIRECTS + 1):
    parsed = urllib.parse.urlsplit(url)
    protocol, host, path, query, fragment = parsed

    # Strip userinfo, then split off the explicit :port (None if absent).
    port = urllib.parse.splitport(urllib.parse.splituser(host)[1])[1]

    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!'
          % (url, port))
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s' % url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host and not protocol:
      host = last_host
      protocol = last_protocol

    adjusted_headers = {
        'User-Agent': 'AppEngine-Google; (+http://code.google.com/appengine)',
        'Host': host,
        'Accept-Encoding': 'gzip',
    }
    if payload is not None:
      adjusted_headers['Content-Length'] = str(len(payload))
    if method == 'POST' and payload:
      adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

    passthrough_content_encoding = False
    for header in headers:
      # Proto header keys/values are bytes; decode for string handling.
      if header.key().decode().title().lower() == 'user-agent':
        adjusted_headers['User-Agent'] = (
            '%s %s' % (header.value().decode(),
                       adjusted_headers['User-Agent']))
      else:
        if header.key().decode().lower() == 'accept-encoding':
          # Caller manages encodings itself; skip transparent gunzip below.
          passthrough_content_encoding = True
        adjusted_headers[header.key().decode().title()] = (
            header.value().decode())

    # BUGFIX: payload is bytes here; Python 3 bytes has no .encode() and the
    # 'string_escape' codec no longer exists, so the old
    # payload.encode('string_escape') raised AttributeError on every request
    # with a body. %r in the debug log already escapes bytes, so log the
    # payload directly.
    if payload is not None:
      escaped_payload = payload
    else:
      escaped_payload = b''
    logging.debug('Making HTTP request: host = %r, '
                  'url = %r, payload = %.1000r, headers = %r',
                  host, url, escaped_payload, adjusted_headers)
    try:
      if protocol == 'http':
        connection_class = http.client.HTTPConnection
      elif protocol == 'https':
        if (validate_certificate and _CanValidateCerts() and
            CERT_PATH):
          # Certificate-validating connection backed by the bundled CA file.
          connection_class = fancy_urllib.create_fancy_connection(
              ca_certs=CERT_PATH)
        else:
          connection_class = http.client.HTTPSConnection
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

      if _CONNECTION_SUPPORTS_TIMEOUT:
        connection = connection_class(host, timeout=deadline)
      else:
        connection = connection_class(host)

      last_protocol = protocol
      last_host = host

      if query != '':
        full_path = path + '?' + query
      else:
        full_path = path

      # Without per-connection timeouts, emulate the deadline through the
      # process-wide socket default.
      if not _CONNECTION_SUPPORTS_TIMEOUT:
        orig_timeout = socket.getdefaulttimeout()
      try:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(deadline)
        connection.request(method, full_path, payload, adjusted_headers)
        http_response = connection.getresponse()
        if method == 'HEAD':
          # BUGFIX: body is bytes everywhere else; use b'' (was '') so
          # slicing/len/gzip handling below sees one consistent type.
          http_response_data = b''
        else:
          http_response_data = http_response.read()
      finally:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(orig_timeout)
        connection.close()
    except _fancy_urllib_InvalidCertException as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
    except _fancy_urllib_SSLError as e:
      # BUGFIX: Python 3 exceptions have no .message attribute; the old
      # `'timed out' in e.message` raised AttributeError inside the handler.
      app_error = (
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
          if 'timed out' in str(e) else
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
      raise apiproxy_errors.ApplicationError(app_error, str(e))
    except socket.timeout as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
    except (http.client.error, socket.error, IOError) as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

    if http_response.status in REDIRECT_STATUSES and follow_redirects:
      url = http_response.getheader('Location', None)
      if url is None:
        error_msg = 'Redirecting response was missing "Location" header'
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
            error_msg)

      # Per RFC 7231, only 307 preserves the method; everything else
      # downgrades non-idempotent methods to GET.
      if (http_response.status != http.client.TEMPORARY_REDIRECT and
          method not in PRESERVE_ON_REDIRECT):
        logging.warning('Received a %s to a %s. Redirecting with a GET',
                        http_response.status, method)
        method = 'GET'
        payload = None
    else:
      response.set_statuscode(http_response.status)
      if (http_response.getheader('content-encoding') == 'gzip' and
          not passthrough_content_encoding):
        # BUGFIX: the response body is bytes; io.StringIO(bytes) raises
        # TypeError. gzip.GzipFile needs a binary stream -> io.BytesIO.
        gzip_stream = io.BytesIO(http_response_data)
        gzip_file = gzip.GzipFile(fileobj=gzip_stream)
        http_response_data = gzip_file.read()
      response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

      for header_key in list(http_response.msg.keys()):
        for header_value in http_response.msg.get_all(header_key):
          # Drop the gzip content-encoding header after transparent
          # decompression, and fix content-length to the decompressed size.
          if (header_key.lower() == 'content-encoding' and
              header_value == 'gzip' and
              not passthrough_content_encoding):
            continue
          if header_key.lower() == 'content-length' and method != 'HEAD':
            header_value = str(len(response.content()))
          header_proto = response.add_header()
          header_proto.set_key(header_key.encode())
          header_proto.set_value(header_value.encode())

      if len(http_response_data) > MAX_RESPONSE_SIZE:
        response.set_contentwastruncated(True)

      if request.url() != url:
        response.set_finalurl(url.encode())

      break
  else:
    # Loop exhausted without a non-redirect response.
    error_msg = 'Too many repeated redirects'
    logging.error(error_msg)
    raise apiproxy_errors.ApplicationError(
        urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
        error_msg)
def _RetrieveURL(
    url, payload, method, headers, request, response,
    follow_redirects=True,
    deadline=_API_CALL_DEADLINE,
    validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
    http_proxy=None):
  """Retrieves a URL over network.

  Args:
    url: String containing the URL to access.
    payload: Request payload to send, if any; None if no payload. If the
      payload is unicode, we assume it is utf-8.
    method: HTTP method to use (e.g., 'GET')
    headers: List of additional header objects to use for the request.
    request: A urlfetch_service_pb.URLFetchRequest proto object from
      original request.
    response: A urlfetch_service_pb.URLFetchResponse proto object to
      populate with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or
      not we should transparently follow redirects (up to MAX_REDIRECTS)
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, do not send request to server unless the
      certificate is valid, signed by a trusted CA and the hostname matches
      the certificate.
    http_proxy: Tuple of (hostname, port), where hostname is a string and
      port is an int, to use as the http proxy.

  Raises:
    Raises an apiproxy_errors.ApplicationError exception with
    INVALID_URL_ERROR in cases where:
      - The protocol of the redirected URL is bad or missing.
      - The port is not in the allowable range of ports.
    Raises an apiproxy_errors.ApplicationError exception with
    TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
  """
  # Scheme/host of the previous hop; relative redirect targets inherit them.
  last_protocol = ''
  last_host = ''
  if isinstance(payload, unicode):
    payload = payload.encode('utf-8')

  for redirect_number in xrange(MAX_REDIRECTS + 1):
    parsed = urlparse.urlsplit(url)
    protocol, host, path, query, fragment = parsed

    # Strip userinfo, then split off an explicit :port (None if absent).
    port = urllib.splitport(urllib.splituser(host)[1])[1]
    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!'
          % (url, port))
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s' % url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host and not protocol:
      host = last_host
      protocol = last_protocol

    # An explicit ":0" port means "use the scheme default"; drop it so the
    # Host header and connection use the bare hostname.
    if port == '0':
      host = host.replace(':0', '')

    # Header values are kept as lists so repeated caller headers can be sent
    # as multiple header lines.
    adjusted_headers = {
        'User-Agent': [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)' % os.getenv('APPLICATION_ID'))],
        'Host': [host],
        'Accept-Encoding': ['gzip'],
    }
    if payload is not None:
      adjusted_headers['Content-Length'] = [str(len(payload))]
    if method == 'POST' and payload:
      adjusted_headers['Content-Type'] = [
          'application/x-www-form-urlencoded'
      ]

    passthrough_content_encoding = False
    for header in headers:
      header_key = header.key()
      if header_key.lower() == 'user-agent':
        # Prepend the caller's UA while keeping the AppEngine identifier.
        adjusted_headers[header_key.title()] = [
            ('%s %s' % (header.value(), adjusted_headers['User-Agent'][0]))
        ]
      elif header_key.lower() == 'accept-encoding':
        # Caller manages encodings itself; skip transparent gunzip later.
        passthrough_content_encoding = True
        adjusted_headers[header_key.title()] = [header.value()]
      elif header_key.lower() == 'content-type':
        adjusted_headers[header_key.title()] = [header.value()]
      else:
        # Other headers may repeat; accumulate all values.
        adjusted_headers.setdefault(header_key, []).append(header.value())

    # Escaped copy is only for the debug log line; raw payload is sent.
    if payload is not None:
      escaped_payload = payload.encode('string_escape')
    else:
      escaped_payload = ''
    logging.debug(
        'Making HTTP request: host = %r, '
        'url = %r, payload = %.1000r, headers = %r', host, url,
        escaped_payload, adjusted_headers)
    try:
      proxy_host = None

      if protocol == 'http':
        connection_class = httplib.HTTPConnection
        default_port = 80

        # Explicit http_proxy argument wins over the HTTP_PROXY env var;
        # localhost traffic always bypasses proxies.
        if http_proxy and not _IsLocalhost(host):
          proxy_host = '%s:%d' % (http_proxy[0], http_proxy[1])
        elif os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
          _, proxy_host, _, _, _ = (urlparse.urlsplit(
              os.environ.get('HTTP_PROXY')))
      elif protocol == 'https':
        if (validate_certificate and _CanValidateCerts() and CERT_PATH):
          # Certificate-validating connection backed by the bundled CA file.
          connection_class = fancy_urllib.create_fancy_connection(
              ca_certs=CERT_PATH)
        else:
          connection_class = httplib.HTTPSConnection

        default_port = 443

        if os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host):
          _, proxy_host, _, _, _ = (urlparse.urlsplit(
              os.environ.get('HTTPS_PROXY')))
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

      connection_kwargs = {'timeout': deadline}

      # When the caller opted out of validation on Python >= 2.7.9 (which
      # verifies certificates by default), explicitly disable verification.
      if (not validate_certificate and sys.version_info >= (2, 7, 9)
          and protocol == 'https'):
        import ssl
        connection_kwargs['context'] = ssl._create_unverified_context()

      if proxy_host:
        proxy_address, _, proxy_port = proxy_host.partition(':')
        connection = connection_class(
            proxy_address, proxy_port if proxy_port else default_port,
            **connection_kwargs)
        # Through a proxy, the request line carries the absolute URL.
        full_path = urlparse.urlunsplit((protocol, host, path, query, ''))
        if protocol == 'https':
          # HTTPS through a proxy uses a CONNECT tunnel to the real host.
          connection.set_tunnel(host)
      else:
        connection = connection_class(host, **connection_kwargs)
        full_path = urlparse.urlunsplit(('', '', path, query, ''))

      last_protocol = protocol
      last_host = host

      try:
        _SendRequest(connection, method, full_path, payload,
                     adjusted_headers)
        http_response = connection.getresponse()
        if method == 'HEAD':
          # HEAD responses carry no body by definition.
          http_response_data = ''
        else:
          http_response_data = http_response.read()
      finally:
        connection.close()
    except _fancy_urllib_InvalidCertException as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
    except _fancy_urllib_SSLError as e:
      # SSL-layer timeouts surface as SSLError containing "timed out".
      # NOTE(review): e.message is deprecated in Python 2 and absent in
      # Python 3 — confirm this module only runs under Python 2.
      app_error = (
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
          if 'timed out' in e.message else
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
      raise apiproxy_errors.ApplicationError(app_error, str(e))
    except socket.timeout as e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
    except (httplib.error, socket.error, IOError) as e:
      # Any other transport failure maps to the generic FETCH_ERROR.
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

    # Statuses >= 600 are outside HTTP; refuse to report them.
    if http_response.status >= 600:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR,
          'Status %s unknown' % http_response.status)

    if http_response.status in REDIRECT_STATUSES and follow_redirects:
      url = http_response.getheader('Location', None)
      if url is None:
        error_msg = 'Missing "Location" header for redirect.'
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
            error_msg)

      # Only 307 preserves the request method; other redirect statuses
      # downgrade non-preserved methods to GET and drop the body.
      if (http_response.status != httplib.TEMPORARY_REDIRECT and
          method not in PRESERVE_ON_REDIRECT):
        logging.warn('Received a %s to a %s. Redirecting with a GET',
                     http_response.status, method)
        method = 'GET'
        payload = None
    else:
      response.set_statuscode(http_response.status)
      if (http_response.getheader('content-encoding') == 'gzip' and
          not passthrough_content_encoding):
        # Transparently decompress unless the caller asked for raw encoding.
        gzip_stream = StringIO.StringIO(http_response_data)
        gzip_file = gzip.GzipFile(fileobj=gzip_stream)
        http_response_data = gzip_file.read()
      response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

      for header_key in http_response.msg.keys():
        for header_value in http_response.msg.getheaders(header_key):
          # Hide the gzip content-encoding header after decompression and
          # rewrite content-length to the decompressed size.
          if (header_key.lower() == 'content-encoding' and
              header_value == 'gzip' and not passthrough_content_encoding):
            continue
          if header_key.lower() == 'content-length' and method != 'HEAD':
            header_value = str(len(response.content()))
          header_proto = response.add_header()
          header_proto.set_key(header_key)
          header_proto.set_value(header_value)

      if len(http_response_data) > MAX_RESPONSE_SIZE:
        response.set_contentwastruncated(True)

      if request.url() != url:
        response.set_finalurl(url)

      break
  else:
    # Loop exhausted without reaching a non-redirect response.
    error_msg = 'Too many repeated redirects'
    logging.error(error_msg)
    raise apiproxy_errors.ApplicationError(
        urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
        error_msg)
def _RetrieveURL(
    url, payload, method, headers, request, response,
    follow_redirects=True,
    deadline=_API_CALL_DEADLINE,
    validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
  """Retrieves a URL over network.

  Args:
    url: String containing the URL to access.
    payload: Request payload to send, if any; None if no payload. If the
      payload is unicode, we assume it is utf-8.
    method: HTTP method to use (e.g., 'GET')
    headers: List of additional header objects to use for the request.
    request: A urlfetch_service_pb.URLFetchRequest proto object from
      original request.
    response: A urlfetch_service_pb.URLFetchResponse proto object to
      populate with the response data.
    follow_redirects: optional setting (defaulting to True) for whether or
      not we should transparently follow redirects (up to MAX_REDIRECTS)
    deadline: Number of seconds to wait for the urlfetch to finish.
    validate_certificate: If true, do not send request to server unless the
      certificate is valid, signed by a trusted CA and the hostname matches
      the certificate.

  Raises:
    Raises an apiproxy_errors.ApplicationError exception with
    INVALID_URL_ERROR in cases where:
      - The protocol of the redirected URL is bad or missing.
      - The port is not in the allowable range of ports.
    Raises an apiproxy_errors.ApplicationError exception with
    TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
  """
  # Scheme/host of the previous hop; relative redirect targets inherit them.
  last_protocol = ''
  last_host = ''

  if isinstance(payload, unicode):
    payload = payload.encode('utf-8')

  # NOTE(review): bounded by MAX_REDIRECTS per the docstring, but no code in
  # this span updates `url` after a response — confirm the redirect handling
  # against the full file.
  for redirect_number in xrange(MAX_REDIRECTS + 1):
    parsed = urlparse.urlsplit(url)
    protocol, host, path, query, fragment = parsed

    # Strip userinfo, then split off an explicit :port (None if absent).
    port = urllib.splitport(urllib.splituser(host)[1])[1]
    if not _IsAllowedPort(port):
      logging.error(
          'urlfetch received %s ; port %s is not allowed in production!'
          % (url, port))
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if protocol and not host:
      logging.error('Missing host on redirect; target url is %s' % url)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host and not protocol:
      host = last_host
      protocol = last_protocol

    # Header values are kept as lists so repeated caller headers can be sent
    # as multiple header lines.
    adjusted_headers = {
        'User-Agent': [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)' % os.getenv('APPLICATION_ID'))],
        'Host': [host],
        'Accept-Encoding': ['gzip'],
    }
    if payload is not None:
      adjusted_headers['Content-Length'] = [str(len(payload))]
    if method == 'POST' and payload:
      adjusted_headers['Content-Type'] = [
          'application/x-www-form-urlencoded'
      ]

    passthrough_content_encoding = False
    for header in headers:
      header_key = header.key()
      if header_key.lower() == 'user-agent':
        # Prepend the caller's UA while keeping the AppEngine identifier.
        adjusted_headers[header_key.title()] = [
            ('%s %s' % (header.value(), adjusted_headers['User-Agent'][0]))
        ]
      elif header_key.lower() == 'accept-encoding':
        # Caller manages encodings itself; skip transparent gunzip.
        passthrough_content_encoding = True
        adjusted_headers[header_key.title()] = [header.value()]
      elif header_key.lower() == 'content-type':
        adjusted_headers[header_key.title()] = [header.value()]
      else:
        # Other headers may repeat; accumulate all values.
        adjusted_headers.setdefault(header_key, []).append(header.value())

    # Escaped copy is only for the debug log line; raw payload is sent.
    if payload is not None:
      escaped_payload = payload.encode('string_escape')
    else:
      escaped_payload = ''
    logging.debug(
        'Making HTTP request: host = %r, '
        'url = %r, payload = %.1000r, headers = %r', host, url,
        escaped_payload, adjusted_headers)
    try:
      proxy_host = None

      if protocol == 'http':
        connection_class = httplib.HTTPConnection
        default_port = 80

        # Localhost traffic always bypasses the proxy.
        if os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
          _, proxy_host, _, _, _ = (urlparse.urlsplit(
              os.environ.get('HTTP_PROXY')))
      elif protocol == 'https':
        if (validate_certificate and _CanValidateCerts() and
            CERT_PATH):
          # Certificate-validating connection backed by the bundled CA file.
          connection_class = fancy_urllib.create_fancy_connection(
              ca_certs=CERT_PATH)
        else:
          connection_class = httplib.HTTPSConnection

        default_port = 443

        # HTTPS proxying requires CONNECT tunnel support in httplib.
        if (_CONNECTION_SUPPORTS_SSL_TUNNEL and
            os.environ.get('HTTPS_PROXY') and not _IsLocalhost(host)):
          _, proxy_host, _, _, _ = (urlparse.urlsplit(
              os.environ.get('HTTPS_PROXY')))
      else:
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

      connection_kwargs = ({
          'timeout': deadline
      } if _CONNECTION_SUPPORTS_TIMEOUT else {})

      if proxy_host:
        proxy_address, _, proxy_port = proxy_host.partition(':')
        connection = connection_class(
            proxy_address, proxy_port if proxy_port else default_port,
            **connection_kwargs)
        # Through a proxy, the request line carries the absolute URL.
        full_path = urlparse.urlunsplit(
            (protocol, host, path, query, ''))
        if protocol == 'https':
          # HTTPS through a proxy uses a CONNECT tunnel to the real host.
          connection.set_tunnel(host)
      else:
        connection = connection_class(host, **connection_kwargs)
        full_path = urlparse.urlunsplit(('', '', path, query, ''))

      last_protocol = protocol
      last_host = host

      # Older httplib has no per-connection timeout; emulate the deadline by
      # temporarily overriding the process-wide socket default.
      if not _CONNECTION_SUPPORTS_TIMEOUT:
        orig_timeout = socket.getdefaulttimeout()
      try:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(deadline)
        _SendRequest(connection, method, full_path, payload,
                     adjusted_headers)
        http_response = connection.getresponse()
        if method == 'HEAD':
          # HEAD responses carry no body by definition.
          http_response_data = ''
        else:
          http_response_data = http_response.read()
      finally:
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          socket.setdefaulttimeout(orig_timeout)
        connection.close()
    except _fancy_urllib_InvalidCertException, e:
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.
          SSL_CERTIFICATE_ERROR, str(e))
    except _fancy_urllib_SSLError, e:
      # SSL-layer timeouts surface as SSLError containing "timed out";
      # everything else is treated as a certificate problem.
      app_error = (
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
          if 'timed out' in e.message else urlfetch_service_pb.
          URLFetchServiceError.SSL_CERTIFICATE_ERROR)
      raise apiproxy_errors.ApplicationError(app_error, str(e))
def _RetrieveURL(self, url, payload, method, headers, request, response, follow_redirects=True, deadline=_API_CALL_DEADLINE, validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT): """Retrieves a URL. Args: url: String containing the URL to access. payload: Request payload to send, if any; None if no payload. method: HTTP method to use (e.g., 'GET') headers: List of additional header objects to use for the request. request: Request object from original request. response: Response object to populate with the response data. follow_redirects: optional setting (defaulting to True) for whether or not we should transparently follow redirects (up to MAX_REDIRECTS) deadline: Number of seconds to wait for the urlfetch to finish. validate_certificate: If true, do not send request to server unless the certificate is valid, signed by a trusted CA and the hostname matches the certificate. Raises: Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR in cases where: - MAX_REDIRECTS is exceeded - The protocol of the redirected URL is bad or missing. """ last_protocol = '' last_host = '' for redirect_number in xrange(MAX_REDIRECTS + 1): parsed = urlparse.urlparse(url) protocol, host, path, parameters, query, fragment = parsed port = urllib.splitport(urllib.splituser(host)[1])[1] if not _IsAllowedPort(port): logging.warning( 'urlfetch received %s ; port %s is not allowed in production!' 
% (url, port)) if protocol and not host: logging.error('Missing host on redirect; target url is %s' % url) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR) if not host and not protocol: host = last_host protocol = last_protocol adjusted_headers = { 'User-Agent': 'AppEngine-Google; (+http://code.google.com/appengine)', 'Host': host, 'Accept-Encoding': 'gzip', } if payload is not None: adjusted_headers['Content-Length'] = len(payload) if method == 'POST' and payload: adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded' for header in headers: if header.key().title().lower() == 'user-agent': adjusted_headers['User-Agent'] = ( '%s %s' % (header.value(), adjusted_headers['User-Agent'])) else: adjusted_headers[header.key().title()] = header.value() logging.debug('Making HTTP request: host = %s, ' 'url = %s, payload = %s, headers = %s', host, url, payload, adjusted_headers) try: if protocol == 'http': connection = httplib.HTTPConnection(host) elif protocol == 'https': if (validate_certificate and fancy_urllib.can_validate_certs() and CERT_PATH): connection_class = fancy_urllib.create_fancy_connection( ca_certs=CERT_PATH) connection = connection_class(host) else: connection = httplib.HTTPSConnection(host) else: error_msg = 'Redirect specified invalid protocol: "%s"' % protocol logging.error(error_msg) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg) last_protocol = protocol last_host = host if query != '': full_path = path + '?' 
+ query else: full_path = path orig_timeout = socket.getdefaulttimeout() try: socket.setdefaulttimeout(deadline) connection.request(method, full_path, payload, adjusted_headers) http_response = connection.getresponse() if method == 'HEAD': http_response_data = '' else: http_response_data = http_response.read() finally: socket.setdefaulttimeout(orig_timeout) connection.close() except (fancy_urllib.InvalidCertificateException, fancy_urllib.SSLError), e: raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e)) except (httplib.error, socket.error, IOError), e: raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
def _RetrieveURL( self, url, payload, method, headers, request, response, follow_redirects=True, deadline=_API_CALL_DEADLINE, validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT, ): """Retrieves a URL. Args: url: String containing the URL to access. payload: Request payload to send, if any; None if no payload. If the payload is unicode, we assume it is utf-8. method: HTTP method to use (e.g., 'GET') headers: List of additional header objects to use for the request. request: Request object from original request. response: Response object to populate with the response data. follow_redirects: optional setting (defaulting to True) for whether or not we should transparently follow redirects (up to MAX_REDIRECTS) deadline: Number of seconds to wait for the urlfetch to finish. validate_certificate: If true, do not send request to server unless the certificate is valid, signed by a trusted CA and the hostname matches the certificate. Raises: Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR in cases where: - MAX_REDIRECTS is exceeded - The protocol of the redirected URL is bad or missing. - The port is not in the allowable range of ports. """ last_protocol = "" last_host = "" if isinstance(payload, unicode): payload = payload.encode("utf-8") for redirect_number in xrange(MAX_REDIRECTS + 1): parsed = urlparse.urlsplit(url) protocol, host, path, query, fragment = parsed port = urllib.splitport(urllib.splituser(host)[1])[1] if not _IsAllowedPort(port): logging.error("urlfetch received %s ; port %s is not allowed in production!" 
% (url, port)) raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR) if protocol and not host: logging.error("Missing host on redirect; target url is %s" % url) raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR) if not host and not protocol: host = last_host protocol = last_protocol adjusted_headers = { "User-Agent": "AppEngine-Google; (+http://code.google.com/appengine)", "Host": host, "Accept-Encoding": "gzip", } if payload is not None: adjusted_headers["Content-Length"] = str(len(payload)) if method == "POST" and payload: adjusted_headers["Content-Type"] = "application/x-www-form-urlencoded" for header in headers: if header.key().title().lower() == "user-agent": adjusted_headers["User-Agent"] = "%s %s" % (header.value(), adjusted_headers["User-Agent"]) else: adjusted_headers[header.key().title()] = header.value() if payload is not None: escaped_payload = payload.encode("string_escape") else: escaped_payload = "" logging.debug( "Making HTTP request: host = %s, " "url = %s, payload = %s, headers = %s", host, url, escaped_payload, adjusted_headers, ) try: if protocol == "http": connection = httplib.HTTPConnection(host) elif protocol == "https": if validate_certificate and _CanValidateCerts() and CERT_PATH: connection_class = fancy_urllib.create_fancy_connection(ca_certs=CERT_PATH) connection = connection_class(host) else: connection = httplib.HTTPSConnection(host) else: error_msg = 'Redirect specified invalid protocol: "%s"' % protocol logging.error(error_msg) raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg ) last_protocol = protocol last_host = host if query != "": full_path = path + "?" 
+ query else: full_path = path orig_timeout = socket.getdefaulttimeout() try: socket.setdefaulttimeout(deadline) connection.request(method, full_path, payload, adjusted_headers) http_response = connection.getresponse() if method == "HEAD": http_response_data = "" else: http_response_data = http_response.read() finally: socket.setdefaulttimeout(orig_timeout) connection.close() except (_fancy_urllib_InvalidCertException, _fancy_urllib_SSLError), e: raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e) ) except socket.timeout, e: raise apiproxy_errors.ApplicationError( urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e) )