Example #1
0
  def _RetrieveURL(self, url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: Request object from original request.
      response: Response object to populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR
      in cases where:
        - MAX_REDIRECTS is exceeded
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))

        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

      if protocol and not host:

        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)




      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:


        adjusted_headers['Content-Length'] = str(len(payload))
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()

      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = httplib.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):

            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection
        else:

          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)



        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:


            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (_fancy_urllib_InvalidCertException,
              _fancy_urllib_SSLError), e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
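The port check above relies on _IsAllowedPort, which is not included in any of these examples. A minimal sketch of what such a check might look like, assuming it receives the string (or None) port produced by splitport; the allow-list below is illustrative only, not the stub's real values.

# Hypothetical sketch of an _IsAllowedPort-style filter; the port set is an
# assumption for illustration, not taken from the real urlfetch stub.
_ILLUSTRATIVE_ALLOWED_PORTS = set(
    [80, 443, 4443, 8880, 9443] +
    list(range(8080, 8090)) + list(range(8440, 8450)))

def _is_allowed_port_sketch(port):
  # splitport returns the port as a string, or None when the URL has none.
  if port is None:
    return True
  try:
    port = int(port)
  except ValueError:
    return False
  return port in _ILLUSTRATIVE_ALLOWED_PORTS

assert _is_allowed_port_sketch(None)
assert _is_allowed_port_sketch('443')
assert not _is_allowed_port_sketch('25')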
Example #2
0
    def _RetrieveURL(
        url,
        payload,
        method,
        headers,
        request,
        response,
        follow_redirects=True,
        deadline=_API_CALL_DEADLINE,
        validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
    ):
        """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
        last_protocol = ""
        last_host = ""
        if isinstance(payload, unicode):
            payload = payload.encode("utf-8")

        for redirect_number in xrange(MAX_REDIRECTS + 1):
            parsed = urlparse.urlsplit(url)
            protocol, host, path, query, fragment = parsed

            port = urllib.splitport(urllib.splituser(host)[1])[1]

            if not _IsAllowedPort(port):
                logging.error("urlfetch received %s ; port %s is not allowed in production!" % (url, port))

                raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if protocol and not host:

                logging.error("Missing host on redirect; target url is %s" % url)
                raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if not host and not protocol:
                host = last_host
                protocol = last_protocol

            adjusted_headers = {
                "User-Agent": (
                    "AppEngine-Google; (+http://code.google.com/appengine; appid: %s)" % os.getenv("APPLICATION_ID")
                ),
                "Host": host,
                "Accept-Encoding": "gzip",
            }
            if payload is not None:

                adjusted_headers["Content-Length"] = str(len(payload))

            if method == "POST" and payload:
                adjusted_headers["Content-Type"] = "application/x-www-form-urlencoded"

            passthrough_content_encoding = False
            for header in headers:
                if header.key().title().lower() == "user-agent":
                    adjusted_headers["User-Agent"] = "%s %s" % (header.value(), adjusted_headers["User-Agent"])
                else:
                    if header.key().lower() == "accept-encoding":
                        passthrough_content_encoding = True
                    adjusted_headers[header.key().title()] = header.value()

            if payload is not None:
                escaped_payload = payload.encode("string_escape")
            else:
                escaped_payload = ""
            logging.debug(
                "Making HTTP request: host = %r, " "url = %r, payload = %.1000r, headers = %r",
                host,
                url,
                escaped_payload,
                adjusted_headers,
            )
            try:
                proxy_host = None

                if protocol == "http":
                    connection_class = httplib.HTTPConnection
                    default_port = 80

                    if os.environ.get("HTTP_PROXY") and not _IsLocalhost(host):
                        _, proxy_host, _, _, _ = urlparse.urlsplit(os.environ.get("HTTP_PROXY"))

                    full_path = urlparse.urlunsplit((protocol, host, path, query, ""))
                elif protocol == "https":
                    if validate_certificate and _CanValidateCerts() and CERT_PATH:

                        connection_class = fancy_urllib.create_fancy_connection(ca_certs=CERT_PATH)
                    else:
                        connection_class = httplib.HTTPSConnection

                    default_port = 443

                    if _CONNECTION_SUPPORTS_SSL_TUNNEL and os.environ.get("HTTPS_PROXY") and not _IsLocalhost(host):
                        _, proxy_host, _, _, _ = urlparse.urlsplit(os.environ.get("HTTPS_PROXY"))

                    full_path = urlparse.urlunsplit(("", "", path, query, ""))
                else:

                    error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
                    logging.error(error_msg)
                    raise apiproxy_errors.ApplicationError(
                        urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg
                    )

                connection_kwargs = {"timeout": deadline} if _CONNECTION_SUPPORTS_TIMEOUT else {}

                if proxy_host:
                    proxy_address, _, proxy_port = proxy_host.partition(":")
                    connection = connection_class(
                        proxy_address, proxy_port if proxy_port else default_port, **connection_kwargs
                    )

                    if protocol == "https":
                        connection.set_tunnel(host)
                else:
                    connection = connection_class(host, **connection_kwargs)

                last_protocol = protocol
                last_host = host

                if not _CONNECTION_SUPPORTS_TIMEOUT:
                    orig_timeout = socket.getdefaulttimeout()
                try:
                    if not _CONNECTION_SUPPORTS_TIMEOUT:

                        socket.setdefaulttimeout(deadline)
                    connection.request(method, full_path, payload, adjusted_headers)
                    http_response = connection.getresponse()
                    if method == "HEAD":
                        http_response_data = ""
                    else:
                        http_response_data = http_response.read()
                finally:
                    if not _CONNECTION_SUPPORTS_TIMEOUT:
                        socket.setdefaulttimeout(orig_timeout)
                    connection.close()
            except _fancy_urllib_InvalidCertException, e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e)
                )
            except _fancy_urllib_SSLError, e:

                app_error = (
                    urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
                    if "timed out" in e.message
                    else urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR
                )
                raise apiproxy_errors.ApplicationError(app_error, str(e))
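Example #2 adds proxy support: it reads HTTP_PROXY / HTTPS_PROXY, connects to the proxy, and tunnels HTTPS to the real host with set_tunnel. Below is the same connect-through-proxy pattern in isolation, using only the standard httplib API; the host and proxy values in the usage comment are placeholders.

# Sketch of the proxy pattern used above: open the connection to the proxy,
# then CONNECT-tunnel the real host for HTTPS so TLS terminates at the target.
try:
  import http.client as httplib  # Python 3
except ImportError:
  import httplib                 # Python 2

def open_connection_via_proxy(protocol, host, proxy_host, deadline):
  default_port = 443 if protocol == 'https' else 80
  connection_class = (httplib.HTTPSConnection if protocol == 'https'
                      else httplib.HTTPConnection)
  proxy_address, _, proxy_port = proxy_host.partition(':')
  connection = connection_class(
      proxy_address, int(proxy_port) if proxy_port else default_port,
      timeout=deadline)
  if protocol == 'https':
    connection.set_tunnel(host)
  return connection

# connection = open_connection_via_proxy(
#     'https', 'example.com', 'proxy.internal:3128', deadline=5)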
Example #3
0
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))

        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:

        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)




      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }

      if not follow_redirects:
        adjusted_headers['X-Appengine-Inbound-Appid'] = app_identity.get_application_id()

      if payload is not None:


        adjusted_headers['Content-Length'] = str(len(payload))


      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()

      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = httplib.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):

            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection
        else:

          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)



        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:


            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (_fancy_urllib_InvalidCertException,
              _fancy_urllib_SSLError), e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
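Several variants branch on _CONNECTION_SUPPORTS_TIMEOUT: newer httplib accepts a per-connection timeout keyword, while older versions require temporarily overriding the process-wide socket default. A small self-contained sketch of that fallback, with an assumed version check standing in for the real flag.

# Sketch of the deadline handling above: prefer a per-connection timeout and
# fall back to swapping the global socket default for the duration of the call.
import socket
import sys
try:
  import http.client as httplib  # Python 3
except ImportError:
  import httplib                 # Python 2

# httplib.HTTPConnection grew its timeout keyword in Python 2.6.
_SKETCH_CONNECTION_SUPPORTS_TIMEOUT = sys.version_info >= (2, 6)

def fetch_status_with_deadline(host, path, deadline):
  if _SKETCH_CONNECTION_SUPPORTS_TIMEOUT:
    connection = httplib.HTTPConnection(host, timeout=deadline)
    connection.request('GET', path)
    return connection.getresponse().status
  orig_timeout = socket.getdefaulttimeout()
  try:
    socket.setdefaulttimeout(deadline)  # affects every socket created here
    connection = httplib.HTTPConnection(host)
    connection.request('GET', path)
    return connection.getresponse().status
  finally:
    socket.setdefaulttimeout(orig_timeout)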
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, str):
      payload = payload.encode('utf-8')

    for redirect_number in range(MAX_REDIRECTS + 1):
      parsed = urllib.parse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      port = urllib.parse.splitport(urllib.parse.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))

        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:

        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)




      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:


        adjusted_headers['Content-Length'] = str(len(payload))


      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().decode().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value().decode(), adjusted_headers['User-Agent']))
        else:
          if header.key().decode().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().decode().title()] = (
              header.value().decode())

      if payload is not None:
        # bytes has no 'string_escape' codec in Python 3; the %.1000r format
        # below already renders a safely escaped, truncated repr.
        escaped_payload = payload
      else:
        escaped_payload = b''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = http.client.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):

            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = http.client.HTTPSConnection
        else:

          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)



        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:


            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = b''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except _fancy_urllib_InvalidCertException as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except _fancy_urllib_SSLError as e:

        # Python 3 exceptions have no .message attribute; inspect str(e) instead.
        app_error = (
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in str(e) else
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
      except socket.timeout as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (http.client.HTTPException, socket.error, IOError) as e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))




      if http_response.status in REDIRECT_STATUSES and follow_redirects:

        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)



        if (http_response.status != http.client.TEMPORARY_REDIRECT and
            method not in PRESERVE_ON_REDIRECT):
          logging.warn('Received a %s to a %s. Redirecting with a GET',
                       http_response.status, method)
          method = 'GET'
          payload = None
      else:
        response.set_statuscode(http_response.status)
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          # The body is bytes, so it needs BytesIO rather than StringIO.
          gzip_stream = io.BytesIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])


        for header_key in list(http_response.msg.keys()):
          for header_value in http_response.msg.get_all(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
              continue
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key.encode())
            header_proto.set_value(header_value.encode())

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)



        if request.url() != url:
          response.set_finalurl(url.encode())


        break
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)
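The response-handling tail of this variant gunzips the body (unless the caller asked for passthrough) and truncates it to MAX_RESPONSE_SIZE. The same decode-and-truncate step on its own, with a placeholder size limit rather than the stub's actual constant.

# Sketch of the gzip-decoding and truncation step shown above.
import gzip
import io

_SKETCH_MAX_RESPONSE_SIZE = 32 * 1024 * 1024  # placeholder limit

def decode_body(raw_body, content_encoding, passthrough_content_encoding):
  body = raw_body
  if content_encoding == 'gzip' and not passthrough_content_encoding:
    # The body is bytes, so it has to be wrapped in BytesIO before GzipFile.
    body = gzip.GzipFile(fileobj=io.BytesIO(raw_body)).read()
  truncated = len(body) > _SKETCH_MAX_RESPONSE_SIZE
  return body[:_SKETCH_MAX_RESPONSE_SIZE], truncated

compressed = io.BytesIO()
with gzip.GzipFile(fileobj=compressed, mode='wb') as gz:
  gz.write(b'hello urlfetch')
body, truncated = decode_body(compressed.getvalue(), 'gzip', False)
assert body == b'hello urlfetch' and not truncated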
    def _RetrieveURL(
            url,
            payload,
            method,
            headers,
            request,
            response,
            follow_redirects=True,
            deadline=_API_CALL_DEADLINE,
            validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
            http_proxy=None):
        """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.
      http_proxy: Tuple of (hostname, port), where hostname is a string and port
        is an int, to use as the http proxy.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
        last_protocol = ''
        last_host = ''
        if isinstance(payload, unicode):
            payload = payload.encode('utf-8')

        for redirect_number in xrange(MAX_REDIRECTS + 1):
            parsed = urlparse.urlsplit(url)
            protocol, host, path, query, fragment = parsed

            port = urllib.splitport(urllib.splituser(host)[1])[1]

            if not _IsAllowedPort(port):
                logging.error(
                    'urlfetch received %s ; port %s is not allowed in production!'
                    % (url, port))

                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if protocol and not host:

                logging.error('Missing host on redirect; target url is %s' %
                              url)
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if not host and not protocol:
                host = last_host
                protocol = last_protocol

            if port == '0':
                host = host.replace(':0', '')

            adjusted_headers = {
                'User-Agent':
                [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)'
                  % os.getenv('APPLICATION_ID'))],
                'Host': [host],
                'Accept-Encoding': ['gzip'],
            }
            if payload is not None:

                adjusted_headers['Content-Length'] = [str(len(payload))]

            if method == 'POST' and payload:
                adjusted_headers['Content-Type'] = [
                    'application/x-www-form-urlencoded'
                ]

            passthrough_content_encoding = False
            for header in headers:

                header_key = header.key()
                if header_key.lower() == 'user-agent':
                    adjusted_headers[header_key.title()] = [
                        ('%s %s' %
                         (header.value(), adjusted_headers['User-Agent'][0]))
                    ]
                elif header_key.lower() == 'accept-encoding':
                    passthrough_content_encoding = True
                    adjusted_headers[header_key.title()] = [header.value()]
                elif header_key.lower() == 'content-type':
                    adjusted_headers[header_key.title()] = [header.value()]
                else:
                    adjusted_headers.setdefault(header_key,
                                                []).append(header.value())

            if payload is not None:
                escaped_payload = payload.encode('string_escape')
            else:
                escaped_payload = ''
            logging.debug(
                'Making HTTP request: host = %r, '
                'url = %r, payload = %.1000r, headers = %r', host, url,
                escaped_payload, adjusted_headers)
            try:
                proxy_host = None

                if protocol == 'http':
                    connection_class = httplib.HTTPConnection
                    default_port = 80

                    if http_proxy and not _IsLocalhost(host):
                        proxy_host = '%s:%d' % (http_proxy[0], http_proxy[1])
                    elif os.environ.get(
                            'HTTP_PROXY') and not _IsLocalhost(host):
                        _, proxy_host, _, _, _ = (urlparse.urlsplit(
                            os.environ.get('HTTP_PROXY')))
                elif protocol == 'https':
                    if (validate_certificate and _CanValidateCerts()
                            and CERT_PATH):

                        connection_class = fancy_urllib.create_fancy_connection(
                            ca_certs=CERT_PATH)
                    else:
                        connection_class = httplib.HTTPSConnection

                    default_port = 443

                    if os.environ.get(
                            'HTTPS_PROXY') and not _IsLocalhost(host):
                        _, proxy_host, _, _, _ = (urlparse.urlsplit(
                            os.environ.get('HTTPS_PROXY')))
                else:

                    error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
                    logging.error(error_msg)
                    raise apiproxy_errors.ApplicationError(
                        urlfetch_service_pb.URLFetchServiceError.INVALID_URL,
                        error_msg)

                connection_kwargs = {'timeout': deadline}

                if (not validate_certificate and sys.version_info >= (2, 7, 9)
                        and protocol == 'https'):

                    import ssl
                    connection_kwargs[
                        'context'] = ssl._create_unverified_context()

                if proxy_host:
                    proxy_address, _, proxy_port = proxy_host.partition(':')
                    connection = connection_class(
                        proxy_address,
                        proxy_port if proxy_port else default_port,
                        **connection_kwargs)
                    full_path = urlparse.urlunsplit(
                        (protocol, host, path, query, ''))

                    if protocol == 'https':
                        connection.set_tunnel(host)
                else:
                    connection = connection_class(host, **connection_kwargs)
                    full_path = urlparse.urlunsplit(('', '', path, query, ''))

                last_protocol = protocol
                last_host = host

                try:
                    _SendRequest(connection, method, full_path, payload,
                                 adjusted_headers)
                    http_response = connection.getresponse()
                    if method == 'HEAD':
                        http_response_data = ''
                    else:
                        http_response_data = http_response.read()
                finally:
                    connection.close()
            except _fancy_urllib_InvalidCertException as e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.
                    SSL_CERTIFICATE_ERROR, str(e))
            except _fancy_urllib_SSLError as e:

                app_error = (
                    urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
                    if 'timed out' in e.message else urlfetch_service_pb.
                    URLFetchServiceError.SSL_CERTIFICATE_ERROR)
                raise apiproxy_errors.ApplicationError(app_error, str(e))
            except socket.timeout as e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED,
                    str(e))
            except (httplib.error, socket.error, IOError) as e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR,
                    str(e))

            if http_response.status >= 600:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR,
                    'Status %s unknown' % http_response.status)

            if http_response.status in REDIRECT_STATUSES and follow_redirects:

                url = http_response.getheader('Location', None)
                if url is None:
                    error_msg = 'Missing "Location" header for redirect.'
                    logging.error(error_msg)
                    raise apiproxy_errors.ApplicationError(
                        urlfetch_service_pb.URLFetchServiceError.
                        MALFORMED_REPLY, error_msg)

                if (http_response.status != httplib.TEMPORARY_REDIRECT
                        and method not in PRESERVE_ON_REDIRECT):
                    logging.warn(
                        'Received a %s to a %s. Redirecting with a GET',
                        http_response.status, method)
                    method = 'GET'
                    payload = None
            else:
                response.set_statuscode(http_response.status)
                if (http_response.getheader('content-encoding') == 'gzip'
                        and not passthrough_content_encoding):
                    gzip_stream = StringIO.StringIO(http_response_data)
                    gzip_file = gzip.GzipFile(fileobj=gzip_stream)
                    http_response_data = gzip_file.read()
                response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

                for header_key in http_response.msg.keys():
                    for header_value in http_response.msg.getheaders(
                            header_key):
                        if (header_key.lower() == 'content-encoding'
                                and header_value == 'gzip'
                                and not passthrough_content_encoding):
                            continue
                        if header_key.lower(
                        ) == 'content-length' and method != 'HEAD':
                            header_value = str(len(response.content()))
                        header_proto = response.add_header()
                        header_proto.set_key(header_key)
                        header_proto.set_value(header_value)

                if len(http_response_data) > MAX_RESPONSE_SIZE:
                    response.set_contentwastruncated(True)

                if request.url() != url:
                    response.set_finalurl(url)

                break
        else:
            error_msg = 'Too many repeated redirects'
            logging.error(error_msg)
            raise apiproxy_errors.ApplicationError(
                urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
                error_msg)
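This variant builds adjusted_headers as lists of values per header name and hands them to _SendRequest, which is not shown. A hedged sketch of what such a helper could do, emitting one header line per value; this is an assumption about its behavior, not the stub's actual code.

# Hypothetical _SendRequest-style helper for the list-valued header dict built
# above; an illustrative guess, not the real urlfetch stub implementation.
def _send_request_sketch(connection, method, full_path, payload, headers):
  # skip_host / skip_accept_encoding because Host and Accept-Encoding are
  # already present in the adjusted header dict.
  connection.putrequest(method, full_path,
                        skip_host=True, skip_accept_encoding=True)
  for name, values in headers.items():
    for value in values:
      connection.putheader(name, value)
  connection.endheaders()
  if payload is not None:
    connection.send(payload)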
Example #6
0
    def _RetrieveURL(
            url,
            payload,
            method,
            headers,
            request,
            response,
            follow_redirects=True,
            deadline=_API_CALL_DEADLINE,
            validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
        """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
          original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
          populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS when MAX_REDIRECTS is exceeded.
    """
        last_protocol = ''
        last_host = ''
        if isinstance(payload, unicode):
            payload = payload.encode('utf-8')

        for redirect_number in xrange(MAX_REDIRECTS + 1):
            parsed = urlparse.urlsplit(url)
            protocol, host, path, query, fragment = parsed

            port = urllib.splitport(urllib.splituser(host)[1])[1]

            if not _IsAllowedPort(port):
                logging.error(
                    'urlfetch received %s ; port %s is not allowed in production!'
                    % (url, port))

                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if protocol and not host:

                logging.error('Missing host on redirect; target url is %s' %
                              url)
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

            if not host and not protocol:
                host = last_host
                protocol = last_protocol

            adjusted_headers = {
                'User-Agent':
                [('AppEngine-Google; (+http://code.google.com/appengine; appid: %s)'
                  % os.getenv('APPLICATION_ID'))],
                'Host': [host],
                'Accept-Encoding': ['gzip'],
            }
            if payload is not None:

                adjusted_headers['Content-Length'] = [str(len(payload))]

            if method == 'POST' and payload:
                adjusted_headers['Content-Type'] = [
                    'application/x-www-form-urlencoded'
                ]

            passthrough_content_encoding = False
            for header in headers:

                header_key = header.key()
                if header_key.lower() == 'user-agent':
                    adjusted_headers[header_key.title()] = [
                        ('%s %s' %
                         (header.value(), adjusted_headers['User-Agent'][0]))
                    ]
                elif header_key.lower() == 'accept-encoding':
                    passthrough_content_encoding = True
                    adjusted_headers[header_key.title()] = [header.value()]
                elif header_key.lower() == 'content-type':
                    adjusted_headers[header_key.title()] = [header.value()]
                else:
                    adjusted_headers.setdefault(header_key,
                                                []).append(header.value())

            if payload is not None:
                escaped_payload = payload.encode('string_escape')
            else:
                escaped_payload = ''
            logging.debug(
                'Making HTTP request: host = %r, '
                'url = %r, payload = %.1000r, headers = %r', host, url,
                escaped_payload, adjusted_headers)
            try:
                proxy_host = None

                if protocol == 'http':
                    connection_class = httplib.HTTPConnection
                    default_port = 80

                    if os.environ.get('HTTP_PROXY') and not _IsLocalhost(host):
                        _, proxy_host, _, _, _ = (urlparse.urlsplit(
                            os.environ.get('HTTP_PROXY')))
                elif protocol == 'https':
                    if (validate_certificate and _CanValidateCerts()
                            and CERT_PATH):

                        connection_class = fancy_urllib.create_fancy_connection(
                            ca_certs=CERT_PATH)
                    else:
                        connection_class = httplib.HTTPSConnection

                    default_port = 443

                    if (_CONNECTION_SUPPORTS_SSL_TUNNEL
                            and os.environ.get('HTTPS_PROXY')
                            and not _IsLocalhost(host)):
                        _, proxy_host, _, _, _ = (urlparse.urlsplit(
                            os.environ.get('HTTPS_PROXY')))
                else:

                    error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
                    logging.error(error_msg)
                    raise apiproxy_errors.ApplicationError(
                        urlfetch_service_pb.URLFetchServiceError.INVALID_URL,
                        error_msg)

                connection_kwargs = ({
                    'timeout': deadline
                } if _CONNECTION_SUPPORTS_TIMEOUT else {})

                if proxy_host:
                    proxy_address, _, proxy_port = proxy_host.partition(':')
                    connection = connection_class(
                        proxy_address,
                        proxy_port if proxy_port else default_port,
                        **connection_kwargs)
                    full_path = urlparse.urlunsplit(
                        (protocol, host, path, query, ''))

                    if protocol == 'https':
                        connection.set_tunnel(host)
                else:
                    connection = connection_class(host, **connection_kwargs)
                    full_path = urlparse.urlunsplit(('', '', path, query, ''))

                last_protocol = protocol
                last_host = host

                if not _CONNECTION_SUPPORTS_TIMEOUT:
                    orig_timeout = socket.getdefaulttimeout()
                try:
                    if not _CONNECTION_SUPPORTS_TIMEOUT:

                        socket.setdefaulttimeout(deadline)
                    _SendRequest(connection, method, full_path, payload,
                                 adjusted_headers)
                    http_response = connection.getresponse()
                    if method == 'HEAD':
                        http_response_data = ''
                    else:
                        http_response_data = http_response.read()
                finally:
                    if not _CONNECTION_SUPPORTS_TIMEOUT:
                        socket.setdefaulttimeout(orig_timeout)
                    connection.close()
            except _fancy_urllib_InvalidCertException, e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.
                    SSL_CERTIFICATE_ERROR, str(e))
            except _fancy_urllib_SSLError, e:

                app_error = (
                    urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
                    if 'timed out' in e.message else urlfetch_service_pb.
                    URLFetchServiceError.SSL_CERTIFICATE_ERROR)
                raise apiproxy_errors.ApplicationError(app_error, str(e))
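The proxy-aware variants skip the proxy when _IsLocalhost(host) is true; that helper is not shown either. A hypothetical sketch of such a check, matching only the obvious loopback names; the real stub's rules may differ.

# Hypothetical _IsLocalhost-style check used to bypass HTTP(S)_PROXY for
# local targets; illustrative only.
def _is_localhost_sketch(host):
  hostname = host.split(':')[0].lower()  # drop any :port suffix
  return hostname == 'localhost' or hostname.startswith('127.')

assert _is_localhost_sketch('localhost:8080')
assert _is_localhost_sketch('127.0.0.1')
assert not _is_localhost_sketch('example.com')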
Example #7
0
  def _RetrieveURL(self, url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: Request object from original request.
      response: Response object to populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR
      in cases where:
        - MAX_REDIRECTS is exceeded
        - The protocol of the redirected URL is bad or missing.
    """
    last_protocol = ''
    last_host = ''

    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlparse(url)
      protocol, host, path, parameters, query, fragment = parsed

      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.warning(
          'urlfetch received %s ; port %s is not allowed in production!' %
          (url, port))

      if protocol and not host:
        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:
        adjusted_headers['Content-Length'] = len(payload)
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          adjusted_headers[header.key().title()] = header.value()

      logging.debug('Making HTTP request: host = %s, '
                    'url = %s, payload = %s, headers = %s',
                    host, url, payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection = httplib.HTTPConnection(host)
        elif protocol == 'https':
          if (validate_certificate and fancy_urllib.can_validate_certs() and
              CERT_PATH):
            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
            connection = connection_class(host)
          else:
            connection = httplib.HTTPSConnection(host)
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg)

        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        orig_timeout = socket.getdefaulttimeout()
        try:
          socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except (fancy_urllib.InvalidCertificateException,
              fancy_urllib.SSLError), e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
          str(e))
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
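Every variant merges a caller-supplied User-Agent into the App Engine identifier instead of replacing it. Here is that merge in isolation, with plain (key, value) tuples standing in for the request proto's header objects.

# Sketch of the User-Agent merging done in the header loops above; the tuples
# stand in for the request proto's header entries.
_SKETCH_DEFAULT_UA = 'AppEngine-Google; (+http://code.google.com/appengine)'

def merge_headers_sketch(request_headers):
  adjusted = {'User-Agent': _SKETCH_DEFAULT_UA}
  for key, value in request_headers:
    if key.title().lower() == 'user-agent':
      # The caller's agent string comes first; the stub identifier is appended.
      adjusted['User-Agent'] = '%s %s' % (value, adjusted['User-Agent'])
    else:
      adjusted[key.title()] = value
  return adjusted

merged = merge_headers_sketch([('user-agent', 'my-client/1.0'),
                               ('x-custom', 'abc')])
assert merged['User-Agent'].startswith('my-client/1.0 AppEngine-Google')
assert merged['X-Custom'] == 'abc'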
    def _RetrieveURL(
        self,
        url,
        payload,
        method,
        headers,
        request,
        response,
        follow_redirects=True,
        deadline=_API_CALL_DEADLINE,
        validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT,
    ):
        """Retrieves a URL.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: Request object from original request.
      response: Response object to populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate.

    Raises:
      Raises an apiproxy_errors.ApplicationError exception with FETCH_ERROR
      in cases where:
        - MAX_REDIRECTS is exceeded
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
    """
        last_protocol = ""
        last_host = ""
        if isinstance(payload, unicode):
            payload = payload.encode("utf-8")

        for redirect_number in xrange(MAX_REDIRECTS + 1):
            parsed = urlparse.urlsplit(url)
            protocol, host, path, query, fragment = parsed

            port = urllib.splitport(urllib.splituser(host)[1])[1]

            if not _IsAllowedPort(port):
                logging.error("urlfetch received %s ; port %s is not allowed in production!" % (url, port))

                raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

            if protocol and not host:

                logging.error("Missing host on redirect; target url is %s" % url)
                raise apiproxy_errors.ApplicationError(urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR)

            if not host and not protocol:
                host = last_host
                protocol = last_protocol

            adjusted_headers = {
                "User-Agent": "AppEngine-Google; (+http://code.google.com/appengine)",
                "Host": host,
                "Accept-Encoding": "gzip",
            }
            if payload is not None:

                adjusted_headers["Content-Length"] = str(len(payload))
            if method == "POST" and payload:
                adjusted_headers["Content-Type"] = "application/x-www-form-urlencoded"

            for header in headers:
                if header.key().title().lower() == "user-agent":
                    adjusted_headers["User-Agent"] = "%s %s" % (header.value(), adjusted_headers["User-Agent"])
                else:
                    adjusted_headers[header.key().title()] = header.value()

            if payload is not None:
                escaped_payload = payload.encode("string_escape")
            else:
                escaped_payload = ""
            logging.debug(
                "Making HTTP request: host = %s, " "url = %s, payload = %s, headers = %s",
                host,
                url,
                escaped_payload,
                adjusted_headers,
            )
            try:
                if protocol == "http":
                    connection = httplib.HTTPConnection(host)
                elif protocol == "https":
                    if validate_certificate and _CanValidateCerts() and CERT_PATH:

                        connection_class = fancy_urllib.create_fancy_connection(ca_certs=CERT_PATH)
                        connection = connection_class(host)
                    else:
                        connection = httplib.HTTPSConnection(host)
                else:

                    error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
                    logging.error(error_msg)
                    raise apiproxy_errors.ApplicationError(
                        urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, error_msg
                    )

                last_protocol = protocol
                last_host = host

                if query != "":
                    full_path = path + "?" + query
                else:
                    full_path = path

                orig_timeout = socket.getdefaulttimeout()
                try:
                    socket.setdefaulttimeout(deadline)
                    connection.request(method, full_path, payload, adjusted_headers)
                    http_response = connection.getresponse()
                    if method == "HEAD":
                        http_response_data = ""
                    else:
                        http_response_data = http_response.read()
                finally:
                    socket.setdefaulttimeout(orig_timeout)
                    connection.close()
            except (_fancy_urllib_InvalidCertException, _fancy_urllib_SSLError), e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR, str(e)
                )
            except socket.timeout, e:
                raise apiproxy_errors.ApplicationError(
                    urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e)
                )
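The later variants rely on Python's for/else so that TOO_MANY_REDIRECTS is raised only when the redirect loop finishes without breaking. The control-flow skeleton reduced to its essentials; follow_one_redirect is a placeholder for a single fetch attempt that returns the next Location or None.

# Skeleton of the redirect loop used above: break on a final response, fall
# through to the for/else clause when MAX_REDIRECTS is exhausted.
_SKETCH_MAX_REDIRECTS = 5

class TooManyRedirectsSketch(Exception):
  pass

def fetch_following_redirects(url, follow_one_redirect):
  final_url = None
  for _ in range(_SKETCH_MAX_REDIRECTS + 1):
    next_url = follow_one_redirect(url)
    if next_url is None:  # final, non-redirect response
      final_url = url
      break
    url = next_url        # keep looping with the Location target
  else:
    # Only reached when every iteration produced another redirect.
    raise TooManyRedirectsSketch('Too many repeated redirects')
  return final_url

# A chain of two redirects followed by a final response:
chain = {'a': 'b', 'b': 'c'}
assert fetch_following_redirects('a', chain.get) == 'c'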