Beispiel #1
0
  def Bind(self, configuration):
    """Bind to the LDAP server, retrying while the server is down.

    Binding is what actually initiates the network connection, so an
    unavailable server is first noticed here.

    Args:
      configuration: mapping of source options.  Reads 'retry_max',
        'retry_delay' and 'uri', plus either 'bind_dn'/'bind_password'
        (simple bind) or 'use_sasl'/'sasl_mech'/'sasl_authzid' (SASL bind).

    Raises:
      error.ConfigurationError: SASL was requested with a mechanism other
        than GSSAPI.
      error.SourceUnavailable: the server was still down after 'retry_max'
        attempts.
    """
    retry_count = 0
    while retry_count < configuration['retry_max']:
      self.log.debug('opening ldap connection and binding to %s',
                     configuration['uri'])
      try:
        if 'use_sasl' in configuration and configuration['use_sasl']:
          if ('sasl_mech' in configuration and
              configuration['sasl_mech'] and
              configuration['sasl_mech'].lower() == 'gssapi'):
            sasl = ldap.sasl.gssapi(configuration['sasl_authzid'])
          # TODO: Add other sasl mechs
          else:
            raise error.ConfigurationError('SASL mechanism not supported')

          self.conn.sasl_interactive_bind_s('', sasl)
        else:
          self.conn.simple_bind_s(who=configuration['bind_dn'],
                                  cred=str(configuration['bind_password']))
        # Bound successfully; stop retrying.
        break
      # "except X as e" is valid on Python 2.6+ and 3; the older
      # "except X, e" spelling is a syntax error on Python 3.
      except ldap.SERVER_DOWN as e:
        retry_count += 1
        self.log.warning('Failed LDAP connection: attempt #%s.', retry_count)
        self.log.debug('ldap error is %r', e)
        if retry_count == configuration['retry_max']:
          self.log.debug('max retries hit')
          raise error.SourceUnavailable(e)
        self.log.debug('sleeping %d seconds', configuration['retry_delay'])
        time.sleep(configuration['retry_delay'])
Beispiel #2
0
def HandleCurlError(e, logger=None):
    """Translate a curl exception into one of this project's errors.

    See http://curl.haxx.se/libcurl/c/libcurl-errors.html for a list of codes.

    Args:
      e: pycurl.error whose arguments are (curl error code, message); a
        plain (code, message) sequence is accepted as well.
      logger: logger object; the logging module is used when omitted.

    Returns:
      None when the error is possibly transient, so the caller may retry.

    Raises:
      ConfigurationError: unsupported protocol, malformed URL or SSL
        engine / CA certificate file problems.
      SourceUnavailable: the peer's SSL certificate was rejected.
      Error: any other curl error code.
    """
    if not logger:
        logger = logging

    # Exception objects can only be subscripted on Python 2; their ``args``
    # tuple is available on every version.  Fall back to the object itself
    # so callers passing a bare (code, msg) sequence keep working.
    details = getattr(e, 'args', e)
    code = details[0]
    msg = details[1]

    # Config errors
    if code in (pycurl.E_UNSUPPORTED_PROTOCOL, pycurl.E_URL_MALFORMAT,
                pycurl.E_SSL_ENGINE_NOTFOUND, pycurl.E_SSL_ENGINE_SETFAILED,
                pycurl.E_SSL_CACERT_BADFILE):
        raise error.ConfigurationError(msg)

    # Possibly transient errors, try again
    if code in (pycurl.E_FAILED_INIT, pycurl.E_COULDNT_CONNECT,
                pycurl.E_PARTIAL_FILE, pycurl.E_WRITE_ERROR,
                pycurl.E_READ_ERROR, pycurl.E_OPERATION_TIMEOUTED,
                pycurl.E_SSL_CONNECT_ERROR, pycurl.E_COULDNT_RESOLVE_PROXY,
                pycurl.E_COULDNT_RESOLVE_HOST, pycurl.E_GOT_NOTHING):
        logger.debug('Possibly transient error: %s', msg)
        return

    # SSL issues
    if code in (pycurl.E_SSL_PEER_CERTIFICATE, ):
        raise error.SourceUnavailable(msg)

    # Anything else
    raise error.Error(msg)
Beispiel #3
0
    def GetUpdates(self, s3_client, bucket, obj, since):
        """Get updates from an S3 object.

        Args:
          s3_client: initialized s3 client
          bucket: s3 bucket
          obj: key of the object holding the data
          since: a timestamp representing the last change (None to force-get)

        Returns:
          An empty list when S3 reports the object as not modified (304);
          otherwise the map built by self.GetMap() from the object body,
          with its modify timestamp set from the object's LastModified.

        Raises:
          SourceUnavailable: the object could not be downloaded.
          ValueError: an object in the source map is malformed (presumably
            raised by self.GetMap(); not visible in this method).
        """
        try:
            if since is not None:
                response = s3_client.get_object(
                    Bucket=bucket,
                    IfModifiedSince=self.FromTimestampToDateTime(since),
                    Key=obj)
            else:
                response = s3_client.get_object(Bucket=bucket, Key=obj)
            body = response['Body']
            last_modified_ts = self.FromDateTimeToTimestamp(
                response['LastModified'])
        except ClientError as e:
            # Error codes are not always numeric: S3 also reports symbolic
            # codes such as 'NoSuchKey' or 'AccessDenied'.  Compare as text
            # so those reach SourceUnavailable below instead of dying in
            # int() with an unrelated ValueError.
            error_code = str(e.response.get('Error', {}).get('Code', ''))
            if error_code == '304':
                return []
            self.log.error('error getting S3 object ({}): {}'.format(obj, e))
            raise error.SourceUnavailable('unable to download object from S3')

        data_map = self.GetMap(cache_info=body)
        data_map.SetModifyTimestamp(last_modified_ts)
        return data_map
Beispiel #4
0
    def GetUpdates(self, source, url, since):
        """Fetch a map over HTTP(S), retrying failed requests.

        Args:
          source: a data source; this method uses its ``conn`` (configured
            here through pycurl options), ``conf`` ('retry_max' and
            'retry_delay') and ``log``.
          url: url to the data we want; must use http or https.
          since: a timestamp representing the last change (None to force-get)

        Returns:
          An empty list when the server answers 304 (not modified);
          otherwise the map built by self.GetMap() from the response body,
          with its modify timestamp set when a Last-Modified value is known.

        Raises:
          ConfigurationError: the url scheme is neither http nor https.
          SourceUnavailable: no 200/304 response within 'retry_max' attempts.
          ValueError: an object in the source map is malformed (presumably
            raised by self.GetMap(); not visible in this method).
        """
        proto = url.split(':')[0]
        # Newer libcurl allow you to disable protocols there. Unfortunately
        # it's not in dapper or hardy.
        if proto not in ('http', 'https'):
            raise error.ConfigurationError('Unsupported protocol %s' % proto)

        conn = source.conn
        # Ask curl to record the remote file time and to accept compressed
        # transfers.
        conn.setopt(pycurl.OPT_FILETIME, 1)
        conn.setopt(pycurl.ENCODING, 'bzip2, gzip')
        if since is not None:
            # Conditional request: only fetch when modified after ``since``.
            conn.setopt(pycurl.TIMEVALUE, int(since))
            conn.setopt(pycurl.TIMECONDITION, pycurl.TIMECONDITION_IFMODSINCE)

        retry_count = 0
        # Start from a failing status so an attempt that never assigns
        # resp_code is treated as a failure in the finally clause below.
        resp_code = 500
        while retry_count < source.conf['retry_max']:
            try:
                source.log.debug('fetching %s', url)
                (resp_code, headers,
                 body) = curl.CurlFetch(url, conn, self.log)
                self.log.debug('response code: %s', resp_code)
            finally:
                # NOTE(review): this clause runs after successful and failed
                # fetches alike.  When CurlFetch raises, resp_code keeps its
                # previous value, the attempt is counted and slept on, and
                # the exception then propagates out of this method (or is
                # replaced by SourceUnavailable on the last attempt) -- a
                # raised error is therefore not retried.  Confirm intent.
                if resp_code < 400:
                    # Not modified-since
                    if resp_code == 304:
                        return []
                    if resp_code == 200:
                        break
                # Everything else, including other codes below 400, counts
                # as a failed attempt.
                retry_count += 1
                self.log.warning('Failed connection: attempt #%s.',
                                 retry_count)
                if retry_count == source.conf['retry_max']:
                    self.log.debug('max retries hit')
                    raise error.SourceUnavailable('Max retries exceeded.')
                time.sleep(source.conf['retry_delay'])

        # NOTE(review): if 'retry_max' is 0 the loop body never runs and
        # ``headers``/``body`` are unbound here.
        headers = headers.split('\r\n')
        last_modified = conn.getinfo(pycurl.INFO_FILETIME)
        self.log.debug('last modified: %s', last_modified)
        # -1 is treated as "curl has no file time"; fall back to the
        # Last-Modified response header in that case.
        if last_modified == -1:
            for header in headers:
                if header.lower().startswith('last-modified'):
                    self.log.debug('%s', header)
                    # Keep everything after the first colon as the value.
                    http_ts_string = header[header.find(':') + 1:].strip()
                    last_modified = self.FromHttpToTimestamp(http_ts_string)
                    break
            else:
                # No Last-Modified header was found.
                http_ts_string = ''
        else:
            http_ts_string = self.FromTimestampToHttp(last_modified)

        self.log.debug('Last-modified is: %s', http_ts_string)

        # curl (on Ubuntu hardy at least) will handle gzip, but not bzip2
        try:
            response = cStringIO.StringIO(bz2.decompress(body))
            self.log.debug('bzip encoding found')
        except IOError:
            # Not bzip2 data; use the body as received.
            response = cStringIO.StringIO(body)

        data_map = self.GetMap(cache_info=response)
        # Only stamp the map when a Last-Modified value was determined.
        if http_ts_string:
            http_ts = self.FromHttpToTimestamp(http_ts_string)
            self.log.debug('setting last modified to: %s', http_ts)
            data_map.SetModifyTimestamp(http_ts)

        return data_map