Ejemplo n.º 1
0
    def __init__(self, uploader_conf, plugin_name):
        """
        Configure the uploader from its config section

        :param uploader_conf: dict-like config section; swift_account and
                              container_name are required, every other key
                              read here falls back to a default
        :param plugin_name: plugin name, used to build the logger name and
                            passed to get_logger as log_route
        """
        super(LogUploader, self).__init__(uploader_conf)
        self.logger = utils.get_logger(
            uploader_conf,
            '%s-log-uploader' % plugin_name,
            log_route=plugin_name)
        self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        self.swift_account = uploader_conf['swift_account']
        self.container_name = uploader_conf['container_name']

        # the InternalProxy is built from the proxy server's config file
        conf_path = uploader_conf.get('proxy_server_conf',
                                      '/etc/swift/proxy-server.conf')
        self.internal_proxy = InternalProxy(
            appconfig('config:%s' % conf_path, name='proxy-server'))

        self.new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
        unlink_setting = uploader_conf.get('unlink_log', 'True')
        self.unlink_log = unlink_setting.lower() in utils.TRUE_VALUES

        # source_filename_format is deprecated; it is only consulted when
        # no source_filename_pattern is configured
        legacy_format = uploader_conf.get('source_filename_format')
        configured_pattern = uploader_conf.get('source_filename_pattern')
        if configured_pattern or not legacy_format:
            self.pattern = configured_pattern or '%Y%m%d%H'
            return
        self.logger.warning(
            _('source_filename_format is unreliable and '
              'deprecated; use source_filename_pattern'))
        self.pattern = self.convert_glob_to_regex(legacy_format)
Ejemplo n.º 2
0
    def __init__(self, uploader_conf, plugin_name):
        """
        Build a LogUploader from a config section

        :param uploader_conf: dict-like config section for this uploader;
                              must contain swift_account and container_name
        :param plugin_name: name of the plugin; becomes part of the logger
                            name and is used as the logger's log_route
        """
        super(LogUploader, self).__init__(uploader_conf)

        read = uploader_conf.get
        logger_name = '%s-log-uploader' % plugin_name
        self.logger = utils.get_logger(
            uploader_conf, logger_name, log_route=plugin_name)

        self.log_dir = read('log_dir', '/var/log/swift/')
        self.swift_account = uploader_conf['swift_account']
        self.container_name = uploader_conf['container_name']

        proxy_conf_file = read('proxy_server_conf',
                               '/etc/swift/proxy-server.conf')
        proxy_conf = appconfig('config:%s' % proxy_conf_file,
                               name='proxy-server')
        self.internal_proxy = InternalProxy(proxy_conf)

        self.new_log_cutoff = int(read('new_log_cutoff', '7200'))
        self.unlink_log = (
            read('unlink_log', 'True').lower() in utils.TRUE_VALUES)

        # The deprecated source_filename_format is honoured only when the
        # newer source_filename_pattern option is absent or empty.
        old_style = read('source_filename_format')
        new_style = read('source_filename_pattern')
        use_old_style = bool(old_style) and not new_style
        if use_old_style:
            self.logger.warning(_('source_filename_format is unreliable and '
                                  'deprecated; use source_filename_pattern'))
            self.pattern = self.convert_glob_to_regex(old_style)
        else:
            self.pattern = new_style or '%Y%m%d%H'
Ejemplo n.º 3
0
 def internal_proxy(self):
     """
     Return the InternalProxy, building and caching it on first use

     The proxy server config location is read from the 'log-processor'
     section of self.conf and defaults to /etc/swift/proxy-server.conf.

     :returns : the cached InternalProxy instance
     """
     if self._internal_proxy is not None:
         return self._internal_proxy
     processor_conf = self.conf.get('log-processor', {})
     conf_path = processor_conf.get('proxy_server_conf',
                                    '/etc/swift/proxy-server.conf')
     self._internal_proxy = InternalProxy(
         appconfig('config:%s' % conf_path, name='proxy-server'),
         self.logger,
         retries=3)
     return self._internal_proxy
Ejemplo n.º 4
0
 def __init__(self, uploader_conf, plugin_name):
     """
     Read the uploader settings from the config section

     :param uploader_conf: dict-like config section; swift_account,
                           container_name and source_filename_format are
                           required, the other keys have defaults
     :param plugin_name: name handed to utils.get_logger for this uploader
     """
     super(LogUploader, self).__init__(uploader_conf)
     log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
     swift_account = uploader_conf['swift_account']
     container_name = uploader_conf['container_name']
     source_filename_format = uploader_conf['source_filename_format']
     proxy_server_conf_loc = uploader_conf.get(
         'proxy_server_conf', '/etc/swift/proxy-server.conf')
     proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                   name='proxy-server')
     new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
     unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
         ('true', 'on', '1', 'yes')
     self.unlink_log = unlink_log
     self.new_log_cutoff = new_log_cutoff
     # log_dir is later used as a string prefix, so force a trailing slash
     if not log_dir.endswith('/'):
         log_dir = log_dir + '/'
     self.log_dir = log_dir
     self.swift_account = swift_account
     self.container_name = container_name
     self.filename_format = source_filename_format
     self.internal_proxy = InternalProxy(proxy_server_conf)
     # The logger has always been named after plugin_name alone; the
     # 'swift-log-uploader-%s' name that used to be computed here was never
     # passed on, so that dead assignment has been dropped.
     self.logger = utils.get_logger(uploader_conf, plugin_name)
Ejemplo n.º 5
0
 def __init__(self, uploader_conf, plugin_name):
     """
     Set up the uploader from its config section

     :param uploader_conf: dict-like config section; must contain
                           swift_account, container_name and
                           source_filename_format
     :param plugin_name: name passed to utils.get_logger
     """
     super(LogUploader, self).__init__(uploader_conf)
     log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
     swift_account = uploader_conf['swift_account']
     container_name = uploader_conf['container_name']
     source_filename_format = uploader_conf['source_filename_format']
     proxy_server_conf_loc = uploader_conf.get('proxy_server_conf',
                                         '/etc/swift/proxy-server.conf')
     proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                   name='proxy-server')
     new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
     unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
                                                 ('true', 'on', '1', 'yes')
     self.unlink_log = unlink_log
     self.new_log_cutoff = new_log_cutoff
     # normalise log_dir so it always ends with a slash
     if not log_dir.endswith('/'):
         log_dir = log_dir + '/'
     self.log_dir = log_dir
     self.swift_account = swift_account
     self.container_name = container_name
     self.filename_format = source_filename_format
     self.internal_proxy = InternalProxy(proxy_server_conf)
     # An unused 'swift-log-uploader-<plugin>' name used to be built here
     # and thrown away; the logger name is, and was, plugin_name itself.
     self.logger = utils.get_logger(uploader_conf, plugin_name)
Ejemplo n.º 6
0
class LogUploader(Daemon):
    '''
    Given a local directory, a swift account, and a container name,
    LogUploader will upload the files under the local directory whose names
    match the configured filename pattern to the given account/container.
    All but the newest files will be uploaded, and the files' md5 sum will
    be computed. The hash is used to prevent duplicate data from being
    uploaded multiple times in different files (ex: log lines). Since the
    hash is computed, it is also used as the uploaded object's etag to
    ensure data integrity.

    Note that after the file is successfully uploaded, it will be unlinked
    unless the unlink_log option is turned off.

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.
    '''
    def __init__(self, uploader_conf, plugin_name):
        """
        Configure the uploader from its config section

        :param uploader_conf: dict-like config section; swift_account and
                              container_name are required, the other keys
                              read here have defaults
        :param plugin_name: plugin name, used to build the logger name and
                            passed to get_logger as log_route
        """
        super(LogUploader, self).__init__(uploader_conf)
        log_name = '%s-log-uploader' % plugin_name
        self.logger = utils.get_logger(uploader_conf,
                                       log_name,
                                       log_route=plugin_name)
        self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        self.swift_account = uploader_conf['swift_account']
        self.container_name = uploader_conf['container_name']
        proxy_server_conf_loc = uploader_conf.get(
            'proxy_server_conf', '/etc/swift/proxy-server.conf')
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                      name='proxy-server')
        self.internal_proxy = InternalProxy(proxy_server_conf)
        self.new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
        self.unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
                utils.TRUE_VALUES

        # source_filename_format is deprecated; it is only used when no
        # source_filename_pattern is configured
        source_filename_format = uploader_conf.get('source_filename_format')
        source_filename_pattern = uploader_conf.get('source_filename_pattern')
        if source_filename_format and not source_filename_pattern:
            self.logger.warning(
                _('source_filename_format is unreliable and '
                  'deprecated; use source_filename_pattern'))
            self.pattern = self.convert_glob_to_regex(source_filename_format)
        else:
            self.pattern = source_filename_pattern or '%Y%m%d%H'

    def run_once(self, *args, **kwargs):
        """
        Run a single upload pass and log how long it took

        Positional and keyword arguments are accepted but not used.
        """
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(
            _("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def convert_glob_to_regex(self, glob):
        """
        Make a best effort to support old style config globs

        Only '.', '*' and '?' are translated; any other character is passed
        through to the regex unchanged.

        :param glob: old style config source_filename_format

        :returns : new style config source_filename_pattern
        """
        pattern = glob
        # escape literal dots first so the dots introduced below survive
        pattern = pattern.replace('.', r'\.')
        pattern = pattern.replace('*', r'.*')
        # a glob '?' stands for exactly one character, i.e. regex '.'
        # (the previous '.?' also matched when the character was absent)
        pattern = pattern.replace('?', r'.')
        return pattern

    def validate_filename_pattern(self):
        """
        Validate source_filename_pattern

        :returns : valid regex pattern based on source_filename_pattern with
                   group matches substituted for date fmt markers, or None
                   (after logging an error) if any of the %Y, %m, %d, %H
                   markers is missing
        """
        pattern = self.pattern
        markers = {
            '%Y': ('year', '(?P<year>[0-9]{4})'),
            '%m': ('month', '(?P<month>[0-1][0-9])'),
            '%d': ('day', '(?P<day>[0-3][0-9])'),
            '%H': ('hour', '(?P<hour>[0-2][0-9])'),
        }
        for marker, (mtype, group) in markers.items():
            if marker not in self.pattern:
                self.logger.error(
                    _('source_filename_pattern must contain a '
                      'marker %(marker)s to match the '
                      '%(mtype)s') % {
                          'marker': marker,
                          'mtype': mtype
                      })
                return None
            pattern = pattern.replace(marker, group)
        return pattern

    def get_relpath_to_files_under_log_dir(self):
        """
        Look under log_dir recursively and return all filenames as relpaths

        :returns : list of strs, the relpath to all filenames under log_dir
        """
        all_files = []
        for path, dirs, files in os.walk(self.log_dir):
            all_files.extend(os.path.join(path, f) for f in files)
        return [os.path.relpath(f, start=self.log_dir) for f in all_files]

    def filter_files(self, all_files, pattern):
        """
        Filter files based on regex pattern

        The pattern is applied with re.match, so it is anchored at the start
        of the relpath but not at its end.

        :param all_files: list of strs, relpath of the filenames under log_dir
        :param pattern: regex pattern to match against filenames

        :returns : dict mapping full path of file to match group dict
        """
        filename2match = {}
        for filename in all_files:
            match = re.match(pattern, filename)
            if match:
                full_path = os.path.join(self.log_dir, filename)
                filename2match[full_path] = match.groupdict()
            else:
                self.logger.debug(
                    _('%(filename)s does not match '
                      '%(pattern)s') % {
                          'filename': filename,
                          'pattern': pattern
                      })
        return filename2match

    def upload_all_logs(self):
        """
        Match files under log_dir to source_filename_pattern and upload
        them to swift

        Nothing is uploaded when the pattern is invalid, when no file
        matches, or when the container cannot be created. Files modified
        less than new_log_cutoff seconds ago are skipped.
        """
        pattern = self.validate_filename_pattern()
        if not pattern:
            self.logger.error(_('Invalid filename_format'))
            return
        all_files = self.get_relpath_to_files_under_log_dir()
        filename2match = self.filter_files(all_files, pattern)
        if not filename2match:
            self.logger.info(
                _('No files in %(log_dir)s match %(pattern)s') % {
                    'log_dir': self.log_dir,
                    'pattern': pattern
                })
            return
        if not self.internal_proxy.create_container(self.swift_account,
                                                    self.container_name):
            self.logger.error(
                _('Unable to create container for '
                  '%(account)s/%(container)s') % {
                      'account': self.swift_account,
                      'container': self.container_name
                  })
            return
        for filename, match in filename2match.items():
            # don't process very new logs
            seconds_since_mtime = time.time() - os.stat(filename).st_mtime
            if seconds_since_mtime < self.new_log_cutoff:
                self.logger.debug(
                    _("Skipping log: %(file)s "
                      "(< %(cutoff)d seconds old)") % {
                          'file': filename,
                          'cutoff': self.new_log_cutoff
                      })
                continue
            # match holds the year/month/day/hour named groups
            self.upload_one_log(filename, **match)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift

        Empty files are skipped. The object is stored as
        year/month/day/hour/<md5>.gz, where the md5 is taken over the
        file's (decompressed, for .gz files) content.

        :param filename: full path of the local file
        :param year: str, year part of the object name
        :param month: str, month part of the object name
        :param day: str, day part of the object name
        :param hour: str, hour part of the object name
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = filename.endswith('.gz')
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        if self.internal_proxy.upload_file(filename,
                                           self.swift_account,
                                           self.container_name,
                                           target_filename,
                                           compress=(not already_compressed)):
            self.logger.debug(
                _("Uploaded log %(file)s to %(target)s") % {
                    'file': filename,
                    'target': target_filename
                })
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)
Ejemplo n.º 7
0
class LogUploader(Daemon):
    '''
    Given a local directory, a swift account, and a container name,
    LogUploader will upload the files under the local directory whose names
    match the configured filename pattern to the given account/container.
    All but the newest files will be uploaded, and the files' md5 sum will
    be computed. The hash is used to prevent duplicate data from being
    uploaded multiple times in different files (ex: log lines). Since the
    hash is computed, it is also used as the uploaded object's etag to
    ensure data integrity.

    Note that after the file is successfully uploaded, it will be unlinked
    (this can be disabled with the unlink_log option).

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.
    '''

    def __init__(self, uploader_conf, plugin_name):
        """
        Build the uploader from its config section

        :param uploader_conf: dict-like config section; must contain
                              swift_account and container_name
        :param plugin_name: name of the plugin; part of the logger name and
                            used as the logger's log_route
        """
        super(LogUploader, self).__init__(uploader_conf)
        log_name = '%s-log-uploader' % plugin_name
        self.logger = utils.get_logger(uploader_conf, log_name,
                                       log_route=plugin_name)
        self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        self.swift_account = uploader_conf['swift_account']
        self.container_name = uploader_conf['container_name']
        proxy_server_conf_loc = uploader_conf.get('proxy_server_conf',
                                            '/etc/swift/proxy-server.conf')
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                      name='proxy-server')
        self.internal_proxy = InternalProxy(proxy_server_conf)
        self.new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
        self.unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
                utils.TRUE_VALUES

        # source_filename_format is deprecated and is ignored whenever
        # source_filename_pattern is set
        source_filename_format = uploader_conf.get('source_filename_format')
        source_filename_pattern = uploader_conf.get('source_filename_pattern')
        if source_filename_format and not source_filename_pattern:
            self.logger.warning(_('source_filename_format is unreliable and '
                                  'deprecated; use source_filename_pattern'))
            self.pattern = self.convert_glob_to_regex(source_filename_format)
        else:
            self.pattern = source_filename_pattern or '%Y%m%d%H'

    def run_once(self, *args, **kwargs):
        """
        Perform one upload pass, logging the elapsed time in minutes

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(_("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def convert_glob_to_regex(self, glob):
        """
        Make a best effort to support old style config globs

        Translates '.', '*' and '?' only; all other characters end up in
        the regex as they are.

        :param glob: old style config source_filename_format

        :returns : new style config source_filename_pattern
        """
        # Order matters: literal dots must be escaped before '*' and '?'
        # introduce regex dots of their own.
        pattern = glob.replace('.', r'\.')
        pattern = pattern.replace('*', r'.*')
        # '?' in a glob means exactly one character; the former '.?'
        # translation wrongly allowed the character to be missing
        pattern = pattern.replace('?', r'.')
        return pattern

    def validate_filename_pattern(self):
        """
        Validate source_filename_pattern

        :returns : valid regex pattern based on source_filename_pattern with
                   group matches substituted for date fmt markers; None if
                   one of the %Y, %m, %d, %H markers is absent (an error is
                   logged in that case)
        """
        pattern = self.pattern
        markers = {
            '%Y': ('year', '(?P<year>[0-9]{4})'),
            '%m': ('month', '(?P<month>[0-1][0-9])'),
            '%d': ('day', '(?P<day>[0-3][0-9])'),
            '%H': ('hour', '(?P<hour>[0-2][0-9])'),
        }
        for marker, (mtype, group) in markers.items():
            if marker not in self.pattern:
                self.logger.error(_('source_filename_pattern must contain a '
                                    'marker %(marker)s to match the '
                                    '%(mtype)s') % {'marker': marker,
                                                   'mtype': mtype})
                return None
            pattern = pattern.replace(marker, group)
        return pattern

    def get_relpath_to_files_under_log_dir(self):
        """
        Look under log_dir recursively and return all filenames as relpaths

        :returns : list of strs, the relpath to all filenames under log_dir
        """
        all_files = []
        for path, dirs, files in os.walk(self.log_dir):
            all_files.extend(os.path.join(path, f) for f in files)
        return [os.path.relpath(f, start=self.log_dir) for f in all_files]

    def filter_files(self, all_files, pattern):
        """
        Filter files based on regex pattern

        Matching uses re.match, i.e. the pattern must match from the start
        of the relpath; files that do not match are logged at debug level.

        :param all_files: list of strs, relpath of the filenames under log_dir
        :param pattern: regex pattern to match against filenames

        :returns : dict mapping full path of file to match group dict
        """
        filename2match = {}
        for filename in all_files:
            match = re.match(pattern, filename)
            if match:
                full_path = os.path.join(self.log_dir, filename)
                filename2match[full_path] = match.groupdict()
            else:
                self.logger.debug(_('%(filename)s does not match '
                                    '%(pattern)s') % {'filename': filename,
                                                      'pattern': pattern})
        return filename2match

    def upload_all_logs(self):
        """
        Match files under log_dir to source_filename_pattern and upload
        them to swift

        Returns early, uploading nothing, if the pattern is invalid, no
        file matches, or the container cannot be created. Files whose mtime
        is less than new_log_cutoff seconds ago are left for a later run.
        """
        pattern = self.validate_filename_pattern()
        if not pattern:
            self.logger.error(_('Invalid filename_format'))
            return
        all_files = self.get_relpath_to_files_under_log_dir()
        filename2match = self.filter_files(all_files, pattern)
        if not filename2match:
            self.logger.info(_('No files in %(log_dir)s match %(pattern)s') %
                             {'log_dir': self.log_dir, 'pattern': pattern})
            return
        if not self.internal_proxy.create_container(self.swift_account,
                                                    self.container_name):
            self.logger.error(_('Unable to create container for '
                                '%(account)s/%(container)s') % {
                                    'account': self.swift_account,
                                    'container': self.container_name})
            return
        for filename, match in filename2match.items():
            # don't process very new logs
            seconds_since_mtime = time.time() - os.stat(filename).st_mtime
            if seconds_since_mtime < self.new_log_cutoff:
                self.logger.debug(_("Skipping log: %(file)s "
                                    "(< %(cutoff)d seconds old)") % {
                                        'file': filename,
                                        'cutoff': self.new_log_cutoff})
                continue
            # the named groups year/month/day/hour become keyword args
            self.upload_one_log(filename, **match)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift

        Zero-length files are skipped. The target object name is
        year/month/day/hour/<md5>.gz; for .gz inputs the md5 is computed
        over the decompressed content and the file is uploaded without
        further compression.

        :param filename: full path of the local file
        :param year: str, year component of the object name
        :param month: str, month component of the object name
        :param day: str, day component of the object name
        :param hour: str, hour component of the object name
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = filename.endswith('.gz')
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        if self.internal_proxy.upload_file(filename,
                                          self.swift_account,
                                          self.container_name,
                                          target_filename,
                                          compress=(not already_compressed)):
            self.logger.debug(_("Uploaded log %(file)s to %(target)s") %
                {'file': filename, 'target': target_filename})
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)
Ejemplo n.º 8
0
class LogUploader(Daemon):
    '''
    Given a local directory, a swift account, and a container name,
    LogUploader will upload the files in the local directory that match
    source_filename_format to the given account/container.  All but the
    newest files will be uploaded, and the files' md5 sum will be computed.
    The hash is used to prevent duplicate data from being uploaded multiple
    times in different files (ex: log lines). Since the hash is computed, it
    is also used as the uploaded object's etag to ensure data integrity.

    Note that after the file is successfully uploaded, it will be unlinked
    unless the unlink_log option is turned off.

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.
    '''
    def __init__(self, uploader_conf, plugin_name):
        """
        Read the uploader settings from the config section

        :param uploader_conf: dict-like config section; swift_account,
                              container_name and source_filename_format are
                              required, the other keys have defaults
        :param plugin_name: name handed to utils.get_logger
        """
        super(LogUploader, self).__init__(uploader_conf)
        log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        swift_account = uploader_conf['swift_account']
        container_name = uploader_conf['container_name']
        source_filename_format = uploader_conf['source_filename_format']
        proxy_server_conf_loc = uploader_conf.get(
            'proxy_server_conf', '/etc/swift/proxy-server.conf')
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                      name='proxy-server')
        new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
        unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
                                                    ('true', 'on', '1', 'yes')
        self.unlink_log = unlink_log
        self.new_log_cutoff = new_log_cutoff
        # upload_all_logs uses len(log_dir) as a string offset into the
        # globbed paths, so the trailing slash must always be present
        if not log_dir.endswith('/'):
            log_dir = log_dir + '/'
        self.log_dir = log_dir
        self.swift_account = swift_account
        self.container_name = container_name
        self.filename_format = source_filename_format
        self.internal_proxy = InternalProxy(proxy_server_conf)
        # the logger is named after plugin_name; an unused
        # 'swift-log-uploader-<plugin>' name was previously built here
        self.logger = utils.get_logger(uploader_conf, plugin_name)

    def run_once(self):
        """
        Run a single upload pass and log how long it took
        """
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(
            _("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def upload_all_logs(self):
        """
        Upload every file in log_dir that matches source_filename_format

        The year, month, day and hour of each file are cut out of its name
        at the positions of the %Y, %m, %d and %H markers in the format.
        Files modified less than new_log_cutoff seconds ago are skipped, as
        are files whose extracted date parts are not digits.
        """
        markers = ('%Y', '%m', '%d', '%H')
        missing = [c for c in markers if c not in self.filename_format]
        if missing:
            # don't have all the parts, can't upload anything
            # (str.index would raise ValueError below, so check up front)
            self.logger.error(
                _("source_filename_format is missing: %s") %
                ' '.join(missing))
            return
        i = sorted((self.filename_format.index(c), c) for c in markers)
        # marker -> (start, end) slice bounds into the full file path
        offsets = {}
        base_offset = len(self.log_dir)
        for start, c in i:
            offset = base_offset + start
            if c == '%Y':
                offsets[c] = (offset, offset + 4)
                # Add in the difference between len(%Y) and the expanded
                # version of %Y (????). This makes sure the codes after this
                # one will align properly in the final filename.
                base_offset += 2
            else:
                offsets[c] = (offset, offset + 2)
        glob_pattern = self.filename_format
        glob_pattern = glob_pattern.replace('%Y', '????', 1)
        glob_pattern = glob_pattern.replace('%m', '??', 1)
        glob_pattern = glob_pattern.replace('%d', '??', 1)
        glob_pattern = glob_pattern.replace('%H', '??', 1)
        filelist = glob.iglob(os.path.join(self.log_dir, glob_pattern))
        self.internal_proxy.create_container(self.swift_account,
                                             self.container_name)
        for filename in filelist:
            # From the filename, we need to derive the year, month, day,
            # and hour for the file. These values are used in the uploaded
            # object's name, so they should be a reasonably accurate
            # representation of the time for which the data in the file was
            # collected. The file's last modified time is not a reliable
            # representation of the data in the file. For example, an old
            # log file (from hour A) may be uploaded or moved into the
            # log_dir in hour Z. The file's modified time will be for hour
            # Z, and therefore the object's name in the system will not
            # represent the data in it.
            # If the filename doesn't match the format, it shouldn't be
            # uploaded.
            parts = [filename[slice(*offsets[c])] for c in markers]
            # Slicing never raises, so a short or non-numeric name has to
            # be detected explicitly.
            if [len(p) for p in parts] != [4, 2, 2, 2] or \
                    not all(p.isdigit() for p in parts):
                # unexpected filename format, move on
                self.logger.error(_("Unexpected log: %s") % filename)
                continue
            year, month, day, hour = parts
            if ((time.time() - os.stat(filename).st_mtime) <
                    self.new_log_cutoff):
                # don't process very new logs
                self.logger.debug(
                    _("Skipping log: %(file)s (< %(cutoff)d seconds old)") % {
                        'file': filename,
                        'cutoff': self.new_log_cutoff
                    })
                continue
            self.upload_one_log(filename, year, month, day, hour)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift

        Empty files are skipped. The object is stored as
        year/month/day/hour/<md5>.gz, where the md5 is taken over the
        file's (decompressed, for .gz files) content.

        :param filename: full path of the local file
        :param year: str, year part of the object name
        :param month: str, month part of the object name
        :param day: str, day part of the object name
        :param hour: str, hour part of the object name
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = filename.endswith('.gz')
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        if self.internal_proxy.upload_file(filename,
                                           self.swift_account,
                                           self.container_name,
                                           target_filename,
                                           compress=(not already_compressed)):
            self.logger.debug(
                _("Uploaded log %(file)s to %(target)s") % {
                    'file': filename,
                    'target': target_filename
                })
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)
Ejemplo n.º 9
0
class LogUploader(Daemon):
    '''
    Given a local directory, a swift account, and a container name,
    LogUploader will upload all files in the local directory to the given
    account/container.  All but the newest files will be uploaded, and the
    files' md5 sum will be computed. The hash is used to prevent duplicate
    data from being uploaded multiple times in different files (ex: log
    lines). Since the hash is computed, it is also used in the uploaded
    object's name.

    Note that after a file is successfully uploaded, it will be unlinked
    unless the ``unlink_log`` option is set to a false value.

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.
    '''

    # strftime-style codes that must appear in source_filename_format,
    # paired with the width of the text each one expands to in a filename.
    _DATE_FIELDS = (('%Y', 4), ('%m', 2), ('%d', 2), ('%H', 2))

    def __init__(self, uploader_conf, plugin_name):
        '''
        :param uploader_conf: dict-like config; ``swift_account``,
                              ``container_name`` and
                              ``source_filename_format`` are required
                              (KeyError if missing). ``log_dir``,
                              ``proxy_server_conf``, ``new_log_cutoff`` and
                              ``unlink_log`` are optional.
        :param plugin_name: name passed to the logger factory
        '''
        super(LogUploader, self).__init__(uploader_conf)
        log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        swift_account = uploader_conf['swift_account']
        container_name = uploader_conf['container_name']
        source_filename_format = uploader_conf['source_filename_format']
        proxy_server_conf_loc = uploader_conf.get(
            'proxy_server_conf', '/etc/swift/proxy-server.conf')
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                      name='proxy-server')
        self.new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
        self.unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
            ('true', 'on', '1', 'yes')
        # Filename offsets are computed relative to len(log_dir), so the
        # directory must always carry its trailing separator.
        if not log_dir.endswith('/'):
            log_dir = log_dir + '/'
        self.log_dir = log_dir
        self.swift_account = swift_account
        self.container_name = container_name
        self.filename_format = source_filename_format
        self.internal_proxy = InternalProxy(proxy_server_conf)
        self.logger = utils.get_logger(uploader_conf, plugin_name)

    def run_once(self):
        '''Upload every eligible log once and report the elapsed time.'''
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(_("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def _date_field_offsets(self):
        '''
        Work out where the year, month, day and hour sit in a full filename.

        :returns: a tuple of four ``(begin, end)`` slices into the full path
                  (log_dir + filename) in year, month, day, hour order, or
                  None if any of the codes is missing from the format.
        '''
        positions = []
        for code, width in self._DATE_FIELDS:
            # find() rather than index(): a missing code must be reported
            # to the caller, not raised as ValueError.
            start = self.filename_format.find(code)
            if start < 0:
                return None
            positions.append((start, code, width))
        positions.sort()
        located = {}
        base_offset = len(self.log_dir)
        for start, code, width in positions:
            begin = base_offset + start
            located[code] = (begin, begin + width)
            # Account for the difference between the length of the code
            # and its expanded text (only %Y grows: 2 -> 4 characters) so
            # that the codes after this one line up in the final filename.
            base_offset += width - len(code)
        return tuple(located[code] for code, _width in self._DATE_FIELDS)

    def upload_all_logs(self):
        '''
        Upload every file in log_dir that matches the filename format and
        is older than ``new_log_cutoff`` seconds.

        Files whose date fields are not all digits are logged as unexpected
        and skipped. If the format lacks any of %Y, %m, %d or %H nothing is
        uploaded.
        '''
        offsets = self._date_field_offsets()
        if offsets is None:
            # don't have all the parts, can't upload anything
            self.logger.error(
                _("source_filename_format %s must contain %%Y, %%m, %%d "
                  "and %%H; not uploading anything") % self.filename_format)
            return
        glob_pattern = self.filename_format
        glob_pattern = glob_pattern.replace('%Y', '????', 1)
        glob_pattern = glob_pattern.replace('%m', '??', 1)
        glob_pattern = glob_pattern.replace('%d', '??', 1)
        glob_pattern = glob_pattern.replace('%H', '??', 1)
        filelist = glob.iglob(os.path.join(self.log_dir, glob_pattern))
        self.internal_proxy.create_container(self.swift_account,
                                            self.container_name)
        for filename in filelist:
            # From the filename, we need to derive the year, month, day,
            # and hour for the file. These values are used in the uploaded
            # object's name, so they should be a reasonably accurate
            # representation of the time for which the data in the file was
            # collected. The file's last modified time is not a reliable
            # representation of the data in the file. For example, an old
            # log file (from hour A) may be uploaded or moved into the
            # log_dir in hour Z. The file's modified time will be for hour
            # Z, and therefore the object's name in the system will not
            # represent the data in it.
            # If the filename doesn't match the format, it shouldn't be
            # uploaded.
            parts = [filename[begin:end] for begin, end in offsets]
            # Slicing never raises, and glob's '?' matches any character,
            # so the extracted fields have to be validated explicitly.
            well_formed = all(
                len(part) == end - begin and part.isdigit()
                for part, (begin, end) in zip(parts, offsets))
            if not well_formed:
                # unexpected filename format, move on
                self.logger.error(_("Unexpected log: %s") % filename)
                continue
            year, month, day, hour = parts
            if ((time.time() - os.stat(filename).st_mtime) <
                                                        self.new_log_cutoff):
                # don't process very new logs
                self.logger.debug(
                    _("Skipping log: %(file)s (< %(cutoff)d seconds old)") %
                    {'file': filename, 'cutoff': self.new_log_cutoff})
                continue
            self.upload_one_log(filename, year, month, day, hour)

    def upload_one_log(self, filename, year, month, day, hour):
        '''
        Upload one log file as ``year/month/day/hour/<md5>.gz``.

        :param filename: path of the local log file
        :param year: year component of the object name
        :param month: month component of the object name
        :param day: day component of the object name
        :param hour: hour component of the object name

        Zero-length files are skipped. Files ending in ``.gz`` are hashed
        after decompression and uploaded without being compressed again.
        The local file is removed after a successful upload when
        ``unlink_log`` is set.
        '''
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = filename.endswith('.gz')
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        if self.internal_proxy.upload_file(filename,
                                          self.swift_account,
                                          self.container_name,
                                          target_filename,
                                          compress=(not already_compressed)):
            self.logger.debug(_("Uploaded log %(file)s to %(target)s") %
                {'file': filename, 'target': target_filename})
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)