Example #1
 def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
     super(LogUploader, self).__init__(uploader_conf)
     log_name = '%s-log-uploader' % plugin_name
     self.logger = utils.get_logger(uploader_conf,
                                    log_name,
                                    log_route=plugin_name)
     self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
     self.swift_account = uploader_conf['swift_account']
     self.container_name = uploader_conf['container_name']
     proxy_server_conf_loc = uploader_conf.get(
         'proxy_server_conf', '/etc/swift/proxy-server.conf')
     proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                   name='proxy-server')
     self.internal_proxy = InternalProxy(proxy_server_conf)
     self.new_log_cutoff = int(
         cutoff or uploader_conf.get('new_log_cutoff', '7200'))
     self.unlink_log = uploader_conf.get('unlink_log', 'true').lower() in \
             utils.TRUE_VALUES
     self.filename_pattern = regex or \
         uploader_conf.get('source_filename_pattern',
             '''
             ^%s-
             (?P<year>[0-9]{4})
             (?P<month>[0-1][0-9])
             (?P<day>[0-3][0-9])
             (?P<hour>[0-2][0-9])
             .*$''' % plugin_name)
     self.content_type = uploader_conf.get('content_type')
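
For orientation, a minimal uploader_conf covering the keys read by this constructor might look like the sketch below; the account, container, and paths are placeholder values, not taken from the original source.

# Hypothetical configuration values; only swift_account and container_name
# are required, everything else falls back to the defaults shown above.
uploader_conf = {
    'swift_account': 'AUTH_stats',                        # placeholder account
    'container_name': 'log_data',                         # placeholder container
    'log_dir': '/var/log/swift/',                         # default
    'proxy_server_conf': '/etc/swift/proxy-server.conf',  # default
    'new_log_cutoff': '7200',                             # seconds; newer files are skipped
    'unlink_log': 'true',                                 # delete the local file after upload
    # 'source_filename_pattern' and 'content_type' are optional overrides
}
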
Example #2
 def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
     super(LogUploader, self).__init__(uploader_conf)
     log_name = "%s-log-uploader" % plugin_name
     self.logger = utils.get_logger(uploader_conf, log_name, log_route=plugin_name)
     self.log_dir = uploader_conf.get("log_dir", "/var/log/swift/")
     self.swift_account = uploader_conf["swift_account"]
     self.container_name = uploader_conf["container_name"]
     proxy_server_conf_loc = uploader_conf.get("proxy_server_conf", "/etc/swift/proxy-server.conf")
     proxy_server_conf = appconfig("config:%s" % proxy_server_conf_loc, name="proxy-server")
     memcache = loadfilter("config:%s" % proxy_server_conf_loc, name="cache")(object).memcache
     self.internal_proxy = InternalProxy(proxy_server_conf, memcache=memcache)
     self.new_log_cutoff = int(cutoff or uploader_conf.get("new_log_cutoff", "7200"))
     self.unlink_log = uploader_conf.get("unlink_log", "true").lower() in utils.TRUE_VALUES
     self.filename_pattern = regex or uploader_conf.get(
         "source_filename_pattern",
         """
             ^%s-
             (?P<year>[0-9]{4})
             (?P<month>[0-1][0-9])
             (?P<day>[0-3][0-9])
             (?P<hour>[0-2][0-9])
             .*$"""
         % plugin_name,
     )
     self.content_type = uploader_conf.get("content_type")
Example #3
 def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
     super(LogUploader, self).__init__(uploader_conf)
     log_name = '%s-log-uploader' % plugin_name
     self.logger = utils.get_logger(uploader_conf, log_name,
                                    log_route=plugin_name)
     self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
     self.swift_account = uploader_conf['swift_account']
     self.container_name = uploader_conf['container_name']
     proxy_server_conf_loc = uploader_conf.get('proxy_server_conf',
                                         '/etc/swift/proxy-server.conf')
     proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                   name='proxy-server')
     self.internal_proxy = InternalProxy(proxy_server_conf)
     self.new_log_cutoff = int(cutoff or
                               uploader_conf.get('new_log_cutoff', '7200'))
     self.unlink_log = uploader_conf.get('unlink_log', 'true').lower() in \
             utils.TRUE_VALUES
     self.filename_pattern = regex or \
         uploader_conf.get('source_filename_pattern',
             '''
             ^%s-
             (?P<year>[0-9]{4})
             (?P<month>[0-1][0-9])
             (?P<day>[0-3][0-9])
             (?P<hour>[0-2][0-9])
             .*$''' % plugin_name)
     self.content_type = uploader_conf.get('content_type')
Example #4
 def internal_proxy(self):
     if self._internal_proxy is None:
         # first look in the conf directly
         proxy_server_conf_loc = self.conf.get('proxy_server_conf')
         if proxy_server_conf_loc is None:
             # then look in a section called log-processor
             stats_conf = self.conf.get('log-processor', {})
             proxy_server_conf_loc = stats_conf.get(
                 'proxy_server_conf', '/etc/swift/proxy-server.conf')
         if proxy_server_conf_loc:
             proxy_server_conf = appconfig('config:%s' %
                                           proxy_server_conf_loc,
                                           name='proxy-server')
         else:
             proxy_server_conf = None
         self._internal_proxy = InternalProxy(proxy_server_conf,
                                              self.logger,
                                              retries=3,
                                              memcache=self.memcache)
     return self._internal_proxy
Example #5
class LogUploader(Daemon):
    '''
    Given a local directory, a swift account, and a container name, LogUploader
    will upload all files in the local directory to the given account/
    container.  All but the newest files will be uploaded, and the files' md5
    sum will be computed. The hash is used to prevent duplicate data from
    being uploaded multiple times in different files (ex: log lines). Since
    the hash is computed, it is also used as the uploaded object's etag to
    ensure data integrity.

    Note that after the file is successfully uploaded, it will be unlinked.

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.

    The default log file format is: plugin_name-%Y%m%d%H*. Any other format
    of log file names must supply a regular expression that defines groups
    for year, month, day, and hour. The regular expression will be evaluated
    with re.VERBOSE. A common example may be:
    source_filename_pattern = ^cdn_logger-
        (?P<year>[0-9]{4})
        (?P<month>[0-1][0-9])
        (?P<day>[0-3][0-9])
        (?P<hour>[0-2][0-9])
        .*$
    '''
    def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
        super(LogUploader, self).__init__(uploader_conf)
        log_name = '%s-log-uploader' % plugin_name
        self.logger = utils.get_logger(uploader_conf,
                                       log_name,
                                       log_route=plugin_name)
        self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
        self.swift_account = uploader_conf['swift_account']
        self.container_name = uploader_conf['container_name']
        proxy_server_conf_loc = uploader_conf.get(
            'proxy_server_conf', '/etc/swift/proxy-server.conf')
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
                                      name='proxy-server')
        self.internal_proxy = InternalProxy(proxy_server_conf)
        self.new_log_cutoff = int(
            cutoff or uploader_conf.get('new_log_cutoff', '7200'))
        self.unlink_log = uploader_conf.get('unlink_log', 'true').lower() in \
                utils.TRUE_VALUES
        self.filename_pattern = regex or \
            uploader_conf.get('source_filename_pattern',
                '''
                ^%s-
                (?P<year>[0-9]{4})
                (?P<month>[0-1][0-9])
                (?P<day>[0-3][0-9])
                (?P<hour>[0-2][0-9])
                .*$''' % plugin_name)
        self.content_type = uploader_conf.get('content_type')

    def run_once(self, *args, **kwargs):
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(
            _("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def get_relpath_to_files_under_log_dir(self):
        """
        Look under log_dir recursively and return all filenames as relpaths

        :returns: list of strs, the relpath to all filenames under log_dir
        """
        all_files = []
        for path, dirs, files in os.walk(self.log_dir):
            all_files.extend(os.path.join(path, f) for f in files)
        return [os.path.relpath(f, start=self.log_dir) for f in all_files]

    def filter_files(self, all_files):
        """
        Filter files based on the regex in self.filename_pattern

        :param all_files: list of strs, relpath of the filenames under log_dir

        :returns: dict mapping full path of file to match group dict
        """
        filename2match = {}
        found_match = False
        for filename in all_files:
            match = re.match(self.filename_pattern, filename, re.VERBOSE)
            if match:
                found_match = True
                full_path = os.path.join(self.log_dir, filename)
                filename2match[full_path] = match.groupdict()
            else:
                self.logger.debug(
                    _('%(filename)s does not match '
                      '%(pattern)s') % {
                          'filename': filename,
                          'pattern': self.filename_pattern
                      })
        return filename2match

    def upload_all_logs(self):
        """
        Match files under log_dir to source_filename_pattern and upload to
        swift
        """
        all_files = self.get_relpath_to_files_under_log_dir()
        filename2match = self.filter_files(all_files)
        if not filename2match:
            self.logger.error(
                _('No files in %(log_dir)s match %(pattern)s') % {
                    'log_dir': self.log_dir,
                    'pattern': self.filename_pattern
                })
            sys.exit(1)
        if not self.internal_proxy.create_container(self.swift_account,
                                                    self.container_name):
            self.logger.error(
                _('Unable to create container for '
                  '%(account)s/%(container)s') % {
                      'account': self.swift_account,
                      'container': self.container_name
                  })
            return
        for filename, match in filename2match.items():
            # don't process very new logs
            try:
                seconds_since_mtime = time.time() - os.stat(filename).st_mtime
            except OSError:
                # filename wasn't found, skip it
                continue
            if seconds_since_mtime < self.new_log_cutoff:
                self.logger.debug(
                    _("Skipping log: %(file)s "
                      "(< %(cutoff)d seconds old)") % {
                          'file': filename,
                          'cutoff': self.new_log_cutoff
                      })
                continue
            try:
                self.upload_one_log(filename, **match)
            except Exception:
                self.logger.exception(
                    _('ERROR: could not upload %s') % filename)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = True if filename.endswith('.gz') else False
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        metadata = {'x-object-meta-original-name': filename}
        if self.content_type:
            metadata['Content-Type'] = self.content_type
        if self.internal_proxy.upload_file(filename,
                                           self.swift_account,
                                           self.container_name,
                                           target_filename,
                                           compress=(not already_compressed),
                                           headers=metadata):
            self.logger.debug(
                _("Uploaded log %(file)s to %(target)s") % {
                    'file': filename,
                    'target': target_filename
                })
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)
Example #6
class LogUploader(Daemon):
    """
    Given a local directory, a swift account, and a container name, LogUploader
    will upload all files in the local directory to the given account/
    container.  All but the newest files will be uploaded, and the files' md5
    sum will be computed. The hash is used to prevent duplicate data from
    being uploaded multiple times in different files (ex: log lines). Since
    the hash is computed, it is also used as the uploaded object's etag to
    ensure data integrity.

    Note that after the file is successfully uploaded, it will be unlinked.

    The given proxy server config is used to instantiate a proxy server for
    the object uploads.

    The default log file format is: plugin_name-%Y%m%d%H*. Any other format
    of log file names must supply a regular expression that defines groups
    for year, month, day, and hour. The regular expression will be evaluated
    with re.VERBOSE. A common example may be:
    source_filename_pattern = ^cdn_logger-
        (?P<year>[0-9]{4})
        (?P<month>[0-1][0-9])
        (?P<day>[0-3][0-9])
        (?P<hour>[0-2][0-9])
        .*$
    """

    def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
        super(LogUploader, self).__init__(uploader_conf)
        log_name = "%s-log-uploader" % plugin_name
        self.logger = utils.get_logger(uploader_conf, log_name, log_route=plugin_name)
        self.log_dir = uploader_conf.get("log_dir", "/var/log/swift/")
        self.swift_account = uploader_conf["swift_account"]
        self.container_name = uploader_conf["container_name"]
        proxy_server_conf_loc = uploader_conf.get("proxy_server_conf", "/etc/swift/proxy-server.conf")
        proxy_server_conf = appconfig("config:%s" % proxy_server_conf_loc, name="proxy-server")
        memcache = loadfilter("config:%s" % proxy_server_conf_loc, name="cache")(object).memcache
        self.internal_proxy = InternalProxy(proxy_server_conf, memcache=memcache)
        self.new_log_cutoff = int(cutoff or uploader_conf.get("new_log_cutoff", "7200"))
        self.unlink_log = uploader_conf.get("unlink_log", "true").lower() in utils.TRUE_VALUES
        self.filename_pattern = regex or uploader_conf.get(
            "source_filename_pattern",
            """
                ^%s-
                (?P<year>[0-9]{4})
                (?P<month>[0-1][0-9])
                (?P<day>[0-3][0-9])
                (?P<hour>[0-2][0-9])
                .*$"""
            % plugin_name,
        )
        self.content_type = uploader_conf.get("content_type")

    def run_once(self, *args, **kwargs):
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(_("Uploading logs complete (%0.2f minutes)") % ((time.time() - start) / 60))

    def get_relpath_to_files_under_log_dir(self):
        """
        Look under log_dir recursively and return all filenames as relpaths

        :returns: list of strs, the relpath to all filenames under log_dir
        """
        all_files = []
        for path, dirs, files in os.walk(self.log_dir):
            all_files.extend(os.path.join(path, f) for f in files)
        return [os.path.relpath(f, start=self.log_dir) for f in all_files]

    def filter_files(self, all_files):
        """
        Filter files based on the regex in self.filename_pattern

        :param all_files: list of strs, relpath of the filenames under log_dir

        :returns: dict mapping full path of file to match group dict
        """
        filename2match = {}
        found_match = False
        for filename in all_files:
            match = re.match(self.filename_pattern, filename, re.VERBOSE)
            if match:
                found_match = True
                full_path = os.path.join(self.log_dir, filename)
                filename2match[full_path] = match.groupdict()
            else:
                self.logger.debug(
                    _("%(filename)s does not match " "%(pattern)s")
                    % {"filename": filename, "pattern": self.filename_pattern}
                )
        return filename2match

    def upload_all_logs(self):
        """
        Match files under log_dir to source_filename_pattern and upload to
        swift
        """
        all_files = self.get_relpath_to_files_under_log_dir()
        filename2match = self.filter_files(all_files)
        if not filename2match:
            self.logger.error(
                _("No files in %(log_dir)s match %(pattern)s")
                % {"log_dir": self.log_dir, "pattern": self.filename_pattern}
            )
            sys.exit(1)
        if not self.internal_proxy.create_container(self.swift_account, self.container_name):
            self.logger.error(
                _("Unable to create container for " "%(account)s/%(container)s")
                % {"account": self.swift_account, "container": self.container_name}
            )
            return
        for filename, match in filename2match.items():
            # don't process very new logs
            try:
                seconds_since_mtime = time.time() - os.stat(filename).st_mtime
            except OSError:
                # filename wasn't found, skip it
                continue
            if seconds_since_mtime < self.new_log_cutoff:
                self.logger.debug(
                    _("Skipping log: %(file)s " "(< %(cutoff)d seconds old)")
                    % {"file": filename, "cutoff": self.new_log_cutoff}
                )
                continue
            try:
                self.upload_one_log(filename, **match)
            except Exception:
                self.logger.exception(_("ERROR: could not upload %s") % filename)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = True if filename.endswith(".gz") else False
        opener = gzip.open if already_compressed else open
        f = opener(filename, "rb")
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = "/".join([year, month, day, hour, filehash + ".gz"])
        metadata = {"x-object-meta-original-name": filename}
        if self.content_type:
            metadata["Content-Type"] = self.content_type
        if self.internal_proxy.upload_file(
            filename,
            self.swift_account,
            self.container_name,
            target_filename,
            compress=(not already_compressed),
            headers=metadata,
        ):
            self.logger.debug(_("Uploaded log %(file)s to %(target)s") % {"file": filename, "target": target_filename})
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)